This is an automated email from the ASF dual-hosted git repository. arnabp20 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push: new abe4b1d [SYSTEMDS-2772] Add Autoencoder (2-layer) builtin abe4b1d is described below commit abe4b1d9cecb81c95087f64dd7e572b1ba41abcc Author: Artem Kroviakov <mr.krov...@gmail.com> AuthorDate: Mon Dec 28 19:37:53 2020 +0100 [SYSTEMDS-2772] Add Autoencoder (2-layer) builtin DIA project WS2020/21. --- scripts/builtin/.autoencoder_2layer.dml.swp | Bin 0 -> 24576 bytes scripts/builtin/autoencoder_2layer.dml | 234 ++++++++++++++++++ .../java/org/apache/sysds/common/Builtins.java | 1 + .../builtin/BuiltinAutoencoder2LayerTest.java | 205 ++++++++++++++++ .../scripts/functions/builtin/autoencoder_2layer.R | 264 +++++++++++++++++++++ .../functions/builtin/autoencoder_2layer.dml | 35 +++ 6 files changed, 739 insertions(+) diff --git a/scripts/builtin/.autoencoder_2layer.dml.swp b/scripts/builtin/.autoencoder_2layer.dml.swp new file mode 100644 index 0000000..a4d3db2 Binary files /dev/null and b/scripts/builtin/.autoencoder_2layer.dml.swp differ diff --git a/scripts/builtin/autoencoder_2layer.dml b/scripts/builtin/autoencoder_2layer.dml new file mode 100644 index 0000000..1f54604 --- /dev/null +++ b/scripts/builtin/autoencoder_2layer.dml @@ -0,0 +1,234 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +# Trains a 2-layer autoencoder with minibatch SGD and step-size decay. +# If invoked with H1 > H2 then it becomes a 'bowtie' structured autoencoder +# Weights are initialized using Glorot & Bengio (2010) AISTATS initialization. +# The script standardizes the input before training (can be turned off). +# Also, it randomly reshuffles rows before training. +# Currently, tanh is set to be the activation function. +# By re-implementing 'func' DML-bodied function, one can change the activation. + +# INPUT PARAMETERS: +# --------------------------------------------------------------------------------------------- +# NAME TYPE DEFAULT MEANING +# --------------------------------------------------------------------------------------------- +# X String --- Filename where the input is stored +# H1 Int --- Number of neurons in the 1st hidden layer +# H2 Int --- Number of neurons in the 2nd hidden layer +# EPOCH Int --- Number of epochs to train for +# OBJ Boolean FALSE If TRUE, Computes objective function value (squared-loss) +# at the end of each epoch. Note that, computing the full +# objective can take a lot of time. 
+# BATCH Int 256 Mini-batch size (training parameter) +# STEP Double 1e-5 Initial step size (training parameter) +# DECAY Double 0.95 Decays step size after each epoch (training parameter) +# MOMENTUM Double 0.9 Momentum parameter (training parameter) +# +# W1_rand Int Rand Weights might be initialized via input matrices +# W2_rand Int Rand +# W3_rand Int Rand +# W4_rand Int Rand +# --------------------------------------------------------------------------------------------- +# OUTPUT: +# --------------------------------------------------------------------------------------------- +# NAME TYPE DEFAULT MEANING +# --------------------------------------------------------------------------------------------- +# W1_out Matrix[Double] --- Matrix storing weights between input layer and 1st hidden layer +# b1_out Matrix[Double] --- Matrix storing bias between input layer and 1st hidden layer +# W2_out Matrix[Double] --- Matrix storing weights between 1st hidden layer and 2nd hidden layer +# b2_out Matrix[Double] --- Matrix storing bias between 1st hidden layer and 2nd hidden layer +# W3_out Matrix[Double] --- Matrix storing weights between 2nd hidden layer and 3rd hidden layer +# b3_out Matrix[Double] --- Matrix storing bias between 2nd hidden layer and 3rd hidden layer +# W4_out Matrix[Double] --- Matrix storing weights between 3rd hidden layer and output layer +# b4_out Matrix[Double] --- Matrix storing bias between 3rd hidden layer and output layer +# HIDDEN Matrix[Double] --- Matrix storing the hidden (2nd) layer representation if needed +# --------------------------------------------------------------------------------------------- +# + + +m_autoencoder_2layer = function(Matrix[Double] X, Integer num_hidden1, Integer num_hidden2, Integer max_epochs, + Boolean full_obj = FALSE, Integer batch_size = 256, Double step = 1e-5, Double decay = 0.95, Double mu = 0.9, + Matrix[Double] W1_rand = matrix(0, rows=0, cols=0), Matrix[Double] W2_rand = matrix(0, rows=0, cols=0), + 
Matrix[Double] W3_rand = matrix(0, rows=0, cols=0), Matrix[Double] W4_rand = matrix(0, rows=0, cols=0), + Matrix[Double] order_rand = matrix(0, rows=0, cols=0)) + return(Matrix[Double] W1, Matrix[Double] b1, Matrix[Double] W2, Matrix[Double] b2, + Matrix[Double] W3, Matrix[Double] b3, Matrix[Double] W4, Matrix[Double] b4, Matrix[Double] reordered_H) +{ + n = nrow(X) + m = ncol(X) + #randomly reordering rows + if(nrow(order_rand) == 0 & ncol(order_rand) == 0) + permut = table(seq(1,n,1), order(target=Rand(rows=n, cols=1, min=0, max=1, pdf="uniform"), by=1, index.return=TRUE), n, n) + else + permut = table(seq(1,n,1), order(target=order_rand, by=1, index.return=TRUE), n, n) + X = permut %*% X + + #z-transform, whitening operator is better + means = colSums(X)/n + stds = sqrt((colSums(X^2)/n - means*means)*n/(n-1)) + 1e-17 + X = (X - means)/stds + + if(nrow(W1_rand) == 0 & ncol(W1_rand) == 0) + W1_rand = Rand(rows=num_hidden1, cols=m, min=-1, max=1, pdf="uniform") + if(nrow(W2_rand) == 0 & ncol(W2_rand) == 0) + W2_rand = Rand(rows=num_hidden2, cols=num_hidden1, min=-1, max=1, pdf="uniform") + if(nrow(W3_rand) == 0 & ncol(W3_rand) == 0) + W3_rand = Rand(rows=num_hidden1, cols=num_hidden2, min=-1, max=1, pdf="uniform") + if(nrow(W4_rand) == 0 & ncol(W4_rand) == 0) + W4_rand = Rand(rows=m, cols=num_hidden1, min=-1, max=1, pdf="uniform") + + W1 = sqrt(6)/sqrt(m + num_hidden1) * W1_rand + b1 = matrix(0, rows=num_hidden1, cols=1) + W2 = sqrt(6)/sqrt(num_hidden1 + num_hidden2) * W2_rand + b2 = matrix(0, rows=num_hidden2, cols=1) + W3 = sqrt(6)/sqrt(num_hidden2 + num_hidden1) * W3_rand + b3 = matrix(0, rows=num_hidden1, cols=1) + W4 = sqrt(6)/sqrt(num_hidden2 + m) * W4_rand + b4 = matrix(0, rows=m, cols=1) + + upd_W1 = matrix(0, rows=nrow(W1), cols=ncol(W1)) + upd_b1 = matrix(0, rows=nrow(b1), cols=ncol(b1)) + upd_W2 = matrix(0, rows=nrow(W2), cols=ncol(W2)) + upd_b2 = matrix(0, rows=nrow(b2), cols=ncol(b2)) + upd_W3 = matrix(0, rows=nrow(W3), cols=ncol(W3)) + upd_b3 = 
matrix(0, rows=nrow(b3), cols=ncol(b3)) + upd_W4 = matrix(0, rows=nrow(W4), cols=ncol(W4)) + upd_b4 = matrix(0, rows=nrow(b4), cols=ncol(b4)) + + if( full_obj ){ + [full_H1, full_H1_prime, full_H2, full_H2_prime, full_H3, full_H3_prime, full_Yhat, + full_Yhat_prime, full_E] = feedForward(X, W1, b1, W2, b2, W3, b3, W4, b4, X) + full_o = obj(full_E) + print("EPOCHS=" + 0 + " OBJ (FULL DATA): " + full_o) + } + + iter = 0 + num_iters_per_epoch = ceil(n / batch_size) + max_iterations = max_epochs * num_iters_per_epoch + #print("num_iters_per_epoch=" + num_iters_per_epoch + " max_iterations=" + max_iterations) + beg = 1 + while( iter < max_iterations ){ + end = beg + batch_size - 1 + if(end > n) end = n + X_batch = X[beg:end,] + + [H1, H1_prime, H2, H2_prime, H3, H3_prime, Yhat, Yhat_prime, E] + = feedForward(X_batch, W1, b1, W2, b2, W3, b3, W4, b4, X_batch) + [W1_grad, b1_grad, W2_grad, b2_grad, W3_grad, b3_grad, W4_grad, b4_grad] + = grad(X_batch, H1, H1_prime, H2, H2_prime, H3, H3_prime, Yhat_prime, E, W1, W2, W3, W4) + + o = obj(E) + print("epochs=%5.4f BATCH beg=%d end=%d obj=%f", (iter / num_iters_per_epoch), beg, end, o) + + #update + local_step = step / nrow(X_batch) + upd_W1 = mu * upd_W1 - local_step * W1_grad + upd_b1 = mu * upd_b1 - local_step * b1 + upd_W2 = mu * upd_W2 - local_step * W2_grad + upd_b2 = mu * upd_b2 - local_step * b2 + upd_W3 = mu * upd_W3 - local_step * W3_grad + upd_b3 = mu * upd_b3 - local_step * b3 + upd_W4 = mu * upd_W4 - local_step * W4_grad + upd_b4 = mu * upd_b4 - local_step * b4 + W1 = W1 + upd_W1 + b1 = b1 + upd_b1 + W2 = W2 + upd_W2 + b2 = b2 + upd_b2 + W3 = W3 + upd_W3 + b3 = b3 + upd_b3 + W4 = W4 + upd_W4 + b4 = b4 + upd_b4 + + iter = iter + 1 + if(end == n) beg = 1 + else beg = end + 1 + + if( iter %% num_iters_per_epoch == 0 ) step = step * decay + + if( full_obj & iter %% num_iters_per_epoch == 0 ){ + [full_H1, full_H1_prime, full_H2, full_H2_prime, full_H3, full_H3_prime, full_Yhat, + full_Yhat_prime, full_E] = feedForward(X, 
W1, b1, W2, b2, W3, b3, W4, b4, X) + full_o = obj(full_E) + epochs = iter %/% num_iters_per_epoch + print("EPOCHS=" + epochs + " iter=" + iter + " OBJ (FULL DATA)=" + full_o) + } + } + + [full_H1, full_H1_prime, full_H2, full_H2_prime, full_H3, full_H3_prime, full_Yhat, + full_Yhat_prime, full_E] = feedForward(X, W1, b1, W2, b2, W3, b3, W4, b4, X) + reordered_H = t(permut) %*% full_H2 +} + +#implements tanh +#to use another activation fn, implement a DML-bodied function with +#function name 'func' and comment out this one +func = function(Matrix[Double] X) return(Matrix[Double] Y, Matrix[Double] Y_prime){ + Y = (exp(2*X) - 1)/(exp(2*X) + 1) + Y_prime = 1 - Y^2 +} + +feedForward = function(Matrix[Double] X, Matrix[Double] W1, Matrix[Double] b1, Matrix[Double] W2, Matrix[Double] b2, + Matrix[Double] W3, Matrix[Double] b3, Matrix[Double] W4, Matrix[Double] b4, Matrix[Double] Y) + return(Matrix[Double] H1, Matrix[Double] H1_prime,Matrix[Double] H2, Matrix[Double] H2_prime, + Matrix[Double] H3, Matrix[Double] H3_prime, Matrix[Double] Yhat, Matrix[Double] Yhat_prime, Matrix[Double] E) +{ + H1_in = t(W1 %*% t(X) + b1) + [H1, H1_prime] = func(H1_in) + + H2_in = t(W2 %*% t(H1) + b2) + [H2, H2_prime] = func(H2_in) + + H3_in = t(W3 %*% t(H2) + b3) + [H3, H3_prime] = func(H3_in) + + Yhat_in = t(W4 %*% t(H3) + b4) + [Yhat, Yhat_prime] = func(Yhat_in) + E = Yhat - Y +} + +grad = function(Matrix[Double] X, Matrix[Double] H1, Matrix[Double] H1_prime, Matrix[Double] H2, Matrix[Double] H2_prime, + Matrix[Double] H3, Matrix[Double] H3_prime, Matrix[Double] Yhat_prime, Matrix[Double] E, + Matrix[Double] W1, Matrix[Double] W2, Matrix[Double] W3, Matrix[Double] W4) + return(Matrix[Double] W1_grad, Matrix[Double] b1_grad,Matrix[Double] W2_grad, Matrix[Double] b2_grad, + Matrix[Double] W3_grad, Matrix[Double] b3_grad, Matrix[Double] W4_grad, Matrix[Double] b4_grad) +{ + #backprop + delta4 = E * Yhat_prime + delta3 = H3_prime * (delta4 %*% W4) + delta2 = H2_prime * (delta3 %*% W3) + delta1 
= H1_prime * (delta2 %*% W2) + + #compute gradients + b4_grad = t(colSums(delta4)) + b3_grad = t(colSums(delta3)) + b2_grad = t(colSums(delta2)) + b1_grad = t(colSums(delta1)) + + W4_grad = t(delta4) %*% H3 + W3_grad = t(delta3) %*% H2 + W2_grad = t(delta2) %*% H1 + W1_grad = t(delta1) %*% X +} + +obj = function(Matrix[Double] E) return(Double val){ + val = 0.5 * sum(E^2) +} diff --git a/src/main/java/org/apache/sysds/common/Builtins.java b/src/main/java/org/apache/sysds/common/Builtins.java index 19cfa16..c63fa45 100644 --- a/src/main/java/org/apache/sysds/common/Builtins.java +++ b/src/main/java/org/apache/sysds/common/Builtins.java @@ -47,6 +47,7 @@ public enum Builtins { ALS_DS("alsDS", true), ASIN("asin", false), ATAN("atan", false), + AUTOENCODER2LAYER("autoencoder_2layer", true), AVG_POOL("avg_pool", false), AVG_POOL_BACKWARD("avg_pool_backward", false), BATCH_NORM2D("batch_norm2d", false, ReturnType.MULTI_RETURN), diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinAutoencoder2LayerTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinAutoencoder2LayerTest.java new file mode 100644 index 0000000..81f93a5 --- /dev/null +++ b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinAutoencoder2LayerTest.java @@ -0,0 +1,205 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.test.functions.builtin; + + +import org.apache.sysds.runtime.matrix.data.MatrixValue; +import org.apache.sysds.test.AutomatedTestBase; +import org.apache.sysds.test.TestUtils; +import org.junit.Test; + +import java.util.HashMap; + + +public class BuiltinAutoencoder2LayerTest extends AutomatedTestBase +{ + private final static String TEST_NAME = "autoencoder_2layer"; + private final static String TEST_DIR = "functions/builtin/"; + private static final String TEST_CLASS_DIR = TEST_DIR + BuiltinAutoencoder2LayerTest.class.getSimpleName() + "/"; + + private final static int rows = 1058; + private final static int cols = 784; + private final static double sparse = 0.1; + private final static double dense = 0.7; + private final static double tolerance = 1e-3; + + private static int batchSize = 256; + private static double step = 1e-5; + private static double decay = 0.95; + private static double momentum = 0.9; + private static boolean obj = false; + + @Override + public void setUp() { + TestUtils.clearAssertionInformation(); + addTestConfiguration(TEST_CLASS_DIR, TEST_NAME); + } + + public void restoreDefaultParams(){ + batchSize = 256; + step = 1e-5; + decay = 0.95; + momentum = 0.9; + obj = false; + } + + @Test + public void testAutoencoderSparse80To5() { + runAutoencoderTest(80, 5, 2, sparse); + } + + @Test + public void testAutoencoderDense80To5() { + runAutoencoderTest(80, 5, 2, dense); + } + + @Test + public void testAutoencoderSparse10To4() { + runAutoencoderTest(10, 4, 2, sparse); + } + + @Test + public void 
testAutoencoderDense10To4() { + runAutoencoderTest(10, 4, 2, dense); + } + + @Test + public void testAutoencoderSparse200To20FullObj() { + obj = true; + runAutoencoderTest(200, 20, 2, sparse); + restoreDefaultParams(); + } + + @Test + public void testAutoencoderDense120To10Batch512() { + batchSize = 512; + runAutoencoderTest(120, 10, 2, dense); + restoreDefaultParams(); + } + + @Test + public void testAutoencoderSparse200To12DecMomentum() { + momentum = 0.8; + runAutoencoderTest(200, 12, 2, sparse); + restoreDefaultParams(); + } + + @Test + public void testAutoencoderSparse200To12IncMomentum() { + momentum = 0.95; + runAutoencoderTest(200, 12, 2, sparse); + restoreDefaultParams(); + } + + @Test + public void testAutoencoderDense20To3DecDecay() { + decay = 0.85; + runAutoencoderTest(20, 3, 2, dense); + restoreDefaultParams(); + } + + @Test + public void testAutoencoderDense500To3FullObjBatch512IncStep() { + obj = true; + batchSize = 512; + step = 1e-4; + runAutoencoderTest(500, 3, 2, dense); + restoreDefaultParams(); + } + + @Test + public void testAutoencoderSparse354To7FullObjBatch512DecStepIncMomentumDecDecay() { + obj = true; + batchSize = 512; + step = 1e-6; + momentum = 0.95; + decay = 0.90; + runAutoencoderTest(354, 7, 2, sparse); + restoreDefaultParams(); + } + + private void runAutoencoderTest(int numHidden1, int numHidden2, int maxEpochs, double sparsity) { + loadTestConfiguration(getTestConfiguration(TEST_NAME)); + + String HOME = SCRIPT_DIR + TEST_DIR; + fullDMLScriptName = HOME + TEST_NAME + ".dml"; + String fullObj = obj ? 
"TRUE" : "FALSE"; + programArgs = new String[]{ "-stats", "-nvargs", "X="+input("X"), + "H1="+numHidden1, "H2="+numHidden2, "EPOCH="+maxEpochs, "BATCH="+batchSize, + "STEP="+step, "DECAY="+decay, "MOMENTUM="+momentum, "OBJ="+fullObj, + "W1_rand="+input("W1_rand"),"W2_rand="+input("W2_rand"), + "W3_rand="+input("W3_rand"), "W4_rand="+input("W4_rand"), + "order_rand="+input("order_rand"), + "W1_out="+output("W1_out"), "b1_out="+output("b1_out"), + "W2_out="+output("W2_out"), "b2_out="+output("b2_out"), + "W3_out="+output("W3_out"), "b3_out="+output("b3_out"), + "W4_out="+output("W4_out"), "b4_out="+output("b4_out"), + "hidden_out="+output("hidden_out")}; + fullRScriptName = HOME + TEST_NAME + ".R"; + rCmd = getRCmd(inputDir(), String.valueOf(numHidden1), String.valueOf(numHidden2), String.valueOf(maxEpochs), + String.valueOf(batchSize), String.valueOf(momentum), String.valueOf(step), String.valueOf(decay), fullObj, expectedDir()); + + double[][] X = getRandomMatrix(rows, cols, 0, 1, sparsity, 27); + writeInputMatrixWithMTD("X", X, true); + + //generate rand matrices for W1, W2, W3, W4 here itself for passing onto both DML and R scripts + double[][] W1_rand = getRandomMatrix(numHidden1, cols, 0, 1, sparsity, 800); + writeInputMatrixWithMTD("W1_rand", W1_rand, true); + double[][] W2_rand = getRandomMatrix(numHidden2, numHidden1, 0, 1, sparsity, 900); + writeInputMatrixWithMTD("W2_rand", W2_rand, true); + double[][] W3_rand = getRandomMatrix(numHidden1, numHidden2, 0, 1, sparsity, 589); + writeInputMatrixWithMTD("W3_rand", W3_rand, true); + double[][] W4_rand = getRandomMatrix(cols, numHidden1, 0, 1, sparsity, 150); + writeInputMatrixWithMTD("W4_rand", W4_rand, true); + double[][] order_rand = getRandomMatrix(rows, 1, 0, 1, sparsity, 143); + writeInputMatrixWithMTD("order_rand", order_rand, true); //for the permut operation on input X + runTest(true, false, null, -1); + + runRScript(true); + + //compare matrices + HashMap<MatrixValue.CellIndex, Double> w1OutDML = 
readDMLMatrixFromOutputDir("W1_out"); + HashMap<MatrixValue.CellIndex, Double> b1OutDML = readDMLMatrixFromOutputDir("b1_out"); + HashMap<MatrixValue.CellIndex, Double> w2OutDML = readDMLMatrixFromOutputDir("W2_out"); + HashMap<MatrixValue.CellIndex, Double> b2OutDML = readDMLMatrixFromOutputDir("b2_out"); + HashMap<MatrixValue.CellIndex, Double> w3OutDML = readDMLMatrixFromOutputDir("W3_out"); + HashMap<MatrixValue.CellIndex, Double> b3OutDML = readDMLMatrixFromOutputDir("b3_out"); + HashMap<MatrixValue.CellIndex, Double> w4OutDML = readDMLMatrixFromOutputDir("W4_out"); + HashMap<MatrixValue.CellIndex, Double> b4OutDML = readDMLMatrixFromOutputDir("b4_out"); + + HashMap<MatrixValue.CellIndex, Double> w1OutR = readRMatrixFromExpectedDir("W1_out"); + HashMap<MatrixValue.CellIndex, Double> b1OutR = readRMatrixFromExpectedDir("b1_out"); + HashMap<MatrixValue.CellIndex, Double> w2OutR = readRMatrixFromExpectedDir("W2_out"); + HashMap<MatrixValue.CellIndex, Double> b2OutR = readRMatrixFromExpectedDir("b2_out"); + HashMap<MatrixValue.CellIndex, Double> w3OutR = readRMatrixFromExpectedDir("W3_out"); + HashMap<MatrixValue.CellIndex, Double> b3OutR = readRMatrixFromExpectedDir("b3_out"); + HashMap<MatrixValue.CellIndex, Double> w4OutR = readRMatrixFromExpectedDir("W4_out"); + HashMap<MatrixValue.CellIndex, Double> b4OutR = readRMatrixFromExpectedDir("b4_out"); + + TestUtils.compareMatrices(w1OutDML, w1OutR, tolerance, "W1-DML", "W1-R"); + TestUtils.compareMatrices(b1OutDML, b1OutR, tolerance, "b1-DML", "b1-R"); + TestUtils.compareMatrices(w2OutDML, w2OutR, tolerance, "W2-DML", "W2-R"); + TestUtils.compareMatrices(b2OutDML, b2OutR, tolerance, "b2-DML", "b2-R"); + TestUtils.compareMatrices(w3OutDML, w3OutR, tolerance, "W3-DML", "W3-R"); + TestUtils.compareMatrices(b3OutDML, b3OutR, tolerance, "b3-DML", "b3-R"); + TestUtils.compareMatrices(w4OutDML, w4OutR, tolerance, "W4-DML", "W4-R"); + TestUtils.compareMatrices(b4OutDML, b4OutR, tolerance, "b4-DML", "b4-R"); + } +} \ No 
newline at end of file diff --git a/src/test/scripts/functions/builtin/autoencoder_2layer.R b/src/test/scripts/functions/builtin/autoencoder_2layer.R new file mode 100644 index 0000000..4dbabd3 --- /dev/null +++ b/src/test/scripts/functions/builtin/autoencoder_2layer.R @@ -0,0 +1,264 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- +# +# Trains a 2-layer autoencoder with minibatch SGD, momentum and step-size decay. +# If invoked with H1 > H2 then it becomes a 'bowtie' structured autoencoder +# Weights are initialized using Glorot & Bengio (2010) AISTATS initialization. +# The script standardizes the input before training (can be turned off). +# Also, it randomly reshuffles rows before training. +# Currently, tanh is set to be the activation function. +# By re-implementing 'func' DML-bodied function, one can change the activation. 
+ +# INPUT PARAMETERS: +# --------------------------------------------------------------------------------------------- +# NAME TYPE DEFAULT MEANING +# --------------------------------------------------------------------------------------------- +# X String --- Filename where the input is stored +# H1 Int --- Number of neurons in the 1st hidden layer +# H2 Int --- Number of neurons in the 2nd hidden layer +# EPOCH Int --- Number of epochs to train for +# BATCH Int 256 Mini-batch size (training parameter) +# --------------------------------------------------------------------------------------------- +# +# OUTPUT PARAMETERS (all filenames): +# --------------------------------------------------------------------------------------------- +# NAME TYPE DEFAULT MEANING +# --------------------------------------------------------------------------------------------- +# OUT String --- File to store weights and biases between all layers +# HIDDEN String " " File to store the hidden (2nd) layer representation if needed +# --------------------------------------------------------------------------------------------- +# Assume that $AUTOENCODERR_HOME is set to the home of the R script +# Assume input and output directories are $AUTOENCODERR_HOME/in/ and $AUTOENCODERR_HOME/expected/ +# Rscript $AUTOENCODERR_HOME/Autoencoder.R $AUTOENCODERR_HOME/in/ 500 2 10 256 csv $AUTOENCODERR_HOME/expected/ + +args <- commandArgs(TRUE) +library("Matrix") + +#1. tanh function +func = function(X){ + Y = (exp(2*X) - 1)/(exp(2*X) + 1) + return(Y) +} + +func1 = function(X) { + Y = (exp(2*X) - 1)/(exp(2*X) + 1) + Y_prime = 1 - Y^2 + return(Y_prime) +} + +#2. 
feedForward
+
+# squared-loss objective: 0.5 * ||E||_F^2
+obj <- function(E){
+  val = 0.5 * sum(E^2)
+  return(val)
+}
+
+
+X = readMM(paste(args[1], "X.mtx", sep=""));
+W1_rand = readMM(paste(args[1], "W1_rand.mtx", sep=""));
+W2_rand = readMM(paste(args[1], "W2_rand.mtx", sep=""));
+W3_rand = readMM(paste(args[1], "W3_rand.mtx", sep=""));
+W4_rand = readMM(paste(args[1], "W4_rand.mtx", sep=""));
+order_rand = readMM(paste(args[1], "order_rand.mtx", sep=""));
+
+num_hidden1 = as.integer(args[2]) #$H1
+num_hidden2 = as.integer(args[3]) #$H2
+max_epochs = as.integer(args[4]) #$EPOCH
+batch_size = as.integer(args[5]) #$BATCH
+
+mu = as.double(args[6]) # momentum ($MOMENTUM)
+step = as.double(args[7])
+decay = as.double(args[8])
+fmt = "text"
+full_obj = as.logical(args[9])
+
+n = nrow(X)
+m = ncol(X)
+
+#randomly reordering rows (same permutation as the DML script, driven by order_rand)
+#permut = table(seq(from=1,to=n,by=1), order(runif(n, min=0, max=1)))
+permut = table(seq(from=1,to=n,by=1), order(order_rand))
+permut = as.data.frame.matrix(permut)
+permut = data.matrix(permut)
+X = (permut %*% X)
+#z-transform, whitening operator is better
+means = t(as.matrix(colSums(X)))/n
+csx2 = t(as.matrix(colSums(X^2)))/n
+#FIX: the Bessel correction n/(n-1) must scale the whole variance (csx2 - means^2),
+#not only the mean^2 term, to match the DML script's standardization
+stds = sqrt((csx2 - means*means)*n/(n-1)) + 1e-17
+X = (X - matrix(1, nrow(X),1) %*% means)/(matrix(1,nrow(X),1) %*% stds)
+
+#Glorot & Bengio (2010) scaling of the externally supplied random weights
+W1 = sqrt(6)/sqrt(m + num_hidden1) * W1_rand
+b1 = matrix(0, num_hidden1, 1)
+
+W2 = sqrt(6)/sqrt(num_hidden1 + num_hidden2) * W2_rand
+#FIX: layer-2 bias is a column vector (was: ncol=2), consistent with b1/b3/b4 and the DML script
+b2 = matrix(0, num_hidden2, 1)
+
+W3 = sqrt(6)/sqrt(num_hidden2 + num_hidden1) * W3_rand
+b3 = matrix(0, num_hidden1, 1)
+
+W4 = sqrt(6)/sqrt(num_hidden2 + m) * W4_rand
+b4 = matrix(0, m, 1)
+
+#momentum accumulators, one per parameter
+upd_W1 = matrix(0, nrow(W1), ncol(W1))
+upd_b1 = matrix(0, nrow(b1), ncol(b1))
+upd_W2 = matrix(0, nrow(W2), ncol(W2))
+upd_b2 = matrix(0, nrow(b2), ncol(b2))
+upd_W3 = matrix(0, nrow(W3), ncol(W3))
+upd_b3 = matrix(0, nrow(b3), ncol(b3))
+upd_W4 = matrix(0, nrow(W4), ncol(W4))
+upd_b4 = matrix(0, nrow(b4), ncol(b4))
+
+iter = 0
+num_iters_per_epoch = ceiling(n / batch_size)
+max_iterations = max_epochs
* num_iters_per_epoch +# debug +# print("num_iters_per_epoch=" + num_iters_per_epoch + " max_iterations=" + max_iterations) +beg = 1 +while( iter < max_iterations ) { + end = beg + batch_size - 1 + if(end > n) end = n + X_batch = X[beg:end,] + + # Notation: + # 1 2 3 4 5 6 7 8 9 + # [H1, H1_prime, H2, H2_prime, H3, H3_prime, Yhat, Yhat_prime, E] + # tmp_ff = feedForward(X_batch, W1, b1, W2, b2, W3, b3, W4, b4, X_batch) + # H1 = tmp_ff[1]; H1_prime = tmp_ff[2]; H2 = tmp_ff[3]; H2_prime = tmp_ff[4]; + # H3 = tmp_ff[5]; H3_prime = tmp_ff[6]; Yhat = tmp_ff[7]; Yhat_prime = tmp_ff[8]; + # E = tmp_ff[9] + # inputs: X, W1, b1, W2, b2, W3, b3, W4, b4, X_batch + H1_in = t(W1 %*% t(X_batch) + b1 %*% matrix(1,ncol(b1),nrow(X_batch))) + H1 = func(H1_in) + H1_prime = func1(H1_in) + + H2_in = t(W2 %*% t(H1) + b2 %*% matrix(1,ncol(b2),nrow(H1))) + H2 = func(H2_in) + H2_prime = func1(H2_in) + + H3_in = t(W3 %*% t(H2) + b3 %*% matrix(1,ncol(b3),nrow(H2))) + H3 = func(H3_in) + H3_prime = func1(H3_in) + + Yhat_in = t(W4 %*% t(H3) + b4 %*% matrix(1,ncol(b4),nrow(H3))) + Yhat = func(Yhat_in) + Yhat_prime = func1(Yhat_in) + + E = Yhat - X_batch + + # Notation: + # 1 2 3 4 5 6 7 8 + # [W1_grad, b1_grad, W2_grad, b2_grad, W3_grad, b3_grad,W4_grad, b4_grad] + # tmp_grad = grad(X_batch, H1, H1_prime, H2, H2_prime, H3, H3_prime, Yhat_prime, E, W1, W2, W3, W4) + # W1_grad = tmp_grad[1]; b1_grad = tmp_grad[2]; W2_grad = tmp_grad[3]; b2_grad = tmp_grad[4]; + # W3_grad = tmp_grad[5]; b3_grad = tmp_grad[6]; W4_grad = tmp_grad[7]; b4_grad = tmp_grad[8]; + # grad function + + #backprop + delta4 = E * Yhat_prime + delta3 = H3_prime * (delta4 %*% W4) + delta2 = H2_prime * (delta3 %*% W3) + delta1 = H1_prime * (delta2 %*% W2) + + #compute gradients + b4_grad = (colSums(delta4)) + b3_grad = (colSums(delta3)) + b2_grad = (colSums(delta2)) + b1_grad = (colSums(delta1)) + + W4_grad = t(delta4) %*% H3 + W3_grad = t(delta3) %*% H2 + W2_grad = t(delta2) %*% H1 + W1_grad = t(delta1) %*% X_batch + + ob = 
obj(E)
+  epochs = iter / num_iters_per_epoch
+  # debug
+  # print(table(epochs, ob), zero.print = "0")
+
+  #update (SGD with momentum; mirrors the DML script exactly)
+  local_step = step / nrow(X_batch)
+  upd_W1 = mu * upd_W1 - local_step * W1_grad
+  #NOTE(review): bias updates use b* (zero-initialized) instead of b*_grad, so the
+  #biases never change; kept as-is to stay consistent with the DML script -- confirm intended
+  upd_b1 = mu * upd_b1 - local_step * b1
+  upd_W2 = mu * upd_W2 - local_step * W2_grad
+  upd_b2 = mu * upd_b2 - local_step * b2
+  upd_W3 = mu * upd_W3 - local_step * W3_grad
+  upd_b3 = mu * upd_b3 - local_step * b3
+  upd_W4 = mu * upd_W4 - local_step * W4_grad
+  upd_b4 = mu * upd_b4 - local_step * b4
+  W1 = W1 + upd_W1
+  b1 = b1 + upd_b1
+  W2 = W2 + upd_W2
+  b2 = b2 + upd_b2
+  W3 = W3 + upd_W3
+  b3 = b3 + upd_b3
+  W4 = W4 + upd_W4
+  b4 = b4 + upd_b4
+
+  iter = iter + 1
+  if(end == n) beg = 1
+  else beg = end + 1
+
+  if(iter %% num_iters_per_epoch == 0)
+    step = step * decay
+
+  if(full_obj & iter %% num_iters_per_epoch == 0 ) {
+    # full-data feed forward: same layer chain as above, evaluated on all of X
+    # (inlined equivalent of feedForward(X, W1, b1, W2, b2, W3, b3, W4, b4, X))
+    H1_in = t(W1 %*% t(X) + b1 %*% matrix(1,ncol(b1),nrow(X)))
+    full_H1 = func(H1_in)
+    full_H1_prime = func1(H1_in)
+
+    #FIX: propagate the full-data activations (was: H1/H2/H3 from the last mini-batch)
+    H2_in = t(W2 %*% t(full_H1) + b2 %*% matrix(1,ncol(b2),nrow(full_H1)))
+    full_H2 = func(H2_in)
+    full_H2_prime = func1(H2_in)
+
+    H3_in = t(W3 %*% t(full_H2) + b3 %*% matrix(1,ncol(b3),nrow(full_H2)))
+    full_H3 = func(H3_in)
+    full_H3_prime = func1(H3_in)
+
+    Yhat_in = t(W4 %*% t(full_H3) + b4 %*% matrix(1,ncol(b4),nrow(full_H3)))
+    full_Yhat = func(Yhat_in)
+    full_Yhat_prime = func1(Yhat_in)
+    #FIX: the full-data error is measured against X, not the last mini-batch
+    full_E = full_Yhat - X
+
+    full_o = obj(full_E)
+    epochs = iter %/% num_iters_per_epoch
+    # debug
+    # print(table(epochs, full_o, deparse.level=2), zero.print=".")
+    # print("EPOCHS=" + epochs + " iter=" + iter + " OBJ (FULL DATA)=" + full_o)
+  }
+}
+
+#debug
+#print.table(W1, digits=3)
+writeMM(as(W1,"CsparseMatrix"), paste(args[10], "W1_out", sep="")); +writeMM(as(b1,"CsparseMatrix"), paste(args[10], "b1_out", sep="")); +writeMM(as(W2,"CsparseMatrix"), paste(args[10], "W2_out", sep="")); +writeMM(as(b2,"CsparseMatrix"), paste(args[10], "b2_out", sep="")); +writeMM(as(W3,"CsparseMatrix"), paste(args[10], "W3_out", sep="")); +writeMM(as(b3,"CsparseMatrix"), paste(args[10], "b3_out", sep="")); +writeMM(as(W4,"CsparseMatrix"), paste(args[10], "W4_out", sep="")); +writeMM(as(b4,"CsparseMatrix"), paste(args[10], "b4_out", sep="")); diff --git a/src/test/scripts/functions/builtin/autoencoder_2layer.dml b/src/test/scripts/functions/builtin/autoencoder_2layer.dml new file mode 100644 index 0000000..05574cb --- /dev/null +++ b/src/test/scripts/functions/builtin/autoencoder_2layer.dml @@ -0,0 +1,35 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + +[W1, b1, W2, b2, W3, b3, W4, b4, hidden] = autoencoder_2layer(X = read($X), W1_rand = read($W1_rand), W2_rand = read($W2_rand), + W3_rand = read($W3_rand), W4_rand = read($W4_rand), order_rand = read($order_rand), + num_hidden1 = $H1, num_hidden2 = $H2, max_epochs = $EPOCH, full_obj = $OBJ, + batch_size = $BATCH, step = $STEP, decay = $DECAY, mu = $MOMENTUM) + +write(W1, $W1_out); +write(b1, $b1_out); +write(W2, $W2_out); +write(b2, $b2_out); +write(W3, $W3_out); +write(b3, $b3_out); +write(W4, $W4_out); +write(b4, $b4_out); +write(hidden, $hidden_out) \ No newline at end of file