This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemml.git
The following commit(s) were added to refs/heads/master by this push:
new bdf78e4 [SYSTEMML-2121] AutoEncoder test for codegenalg suite
bdf78e4 is described below
commit bdf78e462506ef8ef7fc9e6b23a6520e4155eca0
Author: Janardhan <[email protected]>
AuthorDate: Tue Apr 28 00:15:58 2020 +0200
[SYSTEMML-2121] AutoEncoder test for codegenalg suite
This patch adds a test case for the AutoEncoder algorithm with codegen
enabled, validating the results against a corresponding R script.
Closes #890.
---
.../codegenalg/partone/AlgorithmAutoEncoder.java | 57 ++++-
.../functions/codegenalg/Algorithm_AutoEncoder.R | 239 ++++++++++++++++++++
.../functions/codegenalg/Algorithm_AutoEncoder.dml | 251 +++++++++++++++++++++
3 files changed, 542 insertions(+), 5 deletions(-)
diff --git a/src/test/java/org/apache/sysds/test/functions/codegenalg/partone/AlgorithmAutoEncoder.java b/src/test/java/org/apache/sysds/test/functions/codegenalg/partone/AlgorithmAutoEncoder.java
index 90d7ff8..fca850c 100644
--- a/src/test/java/org/apache/sysds/test/functions/codegenalg/partone/AlgorithmAutoEncoder.java
+++ b/src/test/java/org/apache/sysds/test/functions/codegenalg/partone/AlgorithmAutoEncoder.java
@@ -20,7 +20,9 @@
package org.apache.sysds.test.functions.codegenalg.partone;
import java.io.File;
+import java.util.HashMap;
+import org.apache.sysds.runtime.matrix.data.MatrixValue;
import org.junit.Assert;
import org.junit.Test;
import org.apache.sysds.api.DMLScript;
@@ -37,11 +39,12 @@ public class AlgorithmAutoEncoder extends AutomatedTestBase
private final static String TEST_DIR = "functions/codegenalg/";
	private final static String TEST_CLASS_DIR = TEST_DIR + AlgorithmAutoEncoder.class.getSimpleName() + "/";
- private final static int rows = 2468;
+ private final static int rows = 1068;
private final static int cols = 784;
private final static double sparsity1 = 0.7; //dense
private final static double sparsity2 = 0.1; //sparse
+ private final static double eps = 1e-5;
private final static int H1 = 500;
private final static int H2 = 2;
@@ -179,22 +182,66 @@ public class AlgorithmAutoEncoder extends AutomatedTestBase
		TestConfiguration config = getTestConfiguration(TEST_NAME);
loadTestConfiguration(config);
-		fullDMLScriptName = "scripts/staging/autoencoder-2layer.dml";
+		fullDMLScriptName = SCRIPT_DIR + TEST_DIR + "/Algorithm_AutoEncoder.dml";
+		//"scripts/staging/autoencoder-2layer.dml";
		programArgs = new String[]{ "-stats", "-nvargs", "X="+input("X"),
-			"H1="+H1, "H2="+H2, "EPOCH="+epochs, "BATCH="+batchsize,
+			"H1="+H1, "H2="+H2, "EPOCH="+epochs, "BATCH="+batchsize,
+			"W1_rand="+input("W1_rand"), "W2_rand="+input("W2_rand"),
+			"W3_rand="+input("W3_rand"), "W4_rand="+input("W4_rand"),
+			"order_rand="+input("order_rand"),
			"W1_out="+output("W1"), "b1_out="+output("b1"),
			"W2_out="+output("W2"), "b2_out="+output("b2"),
			"W3_out="+output("W3"), "b3_out="+output("b3"),
			"W4_out="+output("W4"), "b4_out="+output("b4")};
+
+		rCmd = getRCmd(inputDir(), String.valueOf(H1), String.valueOf(H2),
+			String.valueOf(epochs), String.valueOf(batchsize), expectedDir());
		OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
		//generate actual datasets
		double[][] X = getRandomMatrix(rows, cols, 0, 1, sparse?sparsity2:sparsity1, 714);
		writeInputMatrixWithMTD("X", X, true);
-
+
+		//generate rand matrices for W1, W2, W3, W4 here, to pass to both the DML and R scripts
+		double[][] W1_rand = getRandomMatrix(H1, cols, 0, 1, sparse?sparsity2:sparsity1, 800);
+		writeInputMatrixWithMTD("W1_rand", W1_rand, true);
+		double[][] W2_rand = getRandomMatrix(H2, H1, 0, 1, sparse?sparsity2:sparsity1, 900);
+		writeInputMatrixWithMTD("W2_rand", W2_rand, true);
+		double[][] W3_rand = getRandomMatrix(H1, H2, 0, 1, sparse?sparsity2:sparsity1, 589);
+		writeInputMatrixWithMTD("W3_rand", W3_rand, true);
+		double[][] W4_rand = getRandomMatrix(cols, H1, 0, 1, sparse?sparsity2:sparsity1, 150);
+		writeInputMatrixWithMTD("W4_rand", W4_rand, true);
+		double[][] order_rand = getRandomMatrix(rows, 1, 0, 1, sparse?sparsity2:sparsity1, 143);
+		writeInputMatrixWithMTD("order_rand", order_rand, true); //for the permut operation on input X
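+		//fixed seeds make these inputs deterministic, so the DML and R scripts
+		//consume identical weight initializations and row order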
+
		//run script
		runTest(true, false, null, -1);
-		//TODO R script
+		runRScript(true);
+
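+		//compare all eight learned parameters cell-by-cell against the R
+		//reference, tolerating deviations up to eps = 1e-5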
+		HashMap<MatrixValue.CellIndex, Double> dmlW1 = readDMLMatrixFromHDFS("W1");
+		HashMap<MatrixValue.CellIndex, Double> dmlW2 = readDMLMatrixFromHDFS("W2");
+		HashMap<MatrixValue.CellIndex, Double> dmlW3 = readDMLMatrixFromHDFS("W3");
+		HashMap<MatrixValue.CellIndex, Double> dmlW4 = readDMLMatrixFromHDFS("W4");
+		HashMap<MatrixValue.CellIndex, Double> dmlb1 = readDMLMatrixFromHDFS("b1");
+		HashMap<MatrixValue.CellIndex, Double> dmlb2 = readDMLMatrixFromHDFS("b2");
+		HashMap<MatrixValue.CellIndex, Double> dmlb3 = readDMLMatrixFromHDFS("b3");
+		HashMap<MatrixValue.CellIndex, Double> dmlb4 = readDMLMatrixFromHDFS("b4");
+		HashMap<MatrixValue.CellIndex, Double> rW1 = readRMatrixFromFS("W1");
+		HashMap<MatrixValue.CellIndex, Double> rW2 = readRMatrixFromFS("W2");
+		HashMap<MatrixValue.CellIndex, Double> rW3 = readRMatrixFromFS("W3");
+		HashMap<MatrixValue.CellIndex, Double> rW4 = readRMatrixFromFS("W4");
+		HashMap<MatrixValue.CellIndex, Double> rb1 = readRMatrixFromFS("b1");
+		HashMap<MatrixValue.CellIndex, Double> rb2 = readRMatrixFromFS("b2");
+		HashMap<MatrixValue.CellIndex, Double> rb3 = readRMatrixFromFS("b3");
+		HashMap<MatrixValue.CellIndex, Double> rb4 = readRMatrixFromFS("b4");
+		TestUtils.compareMatrices(dmlW1, rW1, eps, "Stat-DML", "Stat-R");
+		TestUtils.compareMatrices(dmlW2, rW2, eps, "Stat-DML", "Stat-R");
+		TestUtils.compareMatrices(dmlW3, rW3, eps, "Stat-DML", "Stat-R");
+		TestUtils.compareMatrices(dmlW4, rW4, eps, "Stat-DML", "Stat-R");
+		TestUtils.compareMatrices(dmlb1, rb1, eps, "Stat-DML", "Stat-R");
+		TestUtils.compareMatrices(dmlb2, rb2, eps, "Stat-DML", "Stat-R");
+		TestUtils.compareMatrices(dmlb3, rb3, eps, "Stat-DML", "Stat-R");
+		TestUtils.compareMatrices(dmlb4, rb4, eps, "Stat-DML", "Stat-R");
		Assert.assertTrue(heavyHittersContainsSubString("spoof") || heavyHittersContainsSubString("sp_spoof"));
diff --git a/src/test/scripts/functions/codegenalg/Algorithm_AutoEncoder.R b/src/test/scripts/functions/codegenalg/Algorithm_AutoEncoder.R
new file mode 100644
index 0000000..1835540
--- /dev/null
+++ b/src/test/scripts/functions/codegenalg/Algorithm_AutoEncoder.R
@@ -0,0 +1,239 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+library("Matrix")
+
+#1. tanh function
+func = function(X){
+ Y = tanh(X)
+ return(Y)
+}
+
+func1 = function(X) {
+ Y = (exp(2*X) - 1)/(exp(2*X) + 1)
+ Y_prime = 1 - Y^2
+ return(Y_prime)
+}
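+
+# func1 is the tanh derivative: with Y = tanh(X) (written via exponentials
+# above), d/dX tanh(X) = 1 - tanh(X)^2 = 1 - Y^2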
+
+#2. objective function
+
+obj <- function(E){
+ val = 0.5 * sum(E^2)
+ return(val)
+}
+
+
+X = readMM(paste(args[1], "X.mtx", sep=""));
+W1_rand = readMM(paste(args[1], "W1_rand.mtx", sep=""));
+W2_rand = readMM(paste(args[1], "W2_rand.mtx", sep=""));
+W3_rand = readMM(paste(args[1], "W3_rand.mtx", sep=""));
+W4_rand = readMM(paste(args[1], "W4_rand.mtx", sep=""));
+order_rand = readMM(paste(args[1], "order_rand.mtx", sep=""));
+
+num_hidden1 = as.integer(args[2]) #$H1
+num_hidden2 = as.integer(args[3]) #$H2
+max_epochs = as.integer(args[4]) #$EPOCH
+batch_size = as.integer(args[5]) #$BATCH
+
+mu = 0.9 # momentum
+step = 1e-5
+decay = 0.95
+hfile = " "
+fmt = "text"
+full_obj = FALSE
+
+n = nrow(X)
+m = ncol(X)
+
+#randomly reordering rows
+#permut = table(seq(from=1,to=n,by=1), order(runif(n, min=0, max=1)))
+permut = table(seq(from=1,to=n,by=1), order(order_rand))
+permut = as.data.frame.matrix(permut)
+permut = data.matrix(permut)
+X = (permut %*% X)
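+# illustration (not executed): for order_rand = (0.9, 0.1, 0.5),
+# order(order_rand) = (2, 3, 1), so permut has ones at (1,2), (2,3), (3,1)
+# and permut %*% X stacks the rows X[2,], X[3,], X[1,]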
+#z-transform, whitening operator is better
+means = t(as.matrix(colSums(X)))/n
+csx2 = t(as.matrix(colSums(X^2)))/n
+stds = sqrt((csx2 - means*means)*n/(n-1)) + 1e-17
+X = (X - matrix(1, nrow(X),1) %*% means)/(matrix(1,nrow(X),1) %*% stds)
+
+W1 = sqrt(6)/sqrt(m + num_hidden1) * W1_rand
+b1 = matrix(0, num_hidden1, 1)
+
+W2 = sqrt(6)/sqrt(num_hidden1 + num_hidden2) * W2_rand
+b2 = matrix(0, num_hidden2, 1)
+
+W3 = sqrt(6)/sqrt(num_hidden2 + num_hidden1) * W3_rand
+b3 = matrix(0, num_hidden1, 1)
+
+W4 = sqrt(6)/sqrt(num_hidden2 + m) * W4_rand
+b4 = matrix(0, m, 1)
+
+upd_W1 = matrix(0, nrow(W1), ncol(W1))
+upd_b1 = matrix(0, nrow(b1), ncol(b1))
+upd_W2 = matrix(0, nrow(W2), ncol(W2))
+upd_b2 = matrix(0, nrow(b2), ncol(b2))
+upd_W3 = matrix(0, nrow(W3), ncol(W3))
+upd_b3 = matrix(0, nrow(b3), ncol(b3))
+upd_W4 = matrix(0, nrow(W4), ncol(W4))
+upd_b4 = matrix(0, nrow(b4), ncol(b4))
+
+if( full_obj ){
+ # nothing to do here
+}
+
+iter = 0
+num_iters_per_epoch = ceiling(n / batch_size)
+max_iterations = max_epochs * num_iters_per_epoch
+# debug
+# print("num_iters_per_epoch=" + num_iters_per_epoch + " max_iterations=" +
max_iterations)
+beg = 1
+while( iter < max_iterations ) {
+ end = beg + batch_size - 1
+ if(end > n) end = n
+ X_batch = X[beg:end,]
+
+ # Notation:
+ # 1 2 3 4 5 6 7 8 9
+ # [H1, H1_prime, H2, H2_prime, H3, H3_prime, Yhat, Yhat_prime, E]
+ # tmp_ff = feedForward(X_batch, W1, b1, W2, b2, W3, b3, W4, b4, X_batch)
+ # H1 = tmp_ff[1]; H1_prime = tmp_ff[2]; H2 = tmp_ff[3]; H2_prime = tmp_ff[4];
+  # H3 = tmp_ff[5]; H3_prime = tmp_ff[6]; Yhat = tmp_ff[7]; Yhat_prime = tmp_ff[8];
+ # E = tmp_ff[9]
+ # inputs: X, W1, b1, W2, b2, W3, b3, W4, b4, X_batch
+ H1_in = t(W1 %*% t(X_batch) + b1 %*% matrix(1,ncol(b1),nrow(X_batch)))
+ H1 = func(H1_in)
+ H1_prime = func1(H1_in)
+
+ H2_in = t(W2 %*% t(H1) + b2%*% matrix(1,ncol(b2),nrow(H1)))
+ H2 = func(H2_in)
+ H2_prime = func1(H2_in)
+
+ H3_in = t(W3 %*% t(H2) + b3%*% matrix(1,ncol(b3),nrow(H2)))
+ H3 = func(H3_in)
+ H3_prime = func1(H3_in)
+
+ Yhat_in = t(W4 %*% t(H3) + b4%*% matrix(1,ncol(b4),nrow(H3)))
+ Yhat = func(Yhat_in)
+ Yhat_prime = func1(Yhat_in)
+
+ E = Yhat - X_batch
+
+ # Notation:
+ # 1 2 3 4 5 6 7 8
+ # [W1_grad, b1_grad, W2_grad, b2_grad, W3_grad, b3_grad,W4_grad, b4_grad]
+  # tmp_grad = grad(X_batch, H1, H1_prime, H2, H2_prime, H3, H3_prime, Yhat_prime, E, W1, W2, W3, W4)
+  # W1_grad = tmp_grad[1]; b1_grad = tmp_grad[2]; W2_grad = tmp_grad[3]; b2_grad = tmp_grad[4];
+  # W3_grad = tmp_grad[5]; b3_grad = tmp_grad[6]; W4_grad = tmp_grad[7]; b4_grad = tmp_grad[8];
+ # grad function
+
+ #backprop
+ delta4 = E * Yhat_prime
+ delta3 = H3_prime * (delta4 %*% W4)
+ delta2 = H2_prime * (delta3 %*% W3)
+ delta1 = H1_prime * (delta2 %*% W2)
+
+ #compute gradients
+ b4_grad = (colSums(delta4))
+ b3_grad = (colSums(delta3))
+ b2_grad = (colSums(delta2))
+ b1_grad = (colSums(delta1))
+
+ W4_grad = t(delta4) %*% H3
+ W3_grad = t(delta3) %*% H2
+ W2_grad = t(delta2) %*% H1
+ W1_grad = t(delta1) %*% X_batch
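+
+  # chain rule: each delta_l = H_l_prime * (delta_(l+1) %*% W_(l+1)), seeded at
+  # the output by the squared-loss error term delta4 = E * Yhat_prime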
+
+ ob = obj(E)
+ epochs = iter / num_iters_per_epoch
+ # debug
+ # print(table(epochs, ob), zero.print = "0")
+
+ #update
+ local_step = step / nrow(X_batch)
+ upd_W1 = mu * upd_W1 - local_step * W1_grad
+ upd_b1 = mu * upd_b1 - local_step * b1
+ upd_W2 = mu * upd_W2 - local_step * W2_grad
+ upd_b2 = mu * upd_b2 - local_step * b2
+ upd_W3 = mu * upd_W3 - local_step * W3_grad
+ upd_b3 = mu * upd_b3 - local_step * b3
+ upd_W4 = mu * upd_W4 - local_step * W4_grad
+ upd_b4 = mu * upd_b4 - local_step * b4
+ W1 = W1 + upd_W1
+ b1 = b1 + upd_b1
+ W2 = W2 + upd_W2
+ b2 = b2 + upd_b2
+ W3 = W3 + upd_W3
+ b3 = b3 + upd_b3
+ W4 = W4 + upd_W4
+ b4 = b4 + upd_b4
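+
+  # momentum update: velocity = mu*velocity - local_step*g, then parameter += velocity
+  # (the bias velocities scale b itself rather than b_grad, mirroring the DML
+  # script so that both implementations stay numerically comparable)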
+
+ iter = iter + 1
+ if(end == n) beg = 1
+ else beg = end + 1
+
+ if(iter %% num_iters_per_epoch == 0)
+ step = step * decay
+
+ if(full_obj & iter %% num_iters_per_epoch == 0 ) {
+ # Notation:
+ # tmp_ff = feedForward(X, W1, b1, W2, b2, W3, b3, W4, b4, X)
+    # full_H1 = tmp_ff[1]; full_H1_prime = tmp_ff[2]; full_H2 = tmp_ff[3]; full_H2_prime = tmp_ff[4];
+    # full_H3 = tmp_ff[5]; full_H3_prime = tmp_ff[6]; full_Yhat = tmp_ff[7]; full_Yhat_prime = tmp_ff[8];
+ # full_E = tmp_ff[9];
+ # inputs: X, W1, b1, W2, b2, W3, b3, W4, b4, X
+ H1_in = t(W1 %*% t(X) + b1 %*% matrix(1,ncol(b1),nrow(X)))
+
+ full_H1 = func(H1_in)
+ full_H1_prime = func1(H1_in)
+
+    H2_in = t(W2 %*% t(full_H1) + b2 %*% matrix(1,ncol(b2),nrow(full_H1)))
+ full_H2 = func(H2_in)
+ full_H2_prime = func1(H2_in)
+
+    H3_in = t(W3 %*% t(full_H2) + b3 %*% matrix(1,ncol(b3),nrow(full_H2)))
+ full_H3 = func(H3_in)
+ full_H3_prime = func1(H3_in)
+
+    Yhat_in = t(W4 %*% t(full_H3) + b4 %*% matrix(1,ncol(b4),nrow(full_H3)))
+ full_Yhat = func(Yhat_in)
+ full_Yhat_prime = func1(Yhat_in)
+ full_E = full_Yhat - X
+
+ full_o = obj(full_E)
+ epochs = iter %/% num_iters_per_epoch
+ # debug
+ # print(table(epochs, full_o, deparse.level=2), zero.print=".")
+ # print("EPOCHS=" + epochs + " iter=" + iter + " OBJ (FULL DATA)=" +
full_o)
+ }
+}
+
+#debug
+#print.table(W1, digits=3)
+writeMM(as(W1,"CsparseMatrix"), paste(args[6], "W1", sep=""));
+writeMM(as(b1,"CsparseMatrix"), paste(args[6], "b1", sep=""));
+writeMM(as(W2,"CsparseMatrix"), paste(args[6], "W2", sep=""));
+writeMM(as(b2,"CsparseMatrix"), paste(args[6], "b2", sep=""));
+writeMM(as(W3,"CsparseMatrix"), paste(args[6], "W3", sep=""));
+writeMM(as(b3,"CsparseMatrix"), paste(args[6], "b3", sep=""));
+writeMM(as(W4,"CsparseMatrix"), paste(args[6], "W4", sep=""));
+writeMM(as(b4,"CsparseMatrix"), paste(args[6], "b4", sep=""));
diff --git a/src/test/scripts/functions/codegenalg/Algorithm_AutoEncoder.dml b/src/test/scripts/functions/codegenalg/Algorithm_AutoEncoder.dml
new file mode 100644
index 0000000..c0624a1
--- /dev/null
+++ b/src/test/scripts/functions/codegenalg/Algorithm_AutoEncoder.dml
@@ -0,0 +1,251 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Trains a 2-layer autoencoder with minibatch SGD, momentum and step-size decay.
+# If invoked with H1 > H2, it becomes a 'bowtie'-structured autoencoder.
+# Weights are initialized using Glorot & Bengio (2010) AISTATS initialization.
+# The script standardizes the input before training (can be turned off).
+# It also randomly reshuffles rows before training.
+# Currently, tanh is the activation function; to change the activation,
+# re-implement the DML-bodied function 'func'.
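+#
+# (Glorot initialization draws uniform weights and rescales them by
+# sqrt(6)/sqrt(fan_in + fan_out).)
+#
+# For example, a sigmoid variant of 'func' could look like this
+# (illustrative sketch only, not part of this script):
+#   func = function(Matrix[Double] X) return(Matrix[Double] Y, Matrix[Double] Y_prime){
+#     Y = 1 / (1 + exp(-X))
+#     Y_prime = Y * (1 - Y)
+#   }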
+
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME      TYPE     DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# X         String   ---      Filename where the input is stored
+# H1        Int      ---      Number of neurons in the 1st hidden layer
+# H2        Int      ---      Number of neurons in the 2nd hidden layer
+# EPOCH     Int      ---      Number of epochs to train for
+# fmt       String   'text'   Output format ("text", "csv", "binary" etc.)
+# OBJ       Boolean  FALSE    If TRUE, computes the objective function value (squared-loss)
+#                             at the end of each epoch. Note that computing the full
+#                             objective can take a lot of time.
+# BATCH     Int      256      Mini-batch size (training parameter)
+# STEP      Double   1e-5     Initial step size (training parameter)
+# DECAY     Double   0.95     Decays step size after each epoch (training parameter)
+# MOMENTUM  Double   0.9      Momentum parameter (training parameter)
+# ---------------------------------------------------------------------------------------------
+#
+# OUTPUT PARAMETERS (all filenames):
+# ---------------------------------------------------------------------------------------------
+# NAME      TYPE     DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# W1_out    String   ---      File to store weights between input layer and 1st hidden layer
+# b1_out    String   ---      File to store bias between input layer and 1st hidden layer
+# W2_out    String   ---      File to store weights between 1st hidden layer and 2nd hidden layer
+# b2_out    String   ---      File to store bias between 1st hidden layer and 2nd hidden layer
+# W3_out    String   ---      File to store weights between 2nd hidden layer and 3rd hidden layer
+# b3_out    String   ---      File to store bias between 2nd hidden layer and 3rd hidden layer
+# W4_out    String   ---      File to store weights between 3rd hidden layer and output layer
+# b4_out    String   ---      File to store bias between 3rd hidden layer and output layer
+# HIDDEN    String   " "      File to store the hidden (2nd) layer representation if needed
+# ---------------------------------------------------------------------------------------------
+#
+# INVOCATION:
+# -f autoencoder_2layer.dml --nvargs X=<input file> H1=500 H2=2 EPOCH=1 fmt="csv"
+#    W1_out=<weights from input to 1st hidden layer> b1_out=<bias from input to 1st hidden layer>
+#    W2_out=<weights from 1st hidden layer to 2nd hidden layer> b2_out=<bias from 1st hidden layer to 2nd hidden layer>
+#    W3_out=<weights from 2nd hidden layer to 3rd hidden layer> b3_out=<bias from 2nd hidden layer to 3rd hidden layer>
+#    W4_out=<weights from 3rd hidden layer to output> b4_out=<bias from 3rd hidden layer to output>
+#
+
+# NOTE: This is a copy of `scripts/staging/autoencoder-2layer.dml` with rand()
+# replaced by random matrices generated in the corresponding test class.
+
+#implements tanh
+#to use another activation fn, implement a DML-bodied function with
+#function name 'func' and comment out this one
+func = function(Matrix[Double] X) return(Matrix[Double] Y, Matrix[Double] Y_prime){
+ Y = tanh(X)
+ Y_prime = 1 - Y^2
+}
+
+feedForward = function(Matrix[Double] X, Matrix[Double] W1, Matrix[Double] b1,
+ Matrix[Double] W2, Matrix[Double] b2, Matrix[Double] W3, Matrix[Double] b3,
+ Matrix[Double] W4, Matrix[Double] b4, Matrix[Double] Y)
+ return(Matrix[Double] H1, Matrix[Double] H1_prime, Matrix[Double] H2,
+ Matrix[Double] H2_prime, Matrix[Double] H3, Matrix[Double] H3_prime,
+ Matrix[Double] Yhat, Matrix[Double] Yhat_prime, Matrix[Double] E)
+{
+ H1_in = t(W1 %*% t(X) + b1)
+ [H1, H1_prime] = func(H1_in)
+
+ H2_in = t(W2 %*% t(H1) + b2)
+ [H2, H2_prime] = func(H2_in)
+
+ H3_in = t(W3 %*% t(H2) + b3)
+ [H3, H3_prime] = func(H3_in)
+
+ Yhat_in = t(W4 %*% t(H3) + b4)
+ [Yhat, Yhat_prime] = func(Yhat_in)
+ E = Yhat - Y
+}
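+
+# note: in feedForward above, the bias column vectors b1..b4 are broadcast
+# across the batch dimension; the R reference script makes the same addition
+# explicit via b %*% matrix(1, ncol(b), nrow(H)) outer products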
+
+grad = function(Matrix[Double] X, Matrix[Double] H1, Matrix[Double] H1_prime,
+ Matrix[Double] H2, Matrix[Double] H2_prime, Matrix[Double] H3,
+ Matrix[Double] H3_prime, Matrix[Double] Yhat_prime, Matrix[Double] E,
+ Matrix[Double] W1, Matrix[Double] W2, Matrix[Double] W3, Matrix[Double] W4)
+  return(Matrix[Double] W1_grad, Matrix[Double] b1_grad, Matrix[Double] W2_grad,
+ Matrix[Double] b2_grad, Matrix[Double] W3_grad, Matrix[Double] b3_grad,
+ Matrix[Double] W4_grad, Matrix[Double] b4_grad)
+{
+ #backprop
+ delta4 = E * Yhat_prime
+ delta3 = H3_prime * (delta4 %*% W4)
+ delta2 = H2_prime * (delta3 %*% W3)
+ delta1 = H1_prime * (delta2 %*% W2)
+
+ #compute gradients
+ b4_grad = t(colSums(delta4))
+ b3_grad = t(colSums(delta3))
+ b2_grad = t(colSums(delta2))
+ b1_grad = t(colSums(delta1))
+
+ W4_grad = t(delta4) %*% H3
+ W3_grad = t(delta3) %*% H2
+ W2_grad = t(delta2) %*% H1
+ W1_grad = t(delta1) %*% X
+}
+
+obj = function(Matrix[Double] E) return(Double val){
+ val = 0.5 * sum(E^2)
+}
+
+batch_size = ifdef($BATCH, 256)
+mu = ifdef($MOMENTUM, 0.9)
+step = ifdef($STEP, 1e-5)
+decay = ifdef($DECAY, 0.95)
+hfile = ifdef($HIDDEN, " ")
+fmt = ifdef($fmt, "text")
+full_obj = ifdef($OBJ, FALSE)
+
+X = read($X)
+num_hidden1 = $H1
+num_hidden2 = $H2
+max_epochs = $EPOCH
+W1_rand = read($W1_rand)
+W2_rand = read($W2_rand)
+W3_rand = read($W3_rand)
+W4_rand = read($W4_rand)
+order_rand = read($order_rand)
+
+n = nrow(X)
+m = ncol(X)
+
+#randomly reordering rows
+#permut = table(seq(1,n,1), order(target=Rand(rows=n, cols=1, min=0, max=1, pdf="uniform"), by=1, index.return=TRUE), n, n)
+permut = table(seq(1,n,1), order(target=order_rand, by=1, index.return=TRUE), n, n)
+X = permut %*% X
+
+#z-transform, whitening operator is better
+means = colSums(X)/n
+stds = sqrt((colSums(X^2)/n - means*means)*n/(n-1)) + 1e-17
+X = (X - means)/stds
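+# (the colSums(X)/n terms are population moments; the n/(n-1) factor rescales
+# to the unbiased sample variance, and the 1e-17 summand avoids division by
+# zero for constant columns)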
+
+W1 = sqrt(6)/sqrt(m + num_hidden1) * W1_rand #Rand(rows=num_hidden1, cols=m, min=-1, max=1, pdf="uniform")
+b1 = matrix(0, rows=num_hidden1, cols=1)
+W2 = sqrt(6)/sqrt(num_hidden1 + num_hidden2) * W2_rand #Rand(rows=num_hidden2, cols=num_hidden1, min=-1, max=1, pdf="uniform")
+b2 = matrix(0, rows=num_hidden2, cols=1)
+W3 = sqrt(6)/sqrt(num_hidden2 + num_hidden1) * W3_rand #Rand(rows=num_hidden1, cols=num_hidden2, min=-1, max=1, pdf="uniform")
+b3 = matrix(0, rows=num_hidden1, cols=1)
+W4 = sqrt(6)/sqrt(num_hidden2 + m) * W4_rand #Rand(rows=m, cols=num_hidden1, min=-1, max=1, pdf="uniform")
+b4 = matrix(0, rows=m, cols=1)
+
+upd_W1 = matrix(0, rows=nrow(W1), cols=ncol(W1))
+upd_b1 = matrix(0, rows=nrow(b1), cols=ncol(b1))
+upd_W2 = matrix(0, rows=nrow(W2), cols=ncol(W2))
+upd_b2 = matrix(0, rows=nrow(b2), cols=ncol(b2))
+upd_W3 = matrix(0, rows=nrow(W3), cols=ncol(W3))
+upd_b3 = matrix(0, rows=nrow(b3), cols=ncol(b3))
+upd_W4 = matrix(0, rows=nrow(W4), cols=ncol(W4))
+upd_b4 = matrix(0, rows=nrow(b4), cols=ncol(b4))
+
+if( full_obj ){
+  [full_H1, full_H1_prime, full_H2, full_H2_prime, full_H3, full_H3_prime,
+   full_Yhat, full_Yhat_prime, full_E] = feedForward(X, W1, b1, W2, b2, W3, b3, W4, b4, X)
+ full_o = obj(full_E)
+ print("EPOCHS=" + 0 + " OBJ (FULL DATA): " + full_o)
+}
+
+iter = 0
+num_iters_per_epoch = ceil(n / batch_size)
+max_iterations = max_epochs * num_iters_per_epoch
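+# e.g., with the test's n = 1068 rows and the default batch_size of 256,
+# num_iters_per_epoch = ceil(1068/256) = 5 (illustrative arithmetic only)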
+#print("num_iters_per_epoch=" + num_iters_per_epoch + " max_iterations=" +
max_iterations)
+beg = 1
+while( iter < max_iterations ) {
+ end = beg + batch_size - 1
+ if(end > n) end = n
+ X_batch = X[beg:end,]
+
+  [H1, H1_prime, H2, H2_prime, H3, H3_prime, Yhat, Yhat_prime, E] = feedForward(X_batch, W1, b1, W2, b2, W3, b3, W4, b4, X_batch)
+  [W1_grad, b1_grad, W2_grad, b2_grad, W3_grad, b3_grad, W4_grad, b4_grad] = grad(X_batch, H1, H1_prime, H2, H2_prime, H3, H3_prime, Yhat_prime, E, W1, W2, W3, W4)
+
+ o = obj(E)
+ print("epochs=%5.4f BATCH beg=%d end=%d obj=%f", (iter /
num_iters_per_epoch), beg, end, o)
+
+ #update
+ local_step = step / nrow(X_batch)
+ upd_W1 = mu * upd_W1 - local_step * W1_grad
+ upd_b1 = mu * upd_b1 - local_step * b1
+ upd_W2 = mu * upd_W2 - local_step * W2_grad
+ upd_b2 = mu * upd_b2 - local_step * b2
+ upd_W3 = mu * upd_W3 - local_step * W3_grad
+ upd_b3 = mu * upd_b3 - local_step * b3
+ upd_W4 = mu * upd_W4 - local_step * W4_grad
+ upd_b4 = mu * upd_b4 - local_step * b4
+ W1 = W1 + upd_W1
+ b1 = b1 + upd_b1
+ W2 = W2 + upd_W2
+ b2 = b2 + upd_b2
+ W3 = W3 + upd_W3
+ b3 = b3 + upd_b3
+ W4 = W4 + upd_W4
+ b4 = b4 + upd_b4
+
+ iter = iter + 1
+ if(end == n) beg = 1
+ else beg = end + 1
+
+ if( iter %% num_iters_per_epoch == 0 ) step = step * decay
+
+ if( full_obj & iter %% num_iters_per_epoch == 0 ){
+    [full_H1, full_H1_prime, full_H2, full_H2_prime, full_H3, full_H3_prime,
+     full_Yhat, full_Yhat_prime, full_E] = feedForward(X, W1, b1, W2, b2, W3, b3, W4, b4, X)
+ full_o = obj(full_E)
+ epochs = iter %/% num_iters_per_epoch
+ print("EPOCHS=" + epochs + " iter=" + iter + " OBJ (FULL DATA)=" + full_o)
+ }
+}
+
+write(W1, $W1_out, format=fmt)
+write(b1, $b1_out, format=fmt)
+write(W2, $W2_out, format=fmt)
+write(b2, $b2_out, format=fmt)
+write(W3, $W3_out, format=fmt)
+write(b3, $b3_out, format=fmt)
+write(W4, $W4_out, format=fmt)
+write(b4, $b4_out, format=fmt)
+
+if( hfile != " " ){
+  [full_H1, full_H1_prime, full_H2, full_H2_prime, full_H3, full_H3_prime,
+   full_Yhat, full_Yhat_prime, full_E] = feedForward(X, W1, b1, W2, b2, W3, b3, W4, b4, X)
+ reordered_H = t(permut) %*% full_H2
+ write(reordered_H, hfile, format=fmt)
+}