This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemml.git
The following commit(s) were added to refs/heads/master by this push:
new bdf78e4 [SYSTEMML-2121] AutoEncoder test for codegenalg suite
bdf78e4 is described below
commit bdf78e462506ef8ef7fc9e6b23a6520e4155eca0
Author: Janardhan <[email protected]>
AuthorDate: Tue Apr 28 00:15:58 2020 +0200
[SYSTEMML-2121] AutoEncoder test for codegenalg suite
This patch adds a test case for the AutoEncoder algorithm with codegen
enabled, validating the results against a corresponding R script.
Closes #890.
---
.../codegenalg/partone/AlgorithmAutoEncoder.java | 57 ++++-
.../functions/codegenalg/Algorithm_AutoEncoder.R | 239 ++++++++++++++++++++
.../functions/codegenalg/Algorithm_AutoEncoder.dml | 251 +++++++++++++++++++++
3 files changed, 542 insertions(+), 5 deletions(-)
diff --git a/src/test/java/org/apache/sysds/test/functions/codegenalg/partone/AlgorithmAutoEncoder.java b/src/test/java/org/apache/sysds/test/functions/codegenalg/partone/AlgorithmAutoEncoder.java
index 90d7ff8..fca850c 100644
--- a/src/test/java/org/apache/sysds/test/functions/codegenalg/partone/AlgorithmAutoEncoder.java
+++ b/src/test/java/org/apache/sysds/test/functions/codegenalg/partone/AlgorithmAutoEncoder.java
@@ -20,7 +20,9 @@
package org.apache.sysds.test.functions.codegenalg.partone;
import java.io.File;
+import java.util.HashMap;
+import org.apache.sysds.runtime.matrix.data.MatrixValue;
import org.junit.Assert;
import org.junit.Test;
import org.apache.sysds.api.DMLScript;
@@ -37,11 +39,12 @@ public class AlgorithmAutoEncoder extends AutomatedTestBase
private final static String TEST_DIR = "functions/codegenalg/";
	private final static String TEST_CLASS_DIR = TEST_DIR + AlgorithmAutoEncoder.class.getSimpleName() + "/";
- private final static int rows = 2468;
+ private final static int rows = 1068;
private final static int cols = 784;
private final static double sparsity1 = 0.7; //dense
private final static double sparsity2 = 0.1; //sparse
+ private final static double eps = 1e-5;
private final static int H1 = 500;
private final static int H2 = 2;
@@ -179,22 +182,66 @@ public class AlgorithmAutoEncoder extends AutomatedTestBase
		TestConfiguration config = getTestConfiguration(TEST_NAME);
loadTestConfiguration(config);
-		fullDMLScriptName = "scripts/staging/autoencoder-2layer.dml";
+		fullDMLScriptName = SCRIPT_DIR + TEST_DIR + "/Algorithm_AutoEncoder.dml";
+		//"scripts/staging/autoencoder-2layer.dml";
		programArgs = new String[]{ "-stats", "-nvargs", "X="+input("X"),
-			"H1="+H1, "H2="+H2, "EPOCH="+epochs, "BATCH="+batchsize,
+			"H1="+H1, "H2="+H2, "EPOCH="+epochs, "BATCH="+batchsize,
+			"W1_rand="+input("W1_rand"), "W2_rand="+input("W2_rand"),
+			"W3_rand="+input("W3_rand"), "W4_rand="+input("W4_rand"),
+			"order_rand="+input("order_rand"),
			"W1_out="+output("W1"), "b1_out="+output("b1"),
			"W2_out="+output("W2"), "b2_out="+output("b2"),
			"W3_out="+output("W3"), "b3_out="+output("b3"),
			"W4_out="+output("W4"), "b4_out="+output("b4")};
+
+		rCmd = getRCmd(inputDir(), String.valueOf(H1), String.valueOf(H2),
+			String.valueOf(epochs), String.valueOf(batchsize), expectedDir());
		OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
		//generate actual datasets
		double[][] X = getRandomMatrix(rows, cols, 0, 1, sparse?sparsity2:sparsity1, 714);
		writeInputMatrixWithMTD("X", X, true);
-
+
+		//generate rand matrices for W1, W2, W3, W4 here, to pass to both the DML and R scripts
+		double[][] W1_rand = getRandomMatrix(H1, cols, 0, 1, sparse?sparsity2:sparsity1, 800);
+		writeInputMatrixWithMTD("W1_rand", W1_rand, true);
+		double[][] W2_rand = getRandomMatrix(H2, H1, 0, 1, sparse?sparsity2:sparsity1, 900);
+		writeInputMatrixWithMTD("W2_rand", W2_rand, true);
+		double[][] W3_rand = getRandomMatrix(H1, H2, 0, 1, sparse?sparsity2:sparsity1, 589);
+		writeInputMatrixWithMTD("W3_rand", W3_rand, true);
+		double[][] W4_rand = getRandomMatrix(cols, H1, 0, 1, sparse?sparsity2:sparsity1, 150);
+		writeInputMatrixWithMTD("W4_rand", W4_rand, true);
+		double[][] order_rand = getRandomMatrix(rows, 1, 0, 1, sparse?sparsity2:sparsity1, 143);
+		writeInputMatrixWithMTD("order_rand", order_rand, true); //for the permut operation on input X
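+		//fixed seeds make these inputs deterministic, so the DML and R scripts
+		//consume identical weight initializations and row order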
+
		//run script
		runTest(true, false, null, -1);
-		//TODO R script
+		runRScript(true);
+
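+		//compare all eight learned parameters cell-by-cell against the R
+		//reference, tolerating deviations up to eps = 1e-5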
+		HashMap<MatrixValue.CellIndex, Double> dmlW1 = readDMLMatrixFromHDFS("W1");
+		HashMap<MatrixValue.CellIndex, Double> dmlW2 = readDMLMatrixFromHDFS("W2");
+		HashMap<MatrixValue.CellIndex, Double> dmlW3 = readDMLMatrixFromHDFS("W3");
+		HashMap<MatrixValue.CellIndex, Double> dmlW4 = readDMLMatrixFromHDFS("W4");
+		HashMap<MatrixValue.CellIndex, Double> dmlb1 = readDMLMatrixFromHDFS("b1");
+		HashMap<MatrixValue.CellIndex, Double> dmlb2 = readDMLMatrixFromHDFS("b2");
+		HashMap<MatrixValue.CellIndex, Double> dmlb3 = readDMLMatrixFromHDFS("b3");
+		HashMap<MatrixValue.CellIndex, Double> dmlb4 = readDMLMatrixFromHDFS("b4");
+		HashMap<MatrixValue.CellIndex, Double> rW1 = readRMatrixFromFS("W1");
+		HashMap<MatrixValue.CellIndex, Double> rW2 = readRMatrixFromFS("W2");
+		HashMap<MatrixValue.CellIndex, Double> rW3 = readRMatrixFromFS("W3");
+		HashMap<MatrixValue.CellIndex, Double> rW4 = readRMatrixFromFS("W4");
+		HashMap<MatrixValue.CellIndex, Double> rb1 = readRMatrixFromFS("b1");
+		HashMap<MatrixValue.CellIndex, Double> rb2 = readRMatrixFromFS("b2");
+		HashMap<MatrixValue.CellIndex, Double> rb3 = readRMatrixFromFS("b3");
+		HashMap<MatrixValue.CellIndex, Double> rb4 = readRMatrixFromFS("b4");
+		TestUtils.compareMatrices(dmlW1, rW1, eps, "Stat-DML", "Stat-R");
+		TestUtils.compareMatrices(dmlW2, rW2, eps, "Stat-DML", "Stat-R");
+		TestUtils.compareMatrices(dmlW3, rW3, eps, "Stat-DML", "Stat-R");
+		TestUtils.compareMatrices(dmlW4, rW4, eps, "Stat-DML", "Stat-R");
+		TestUtils.compareMatrices(dmlb1, rb1, eps, "Stat-DML", "Stat-R");
+		TestUtils.compareMatrices(dmlb2, rb2, eps, "Stat-DML", "Stat-R");
+		TestUtils.compareMatrices(dmlb3, rb3, eps, "Stat-DML", "Stat-R");
+		TestUtils.compareMatrices(dmlb4, rb4, eps, "Stat-DML", "Stat-R");
		Assert.assertTrue(heavyHittersContainsSubString("spoof") || heavyHittersContainsSubString("sp_spoof"));
diff --git a/src/test/scripts/functions/codegenalg/Algorithm_AutoEncoder.R b/src/test/scripts/functions/codegenalg/Algorithm_AutoEncoder.R
new file mode 100644
index 0000000..1835540
--- /dev/null
+++ b/src/test/scripts/functions/codegenalg/Algorithm_AutoEncoder.R
@@ -0,0 +1,239 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+library("Matrix")
+
+#1. tanh function
+func = function(X){
+ Y = tanh(X)
+ return(Y)
+}
+
+func1 = function(X) {
+ Y = (exp(2*X) - 1)/(exp(2*X) + 1)
+ Y_prime = 1 - Y^2
+ return(Y_prime)
+}
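+
+# func1 is the tanh derivative: with Y = tanh(X) (written via exponentials
+# above), d/dX tanh(X) = 1 - tanh(X)^2 = 1 - Y^2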
+
+#2. objective function
+
+obj <- function(E){
+ val = 0.5 * sum(E^2)
+ return(val)
+}
+
+
+X = readMM(paste(args[1], "X.mtx", sep=""));
+W1_rand = readMM(paste(args[1], "W1_rand.mtx", sep=""));
+W2_rand = readMM(paste(args[1], "W2_rand.mtx", sep=""));
+W3_rand = readMM(paste(args[1], "W3_rand.mtx", sep=""));
+W4_rand = readMM(paste(args[1], "W4_rand.mtx", sep=""));
+order_rand = readMM(paste(args[1], "order_rand.mtx", sep=""));
+
+num_hidden1 = as.integer(args[2]) #$H1
+num_hidden2 = as.integer(args[3]) #$H2
+max_epochs = as.integer(args[4]) #$EPOCH
+batch_size = as.integer(args[5]) #$BATCH
+
+mu = 0.9 # momentum
+step = 1e-5
+decay = 0.95
+hfile = " "
+fmt = "text"
+full_obj = FALSE
+
+n = nrow(X)
+m = ncol(X)
+
+#randomly reordering rows
+#permut = table(seq(from=1,to=n,by=1), order(runif(n, min=0, max=1)))
+permut = table(seq(from=1,to=n,by=1), order(order_rand))
+permut = as.data.frame.matrix(permut)
+permut = data.matrix(permut)
+X = (permut %*% X)
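+# illustration (not executed): for order_rand = (0.9, 0.1, 0.5),
+# order(order_rand) = (2, 3, 1), so permut has ones at (1,2), (2,3), (3,1)
+# and permut %*% X stacks the rows X[2,], X[3,], X[1,]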
+#z-transform, whitening operator is better
+means = t(as.matrix(colSums(X)))/n
+csx2 = t(as.matrix(colSums(X^2)))/n
+stds = sqrt((csx2 - means*means)*n/(n-1)) + 1e-17
+X = (X - matrix(1, nrow(X),1) %*% means)/(matrix(1,nrow(X),1) %*% stds)
+
+W1 = sqrt(6)/sqrt(m + num_hidden1) * W1_rand
+b1 = matrix(0, num_hidden1, 1)
+
+W2 = sqrt(6)/sqrt(num_hidden1 + num_hidden2) * W2_rand
+b2 = matrix(0, num_hidden2, 1)
+
+W3 = sqrt(6)/sqrt(num_hidden2 + num_hidden1) * W3_rand
+b3 = matrix(0, num_hidden1, 1)
+
+W4 = sqrt(6)/sqrt(num_hidden2 + m) * W4_rand
+b4 = matrix(0, m, 1)
+
+upd_W1 = matrix(0, nrow(W1), ncol(W1))
+upd_b1 = matrix(0, nrow(b1), ncol(b1))
+upd_W2 = matrix(0, nrow(W2), ncol(W2))
+upd_b2 = matrix(0, nrow(b2), ncol(b2))
+upd_W3 = matrix(0, nrow(W3), ncol(W3))
+upd_b3 = matrix(0, nrow(b3), ncol(b3))
+upd_W4 = matrix(0, nrow(W4), ncol(W4))
+upd_b4 = matrix(0, nrow(b4), ncol(b4))
+
+if( full_obj ){
+ # nothing to do here
+}
+
+iter = 0
+num_iters_per_epoch = ceiling(n / batch_size)
+max_iterations = max_epochs * num_iters_per_epoch
+# debug
+# print("num_iters_per_epoch=" + num_iters_per_epoch + " max_iterations=" +
max_iterations)
+beg = 1
+while( iter < max_iterations ) {
+ end = beg + batch_size - 1
+ if(end > n) end = n
+ X_batch = X[beg:end,]
+
+ # Notation:
+ # 1 2 3 4 5 6 7 8 9
+ # [H1, H1_prime, H2, H2_prime, H3, H3_prime, Yhat, Yhat_prime, E]
+ # tmp_ff = feedForward(X_batch, W1, b1, W2, b2, W3, b3, W4, b4, X_batch)
+ # H1 = tmp_ff[1]; H1_prime = tmp_ff[2]; H2 = tmp_ff[3]; H2_prime = tmp_ff[4];
+  # H3 = tmp_ff[5]; H3_prime = tmp_ff[6]; Yhat = tmp_ff[7]; Yhat_prime = tmp_ff[8];
+ # E = tmp_ff[9]
+ # inputs: X, W1, b1, W2, b2, W3, b3, W4, b4, X_batch
+ H1_in = t(W1 %*% t(X_batch) + b1 %*% matrix(1,ncol(b1),nrow(X_batch)))
+ H1 = func(H1_in)
+ H1_prime = func1(H1_in)
+
+ H2_in = t(W2 %*% t(H1) + b2%*% matrix(1,ncol(b2),nrow(H1)))
+ H2 = func(H2_in)
+ H2_prime = func1(H2_in)
+
+ H3_in = t(W3 %*% t(H2) + b3%*% matrix(1,ncol(b3),nrow(H2)))
+ H3 = func(H3_in)
+ H3_prime = func1(H3_in)
+
+ Yhat_in = t(W4 %*% t(H3) + b4%*% matrix(1,ncol(b4),nrow(H3)))
+ Yhat = func(Yhat_in)
+ Yhat_prime = func1(Yhat_in)
+
+ E = Yhat - X_batch
+
+ # Notation:
+ # 1 2 3 4 5 6 7 8
+ # [W1_grad, b1_grad, W2_grad, b2_grad, W3_grad, b3_grad,W4_grad, b4_grad]
+  # tmp_grad = grad(X_batch, H1, H1_prime, H2, H2_prime, H3, H3_prime, Yhat_prime, E, W1, W2, W3, W4)
+  # W1_grad = tmp_grad[1]; b1_grad = tmp_grad[2]; W2_grad = tmp_grad[3]; b2_grad = tmp_grad[4];
+  # W3_grad = tmp_grad[5]; b3_grad = tmp_grad[6]; W4_grad = tmp_grad[7]; b4_grad = tmp_grad[8];
+ # grad function
+
+ #backprop
+ delta4 = E * Yhat_prime
+ delta3 = H3_prime * (delta4 %*% W4)
+ delta2 = H2_prime * (delta3 %*% W3)
+ delta1 = H1_prime * (delta2 %*% W2)
+
+ #compute gradients
+ b4_grad = (colSums(delta4))
+ b3_grad = (colSums(delta3))
+ b2_grad = (colSums(delta2))
+ b1_grad = (colSums(delta1))
+
+ W4_grad = t(delta4) %*% H3
+ W3_grad = t(delta3) %*% H2
+ W2_grad = t(delta2) %*% H1
+ W1_grad = t(delta1) %*% X_batch
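+
+  # chain rule: each delta_l = H_l_prime * (delta_(l+1) %*% W_(l+1)), seeded at
+  # the output by the squared-loss error term delta4 = E * Yhat_prime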
+
+ ob = obj(E)
+ epochs = iter / num_iters_per_epoch
+ # debug
+ # print(table(epochs, ob), zero.print = "0")
+
+ #update
+ local_step = step / nrow(X_batch)
+ upd_W1 = mu * upd_W1 - local_step * W1_grad
+ upd_b1 = mu * upd_b1 - local_step * b1
+ upd_W2 = mu * upd_W2 - local_step * W2_grad
+ upd_b2 = mu * upd_b2 - local_step * b2
+ upd_W3 = mu * upd_W3 - local_step * W3_grad
+ upd_b3 = mu * upd_b3 - local_step * b3
+ upd_W4 = mu * upd_W4 - local_step * W4_grad
+ upd_b4 = mu * upd_b4 - local_step * b4
+ W1 = W1 + upd_W1
+ b1 = b1 + upd_b1
+ W2 = W2 + upd_W2
+ b2 = b2 + upd_b2
+ W3 = W3 + upd_W3
+ b3 = b3 + upd_b3
+ W4 = W4 + upd_W4
+ b4 = b4 + upd_b4
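+
+  # momentum update: velocity = mu*velocity - local_step*g, then parameter += velocity
+  # (the bias velocities scale b itself rather than b_grad, mirroring the DML
+  # script so that both implementations stay numerically comparable)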
+
+ iter = iter + 1
+ if(end == n) beg = 1
+ else beg = end + 1
+
+ if(iter %% num_iters_per_epoch == 0)
+ step = step * decay
+
+ if(full_obj & iter %% num_iters_per_epoch == 0 ) {
+ # Notation:
+ # tmp_ff = feedForward(X, W1, b1, W2, b2, W3, b3, W4, b4, X)
+    # full_H1 = tmp_ff[1]; full_H1_prime = tmp_ff[2]; full_H2 = tmp_ff[3]; full_H2_prime = tmp_ff[4];
+    # full_H3 = tmp_ff[5]; full_H3_prime = tmp_ff[6]; full_Yhat = tmp_ff[7]; full_Yhat_prime = tmp_ff[8];
+ # full_E = tmp_ff[9];
+ # inputs: X, W1, b1, W2, b2, W3, b3, W4, b4, X
+ H1_in = t(W1 %*% t(X) + b1 %*% matrix(1,ncol(b1),nrow(X)))
+
+ full_H1 = func(H1_in)
+ full_H1_prime = func1(H1_in)
+
+    H2_in = t(W2 %*% t(full_H1) + b2 %*% matrix(1,ncol(b2),nrow(full_H1)))
+ full_H2 = func(H2_in)
+ full_H2_prime = func1(H2_in)
+
+    H3_in = t(W3 %*% t(full_H2) + b3 %*% matrix(1,ncol(b3),nrow(full_H2)))
+ full_H3 = func(H3_in)
+ full_H3_prime = func1(H3_in)
+
+    Yhat_in = t(W4 %*% t(full_H3) + b4 %*% matrix(1,ncol(b4),nrow(full_H3)))
+ full_Yhat = func(Yhat_in)
+ full_Yhat_prime = func1(Yhat_in)
+ full_E = full_Yhat - X
+
+ full_o = obj(full_E)
+ epochs = iter %/% num_iters_per_epoch
+ # debug
+ # print(table(epochs, full_o, deparse.level=2), zero.print=".")
+ # print("EPOCHS=" + epochs + " iter=" + iter + " OBJ (FULL DATA)=" +
full_o)
+ }
+}
+
+#debug
+#print.table(W1, digits=3)
+writeMM(as(W1,"CsparseMatrix"), paste(args[6], "W1", sep=""));
+writeMM(as(b1,"CsparseMatrix"), paste(args[6], "b1", sep=""));
+writeMM(as(W2,"CsparseMatrix"), paste(args[6], "W2", sep=""));
+writeMM(as(b2,"CsparseMatrix"), paste(args[6], "b2", sep=""));
+writeMM(as(W3,"CsparseMatrix"), paste(args[6], "W3", sep=""));
+writeMM(as(b3,"CsparseMatrix"), paste(args[6], "b3", sep=""));
+writeMM(as(W4,"CsparseMatrix"), paste(args[6], "W4", sep=""));
+writeMM(as(b4,"CsparseMatrix"), paste(args[6], "b4", sep=""));
diff --git a/src/test/scripts/functions/codegenalg/Algorithm_AutoEncoder.dml b/src/test/scripts/functions/codegenalg/Algorithm_AutoEncoder.dml
new file mode 100644
index 0000000..c0624a1
--- /dev/null
+++ b/src/test/scripts/functions/codegenalg/Algorithm_AutoEncoder.dml
@@ -0,0 +1,251 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Trains a 2-layer autoencoder with minibatch SGD, momentum and step-size decay.
+# If invoked with H1 > H2, it becomes a 'bowtie'-structured autoencoder.
+# Weights are initialized using Glorot & Bengio (2010) AISTATS initialization.
+# The script standardizes the input before training (can be turned off).
+# It also randomly reshuffles rows before training.
+# Currently, tanh is the activation function; to change the activation,
+# re-implement the DML-bodied function 'func'.
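+#
+# (Glorot initialization draws uniform weights and rescales them by
+# sqrt(6)/sqrt(fan_in + fan_out).)
+#
+# For example, a sigmoid variant of 'func' could look like this
+# (illustrative sketch only, not part of this script):
+#   func = function(Matrix[Double] X) return(Matrix[Double] Y, Matrix[Double] Y_prime){
+#     Y = 1 / (1 + exp(-X))
+#     Y_prime = Y * (1 - Y)
+#   }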
+
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME      TYPE     DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# X         String   ---      Filename where the input is stored
+# H1        Int      ---      Number of neurons in the 1st hidden layer
+# H2        Int      ---      Number of neurons in the 2nd hidden layer
+# EPOCH     Int      ---      Number of epochs to train for
+# fmt       String   'text'   Output format ("text", "csv", "binary" etc.)
+# OBJ       Boolean  FALSE    If TRUE, computes the objective function value (squared-loss)
+#                             at the end of each epoch. Note that computing the full
+#                             objective can take a lot of time.
+# BATCH     Int      256      Mini-batch size (training parameter)
+# STEP      Double   1e-5     Initial step size (training parameter)
+# DECAY     Double   0.95     Decays step size after each epoch (training parameter)
+# MOMENTUM  Double   0.9      Momentum parameter (training parameter)
+# ---------------------------------------------------------------------------------------------
+#
+# OUTPUT PARAMETERS (all filenames):
+# ---------------------------------------------------------------------------------------------
+# NAME      TYPE     DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# W1_out    String   ---      File to store weights between input layer and 1st hidden layer
+# b1_out    String   ---      File to store bias between input layer and 1st hidden layer
+# W2_out    String   ---      File to store weights between 1st hidden layer and 2nd hidden layer
+# b2_out    String   ---      File to store bias between 1st hidden layer and 2nd hidden layer
+# W3_out    String   ---      File to store weights between 2nd hidden layer and 3rd hidden layer
+# b3_out    String   ---      File to store bias between 2nd hidden layer and 3rd hidden layer
+# W4_out    String   ---      File to store weights between 3rd hidden layer and output layer
+# b4_out    String   ---      File to store bias between 3rd hidden layer and output layer
+# HIDDEN    String   " "      File to store the hidden (2nd) layer representation if needed
+# ---------------------------------------------------------------------------------------------
+#
+# INVOCATION:
+# -f autoencoder_2layer.dml --nvargs X=<input file> H1=500 H2=2 EPOCH=1 fmt="csv"
+#    W1_out=<weights from input to 1st hidden layer> b1_out=<bias from input to 1st hidden layer>
+#    W2_out=<weights from 1st hidden layer to 2nd hidden layer> b2_out=<bias from 1st hidden layer to 2nd hidden layer>
+#    W3_out=<weights from 2nd hidden layer to 3rd hidden layer> b3_out=<bias from 2nd hidden layer to 3rd hidden layer>
+#    W4_out=<weights from 3rd hidden layer to output> b4_out=<bias from 3rd hidden layer to output>
+#
+
+# NOTE: This is a copy of `scripts/staging/autoencoder-2layer.dml` with rand()
+# replaced by random matrices generated in the corresponding test class.
+
+#implements tanh
+#to use another activation fn, implement a DML-bodied function with
+#function name 'func' and comment out this one
+func = function(Matrix[Double] X) return(Matrix[Double] Y, Matrix[Double] Y_prime){
+ Y = tanh(X)
+ Y_prime = 1 - Y^2
+}
+
+feedForward = function(Matrix[Double] X, Matrix[Double] W1, Matrix[Double] b1,
+ Matrix[Double] W2, Matrix[Double] b2, Matrix[Double] W3, Matrix[Double] b3,
+ Matrix[Double] W4, Matrix[Double] b4, Matrix[Double] Y)
+ return(Matrix[Double] H1, Matrix[Double] H1_prime, Matrix[Double] H2,
+ Matrix[Double] H2_prime, Matrix[Double] H3, Matrix[Double] H3_prime,
+ Matrix[Double] Yhat, Matrix[Double] Yhat_prime, Matrix[Double] E)
+{
+ H1_in = t(W1 %*% t(X) + b1)
+ [H1, H1_prime] = func(H1_in)
+
+ H2_in = t(W2 %*% t(H1) + b2)
+ [H2, H2_prime] = func(H2_in)
+
+ H3_in = t(W3 %*% t(H2) + b3)
+ [H3, H3_prime] = func(H3_in)
+
+ Yhat_in = t(W4 %*% t(H3) + b4)
+ [Yhat, Yhat_prime] = func(Yhat_in)
+ E = Yhat - Y
+}
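+
+# note: in feedForward above, the bias column vectors b1..b4 are broadcast
+# across the batch dimension; the R reference script makes the same addition
+# explicit via b %*% matrix(1, ncol(b), nrow(H)) outer products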
+
+grad = function(Matrix[Double] X, Matrix[Double] H1, Matrix[Double] H1_prime,
+ Matrix[Double] H2, Matrix[Double] H2_prime, Matrix[Double] H3,
+ Matrix[Double] H3_prime, Matrix[Double] Yhat_prime, Matrix[Double] E,
+ Matrix[Double] W1, Matrix[Double] W2, Matrix[Double] W3, Matrix[Double] W4)
+  return(Matrix[Double] W1_grad, Matrix[Double] b1_grad, Matrix[Double] W2_grad,
+ Matrix[Double] b2_grad, Matrix[Double] W3_grad, Matrix[Double] b3_grad,
+ Matrix[Double] W4_grad, Matrix[Double] b4_grad)
+{
+ #backprop
+ delta4 = E * Yhat_prime
+ delta3 = H3_prime * (delta4 %*% W4)
+ delta2 = H2_prime * (delta3 %*% W3)
+ delta1 = H1_prime * (delta2 %*% W2)
+
+ #compute gradients
+ b4_grad = t(colSums(delta4))
+ b3_grad = t(colSums(delta3))
+ b2_grad = t(colSums(delta2))
+ b1_grad = t(colSums(delta1))
+
+ W4_grad = t(delta4) %*% H3
+ W3_grad = t(delta3) %*% H2
+ W2_grad = t(delta2) %*% H1
+ W1_grad = t(delta1) %*% X
+}
+
+obj = function(Matrix[Double] E) return(Double val){
+ val = 0.5 * sum(E^2)
+}
+
+batch_size = ifdef($BATCH, 256)
+mu = ifdef($MOMENTUM, 0.9)
+step = ifdef($STEP, 1e-5)
+decay = ifdef($DECAY, 0.95)
+hfile = ifdef($HIDDEN, " ")
+fmt = ifdef($fmt, "text")
+full_obj = ifdef($OBJ, FALSE)
+
+X = read($X)
+num_hidden1 = $H1
+num_hidden2 = $H2
+max_epochs = $EPOCH
+W1_rand = read($W1_rand)
+W2_rand = read($W2_rand)
+W3_rand = read($W3_rand)
+W4_rand = read($W4_rand)
+order_rand = read($order_rand)
+
+n = nrow(X)
+m = ncol(X)
+
+#randomly reordering rows
+#permut = table(seq(1,n,1), order(target=Rand(rows=n, cols=1, min=0, max=1, pdf="uniform"), by=1, index.return=TRUE), n, n)
+permut = table(seq(1,n,1), order(target=order_rand, by=1, index.return=TRUE), n, n)
+X = permut %*% X
+
+#z-transform, whitening operator is better
+means = colSums(X)/n
+stds = sqrt((colSums(X^2)/n - means*means)*n/(n-1)) + 1e-17
+X = (X - means)/stds
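+# (the colSums(X)/n terms are population moments; the n/(n-1) factor rescales
+# to the unbiased sample variance, and the 1e-17 summand avoids division by
+# zero for constant columns)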
+
+W1 = sqrt(6)/sqrt(m + num_hidden1) * W1_rand #Rand(rows=num_hidden1, cols=m, min=-1, max=1, pdf="uniform")
+b1 = matrix(0, rows=num_hidden1, cols=1)
+W2 = sqrt(6)/sqrt(num_hidden1 + num_hidden2) * W2_rand #Rand(rows=num_hidden2, cols=num_hidden1, min=-1, max=1, pdf="uniform")
+b2 = matrix(0, rows=num_hidden2, cols=1)
+W3 = sqrt(6)/sqrt(num_hidden2 + num_hidden1) * W3_rand #Rand(rows=num_hidden1, cols=num_hidden2, min=-1, max=1, pdf="uniform")
+b3 = matrix(0, rows=num_hidden1, cols=1)
+W4 = sqrt(6)/sqrt(num_hidden2 + m) * W4_rand #Rand(rows=m, cols=num_hidden1, min=-1, max=1, pdf="uniform")
+b4 = matrix(0, rows=m, cols=1)
+
+upd_W1 = matrix(0, rows=nrow(W1), cols=ncol(W1))
+upd_b1 = matrix(0, rows=nrow(b1), cols=ncol(b1))
+upd_W2 = matrix(0, rows=nrow(W2), cols=ncol(W2))
+upd_b2 = matrix(0, rows=nrow(b2), cols=ncol(b2))
+upd_W3 = matrix(0, rows=nrow(W3), cols=ncol(W3))
+upd_b3 = matrix(0, rows=nrow(b3), cols=ncol(b3))
+upd_W4 = matrix(0, rows=nrow(W4), cols=ncol(W4))
+upd_b4 = matrix(0, rows=nrow(b4), cols=ncol(b4))
+
+if( full_obj ){
+  [full_H1, full_H1_prime, full_H2, full_H2_prime, full_H3, full_H3_prime,
+   full_Yhat, full_Yhat_prime, full_E] = feedForward(X, W1, b1, W2, b2, W3, b3, W4, b4, X)
+ full_o = obj(full_E)
+ print("EPOCHS=" + 0 + " OBJ (FULL DATA): " + full_o)
+}
+
+iter = 0
+num_iters_per_epoch = ceil(n / batch_size)
+max_iterations = max_epochs * num_iters_per_epoch
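+# e.g., with the test's n = 1068 rows and the default batch_size of 256,
+# num_iters_per_epoch = ceil(1068/256) = 5 (illustrative arithmetic only)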
+#print("num_iters_per_epoch=" + num_iters_per_epoch + " max_iterations=" +
max_iterations)
+beg = 1
+while( iter < max_iterations ) {
+ end = beg + batch_size - 1
+ if(end > n) end = n
+ X_batch = X[beg:end,]
+
+  [H1, H1_prime, H2, H2_prime, H3, H3_prime, Yhat, Yhat_prime, E] = feedForward(X_batch, W1, b1, W2, b2, W3, b3, W4, b4, X_batch)
+  [W1_grad, b1_grad, W2_grad, b2_grad, W3_grad, b3_grad, W4_grad, b4_grad] = grad(X_batch, H1, H1_prime, H2, H2_prime, H3, H3_prime, Yhat_prime, E, W1, W2, W3, W4)
+
+ o = obj(E)
+ print("epochs=%5.4f BATCH beg=%d end=%d obj=%f", (iter /
num_iters_per_epoch), beg, end, o)
+
+ #update
+ local_step = step / nrow(X_batch)
+ upd_W1 = mu * upd_W1 - local_step * W1_grad
+ upd_b1 = mu * upd_b1 - local_step * b1
+ upd_W2 = mu * upd_W2 - local_step * W2_grad
+ upd_b2 = mu * upd_b2 - local_step * b2
+ upd_W3 = mu * upd_W3 - local_step * W3_grad
+ upd_b3 = mu * upd_b3 - local_step * b3
+ upd_W4 = mu * upd_W4 - local_step * W4_grad
+ upd_b4 = mu * upd_b4 - local_step * b4
+ W1 = W1 + upd_W1
+ b1 = b1 + upd_b1
+ W2 = W2 + upd_W2
+ b2 = b2 + upd_b2
+ W3 = W3 + upd_W3
+ b3 = b3 + upd_b3
+ W4 = W4 + upd_W4
+ b4 = b4 + upd_b4
+
+ iter = iter + 1
+ if(end == n) beg = 1
+ else beg = end + 1
+
+ if( iter %% num_iters_per_epoch == 0 ) step = step * decay
+
+ if( full_obj & iter %% num_iters_per_epoch == 0 ){
+    [full_H1, full_H1_prime, full_H2, full_H2_prime, full_H3, full_H3_prime,
+     full_Yhat, full_Yhat_prime, full_E] = feedForward(X, W1, b1, W2, b2, W3, b3, W4, b4, X)
+ full_o = obj(full_E)
+ epochs = iter %/% num_iters_per_epoch
+ print("EPOCHS=" + epochs + " iter=" + iter + " OBJ (FULL DATA)=" + full_o)
+ }
+}
+
+write(W1, $W1_out, format=fmt)
+write(b1, $b1_out, format=fmt)
+write(W2, $W2_out, format=fmt)
+write(b2, $b2_out, format=fmt)
+write(W3, $W3_out, format=fmt)
+write(b3, $b3_out, format=fmt)
+write(W4, $W4_out, format=fmt)
+write(b4, $b4_out, format=fmt)
+
+if( hfile != " " ){
+  [full_H1, full_H1_prime, full_H2, full_H2_prime, full_H3, full_H3_prime,
+   full_Yhat, full_Yhat_prime, full_E] = feedForward(X, W1, b1, W2, b2, W3, b3, W4, b4, X)
+ reordered_H = t(permut) %*% full_H2
+ write(reordered_H, hfile, format=fmt)
+}