Repository: incubator-systemml
Updated Branches:
  refs/heads/master aa2211ac0 -> 43c321d18


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1f5cf697/scripts/staging/SystemML-NN/nn/examples/mnist_lenet.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/examples/mnist_lenet.dml b/scripts/staging/SystemML-NN/nn/examples/mnist_lenet.dml
new file mode 100644
index 0000000..e5755c4
--- /dev/null
+++ b/scripts/staging/SystemML-NN/nn/examples/mnist_lenet.dml
@@ -0,0 +1,331 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * MNIST LeNet Example
+ */
+# Imports
+source("nn/layers/affine.dml") as affine
+source("nn/layers/conv2d_builtin.dml") as conv2d
+source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
+source("nn/layers/dropout.dml") as dropout
+source("nn/layers/l2_reg.dml") as l2_reg
+source("nn/layers/max_pool2d_builtin.dml") as max_pool2d
+source("nn/layers/relu.dml") as relu
+source("nn/layers/softmax.dml") as softmax
+source("nn/optim/sgd_nesterov.dml") as sgd_nesterov
+
+train = function(matrix[double] X, matrix[double] y,
+                 matrix[double] X_val, matrix[double] y_val,
+                 int C, int Hin, int Win, int epochs)
+    return (matrix[double] W1, matrix[double] b1,
+            matrix[double] W2, matrix[double] b2,
+            matrix[double] W3, matrix[double] b3,
+            matrix[double] W4, matrix[double] b4) {
+  /*
+   * Trains a convolutional net using the "LeNet" architecture.
+   *
+   * The input matrix, X, has N examples, each represented as a 3D
+   * volume unrolled into a single vector.  The targets, y, have K
+   * classes, and are one-hot encoded.
+   *
+   * Inputs:
+   *  - X: Input data matrix, of shape (N, C*Hin*Win).
+   *  - y: Target matrix, of shape (N, K).
+   *  - X_val: Input validation data matrix, of shape (N, C*Hin*Win).
+   *  - y_val: Target validation matrix, of shape (N, K).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   *  - epochs: Total number of full training loops over the full data set.
+   *
+   * Outputs:
+   *  - W1: 1st layer weights (parameters) matrix, of shape (F1, C*Hf*Wf).
+   *  - b1: 1st layer biases vector, of shape (F1, 1).
+   *  - W2: 2nd layer weights (parameters) matrix, of shape (F2, F1*Hf*Wf).
+   *  - b2: 2nd layer biases vector, of shape (F2, 1).
+   *  - W3: 3rd layer weights (parameters) matrix, of shape (F2*(Hin/4)*(Win/4), N3).
+   *  - b3: 3rd layer biases vector, of shape (1, N3).
+   *  - W4: 4th layer weights (parameters) matrix, of shape (N3, K).
+   *  - b4: 4th layer biases vector, of shape (1, K).
+   */
+  N = nrow(X)
+  K = ncol(y)
+
+  # Create network:
+  # conv1 -> relu1 -> pool1 -> conv2 -> relu2 -> pool2 -> affine3 -> relu3 -> affine4 -> softmax
+  Hf = 5  # filter height
+  Wf = 5  # filter width
+  stride = 1
+  pad = 2  # For same dimensions, (Hf - stride) / 2
+
+  F1 = 32  # num conv filters in conv1
+  F2 = 64  # num conv filters in conv2
+  N3 = 512  # num nodes in affine3
+  # Note: affine4 has K nodes, which is equal to the number of target dimensions (num classes)
+
+  [W1, b1] = conv2d::init(F1, C, Hf, Wf)  # inputs: (N, C*Hin*Win)
+  [W2, b2] = conv2d::init(F2, F1, Hf, Wf)  # inputs: (N, F1*(Hin/2)*(Win/2))
+  [W3, b3] = affine::init(F2*(Hin/2/2)*(Win/2/2), N3)  # inputs: (N, F2*(Hin/2/2)*(Win/2/2))
+  [W4, b4] = affine::init(N3, K)  # inputs: (N, N3)
+  W4 = W4 / sqrt(2)  # different initialization, since being fed into softmax, instead of relu
+
+  # Initialize SGD w/ Nesterov momentum optimizer
+  lr = 0.01  # learning rate
+  mu = 0.9  #0.5  # momentum
+  decay = 0.95  # learning rate decay constant
+  vW1 = sgd_nesterov::init(W1); vb1 = sgd_nesterov::init(b1)
+  vW2 = sgd_nesterov::init(W2); vb2 = sgd_nesterov::init(b2)
+  vW3 = sgd_nesterov::init(W3); vb3 = sgd_nesterov::init(b3)
+  vW4 = sgd_nesterov::init(W4); vb4 = sgd_nesterov::init(b4)
+
+  # Regularization
+  lambda = 5e-04
+
+  # Optimize
+  print("Starting optimization")
+  batch_size = 64
+  iters = ceil(N / batch_size)
+  for (e in 1:epochs) {
+    for(i in 1:iters) {
+      # Get next batch
+      beg = ((i-1) * batch_size) %% N + 1
+      end = min(N, beg + batch_size - 1)
+      X_batch = X[beg:end,]
+      y_batch = y[beg:end,]
+
+      # Compute forward pass
+      ## layer 1: conv1 -> relu1 -> pool1
+      [outc1, Houtc1, Woutc1] = conv2d::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride,
+                                                pad, pad)
+      outr1 = relu::forward(outc1)
+      [outp1, Houtp1, Woutp1] = max_pool2d::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,
+                                                    strideh=2, stridew=2, padh=0, padw=0)
+      ## layer 2: conv2 -> relu2 -> pool2
+      [outc2, Houtc2, Woutc2] = conv2d::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf,
+                                                stride, stride, pad, pad)
+      outr2 = relu::forward(outc2)
+      [outp2, Houtp2, Woutp2] = max_pool2d::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,
+                                                    strideh=2, stridew=2, padh=0, padw=0)
+      ## layer 3:  affine3 -> relu3 -> dropout
+      outa3 = affine::forward(outp2, W3, b3)
+      outr3 = relu::forward(outa3)
+      [outd3, maskd3] = dropout::forward(outr3, 0.5, -1)
+      ## layer 4:  affine4 -> softmax
+      outa4 = affine::forward(outd3, W4, b4)
+      probs = softmax::forward(outa4)
+
+      # Compute loss & accuracy for training & validation data every 100 iterations.
+      if (i %% 100 == 0) {
+        # Compute training loss & accuracy
+        loss_data = cross_entropy_loss::forward(probs, y_batch)
+        loss_reg_W1 = l2_reg::forward(W1, lambda)
+        loss_reg_W2 = l2_reg::forward(W2, lambda)
+        loss_reg_W3 = l2_reg::forward(W3, lambda)
+        loss_reg_W4 = l2_reg::forward(W4, lambda)
+        loss = loss_data + loss_reg_W1 + loss_reg_W2 + loss_reg_W3 + loss_reg_W4
+        accuracy = mean(rowIndexMax(probs) == rowIndexMax(y_batch))
+
+        # Compute validation loss & accuracy
+        probs_val = predict(X_val, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4)
+        loss_val = cross_entropy_loss::forward(probs_val, y_val)
+        accuracy_val = mean(rowIndexMax(probs_val) == rowIndexMax(y_val))
+
+        # Output results
+        print("Epoch: " + e + ", Iter: " + i + ", Train Loss: " + loss + ", 
Train Accuracy: "
+              + accuracy + ", Val Loss: " + loss_val + ", Val Accuracy: " + 
accuracy_val)
+      }
+
+      # Compute data backward pass
+      ## loss:
+      dprobs = cross_entropy_loss::backward(probs, y_batch)
+      ## layer 4:  affine4 -> softmax
+      douta4 = softmax::backward(dprobs, outa4)
+      [doutd3, dW4, db4] = affine::backward(douta4, outr3, W4, b4)
+      ## layer 3:  affine3 -> relu3 -> dropout
+      doutr3 = dropout::backward(doutd3, outr3, 0.5, maskd3)
+      douta3 = relu::backward(doutr3, outa3)
+      [doutp2, dW3, db3] = affine::backward(douta3, outp2, W3, b3)
+      ## layer 2: conv2 -> relu2 -> pool2
+      doutr2 = max_pool2d::backward(doutp2, Houtp2, Woutp2, outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,
+                                    strideh=2, stridew=2, padh=0, padw=0)
+      doutc2 = relu::backward(doutr2, outc2)
+      [doutp1, dW2, db2] = conv2d::backward(doutc2, Houtc2, Woutc2, outp1, W2, b2, F1,
+                                            Houtp1, Woutp1, Hf, Wf, stride, stride, pad, pad)
+      ## layer 1: conv1 -> relu1 -> pool1
+      doutr1 = max_pool2d::backward(doutp1, Houtp1, Woutp1, outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,
+                                    strideh=2, stridew=2, padh=0, padw=0)
+      doutc1 = relu::backward(doutr1, outc1)
+      [dX_batch, dW1, db1] = conv2d::backward(doutc1, Houtc1, Woutc1, X_batch, W1, b1, C, Hin, Win,
+                                              Hf, Wf, stride, stride, pad, pad)
+
+      # Compute regularization backward pass
+      dW1_reg = l2_reg::backward(W1, lambda)
+      dW2_reg = l2_reg::backward(W2, lambda)
+      dW3_reg = l2_reg::backward(W3, lambda)
+      dW4_reg = l2_reg::backward(W4, lambda)
+      dW1 = dW1 + dW1_reg
+      dW2 = dW2 + dW2_reg
+      dW3 = dW3 + dW3_reg
+      dW4 = dW4 + dW4_reg
+
+      # Optimize with SGD w/ Nesterov momentum
+      [W1, vW1] = sgd_nesterov::update(W1, dW1, lr, mu, vW1)
+      [b1, vb1] = sgd_nesterov::update(b1, db1, lr, mu, vb1)
+      [W2, vW2] = sgd_nesterov::update(W2, dW2, lr, mu, vW2)
+      [b2, vb2] = sgd_nesterov::update(b2, db2, lr, mu, vb2)
+      [W3, vW3] = sgd_nesterov::update(W3, dW3, lr, mu, vW3)
+      [b3, vb3] = sgd_nesterov::update(b3, db3, lr, mu, vb3)
+      [W4, vW4] = sgd_nesterov::update(W4, dW4, lr, mu, vW4)
+      [b4, vb4] = sgd_nesterov::update(b4, db4, lr, mu, vb4)
+    }
+    # Anneal momentum towards 0.999
+    #mu = mu + (0.999 - mu)/(1+epochs-e)
+    # Decay learning rate
+    lr = lr * decay
+  }
+}
+
+predict = function(matrix[double] X, int C, int Hin, int Win,
+                   matrix[double] W1, matrix[double] b1,
+                   matrix[double] W2, matrix[double] b2,
+                   matrix[double] W3, matrix[double] b3,
+                   matrix[double] W4, matrix[double] b4)
+    return (matrix[double] probs) {
+  /*
+   * Computes the class probability predictions of a convolutional
+   * net using the "LeNet" architecture.
+   *
+   * The input matrix, X, has N examples, each represented as a 3D
+   * volume unrolled into a single vector.
+   *
+   * Inputs:
+   *  - X: Input data matrix, of shape (N, C*Hin*Win).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   *  - W1: 1st layer weights (parameters) matrix, of shape (F1, C*Hf*Wf).
+   *  - b1: 1st layer biases vector, of shape (F1, 1).
+   *  - W2: 2nd layer weights (parameters) matrix, of shape (F2, F1*Hf*Wf).
+   *  - b2: 2nd layer biases vector, of shape (F2, 1).
+   *  - W3: 3rd layer weights (parameters) matrix, of shape (F2*(Hin/4)*(Win/4), N3).
+   *  - b3: 3rd layer biases vector, of shape (1, N3).
+   *  - W4: 4th layer weights (parameters) matrix, of shape (N3, K).
+   *  - b4: 4th layer biases vector, of shape (1, K).
+   *
+   * Outputs:
+   *  - probs: Class probabilities, of shape (N, K).
+   */
+  N = nrow(X)
+
+  # Network:
+  # conv1 -> relu1 -> pool1 -> conv2 -> relu2 -> pool2 -> affine3 -> relu3 -> affine4 -> softmax
+  Hf = 5  # filter height
+  Wf = 5  # filter width
+  stride = 1
+  pad = 2  # For same dimensions, (Hf - stride) / 2
+
+  F1 = nrow(W1)  # num conv filters in conv1
+  F2 = nrow(W2)  # num conv filters in conv2
+  N3 = ncol(W3)  # num nodes in affine3
+  K = ncol(W4)  # num nodes in affine4, equal to number of target dimensions (num classes)
+
+  # Compute predictions over mini-batches
+  probs = matrix(0, rows=N, cols=K)
+  batch_size = 64
+  iters = ceil(N / batch_size)
+  for(i in 1:iters) {
+    # Get next batch
+    beg = ((i-1) * batch_size) %% N + 1
+    end = min(N, beg + batch_size - 1)
+    X_batch = X[beg:end,]
+
+    # Compute forward pass
+    ## layer 1: conv1 -> relu1 -> pool1
+    [outc1, Houtc1, Woutc1] = conv2d::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride,
+                                              pad, pad)
+    outr1 = relu::forward(outc1)
+    [outp1, Houtp1, Woutp1] = max_pool2d::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,
+                                                  strideh=2, stridew=2, padh=0, padw=0)
+    ## layer 2: conv2 -> relu2 -> pool2
+    [outc2, Houtc2, Woutc2] = conv2d::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf,
+                                              stride, stride, pad, pad)
+    outr2 = relu::forward(outc2)
+    [outp2, Houtp2, Woutp2] = max_pool2d::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,
+                                                  strideh=2, stridew=2, padh=0, padw=0)
+    ## layer 3:  affine3 -> relu3
+    outa3 = affine::forward(outp2, W3, b3)
+    outr3 = relu::forward(outa3)
+    ## layer 4:  affine4 -> softmax
+    outa4 = affine::forward(outr3, W4, b4)
+    probs_batch = softmax::forward(outa4)
+
+    # Store predictions
+    probs[beg:end,] = probs_batch
+  }
+}
+
+eval = function(matrix[double] probs, matrix[double] y)
+    return (double loss, double accuracy) {
+  /*
+   * Evaluates a convolutional net using the "LeNet" architecture.
+   *
+   * The probs matrix contains the class probability predictions
+   * of K classes over N examples.  The targets, y, have K classes,
+   * and are one-hot encoded.
+   *
+   * Inputs:
+   *  - probs: Class probabilities, of shape (N, K).
+   *  - y: Target matrix, of shape (N, K).
+   *
+   * Outputs:
+   *  - loss: Scalar loss, of shape (1).
+   *  - accuracy: Scalar accuracy, of shape (1).
+   */
+  # Compute loss & accuracy
+  loss = cross_entropy_loss::forward(probs, y)
+  correct_pred = rowIndexMax(probs) == rowIndexMax(y)
+  accuracy = mean(correct_pred)
+}
+
+generate_dummy_data = function()
+    return (matrix[double] X, matrix[double] y, int C, int Hin, int Win) {
+  /*
+   * Generate a dummy dataset similar to the MNIST dataset.
+   *
+   * Outputs:
+   *  - X: Input data matrix, of shape (N, D).
+   *  - y: Target matrix, of shape (N, K).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   */
+  # Generate dummy input data
+  N = 1024  # num examples
+  C = 1  # num input channels
+  Hin = 28  # input height
+  Win = 28  # input width
+  K = 10  # num target classes
+  X = rand(rows=N, cols=C*Hin*Win, pdf="normal")
+  classes = round(rand(rows=N, cols=1, min=1, max=K, pdf="uniform"))
+  y = table(seq(1, N), classes, N, K)  # one-hot encoding, with explicit dims to guarantee K columns
+}
+
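
As a quick smoke test of the functions above, a minimal DML sketch along the following lines (assuming it is run from the `SystemML-NN` directory so that the `nn/examples/mnist_lenet.dml` source path resolves) exercises `generate_dummy_data`, `train`, `predict`, and `eval` end-to-end:

```
# Sketch: train the LeNet example on synthetic data for one epoch and score it.
source("nn/examples/mnist_lenet.dml") as mnist_lenet

# Generate a synthetic training set and a separate synthetic validation set.
[X, y, C, Hin, Win] = mnist_lenet::generate_dummy_data()
[X_val, y_val, C, Hin, Win] = mnist_lenet::generate_dummy_data()

# Train for a single epoch, then compute validation-set accuracy.
[W1, b1, W2, b2, W3, b3, W4, b4] = mnist_lenet::train(X, y, X_val, y_val, C, Hin, Win, 1)
probs = mnist_lenet::predict(X_val, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4)
[loss, accuracy] = mnist_lenet::eval(probs, y_val)
print("Dummy-data accuracy: " + accuracy)
```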

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1f5cf697/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-predict.dml b/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-predict.dml
new file mode 100644
index 0000000..4c8c434
--- /dev/null
+++ b/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-predict.dml
@@ -0,0 +1,77 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# MNIST Softmax - Predict
+#
+# This script computes the class probability predictions of a
+# trained softmax classifier on images of handwritten digits.
+#
+# Inputs:
+#  - X: File containing images for prediction.
+#     The format is "pixel_1, pixel_2, ..., pixel_n".
+#  - model_dir: Directory containing the trained weights and biases
+#     of the model.
+#  - out_dir: Directory to store class probability predictions for
+#     each image.
+#  - fmt: [DEFAULT: "csv"] File format of `X` and output predictions.
+#     Options include: "csv", "mm", "text", and "binary".
+#
+# Outputs:
+#  - probs: File containing class probability predictions for each
+#     image.
+#
+# Data:
+# The X file should contain images of handwritten digits,
+# where each example is a 28x28 pixel image of grayscale values in
+# the range [0,255] stretched out as 784 pixels.
+#
+# Sample Invocation:
+# 1. Download images.
+#
+#   For example, save images to `nn/examples/data/mnist/images.csv`.
+#
+# 2. Execute using Spark
+#   ```
+#   spark-submit --master local[*] --driver-memory 5G
+#   --conf spark.driver.maxResultSize=0 --conf spark.rpc.message.maxSize=128
+#   $SYSTEMML_HOME/target/SystemML.jar -f nn/examples/mnist_softmax-predict.dml
+#   -nvargs X=nn/examples/data/mnist/images.csv
+#   model_dir=nn/examples/model/mnist_softmax out_dir=nn/examples/data/mnist
+#   ```
+#
+source("nn/examples/mnist_softmax.dml") as mnist_softmax
+
+# Read input data
+fmt = ifdef($fmt, "csv")
+X = read($X, format=fmt)
+
+# Scale images to [0,1]
+X = X / 255.0
+
+# Read model coefficients
+W = read($model_dir+"/W")
+b = read($model_dir+"/b")
+
+# Predict classes
+probs = mnist_softmax::predict(X, W, b)
+
+# Output results
+write(probs, $out_dir+"/probs."+fmt, format=fmt)
+
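
The `probs` file written above holds one row of class probabilities per image. If hard digit predictions are wanted as well, a short follow-up sketch along these lines would derive them (the `$probs` and `$out_dir` arguments here are illustrative, not defined by this script); the `- 1` maps the 1-based argmax column back to a digit in [0, 9], matching the `labels+1` encoding used at training time:

```
# Sketch: convert saved class probabilities into hard digit predictions.
fmt = ifdef($fmt, "csv")
probs = read($probs, format=fmt)        # e.g. the "probs.csv" file written above
pred_digits = rowIndexMax(probs) - 1    # 1-based argmax column -> digit in [0, 9]
write(pred_digits, $out_dir+"/predictions."+fmt, format=fmt)
```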

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1f5cf697/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-train.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-train.dml b/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-train.dml
new file mode 100644
index 0000000..09970f0
--- /dev/null
+++ b/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-train.dml
@@ -0,0 +1,110 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# MNIST Softmax - Train
+#
+# This script trains a softmax classifier on images of handwritten
+# digits.
+#
+# Inputs:
+#  - train: File containing labeled MNIST training images.
+#     The format is "label, pixel_1, pixel_2, ..., pixel_n".
+#  - test: File containing labeled MNIST test images.
+#     The format is "label, pixel_1, pixel_2, ..., pixel_n".
+#  - out_dir: Directory to store weights and bias matrices of
+#     trained model, as well as final test accuracy.
+#  - fmt: [DEFAULT: "csv"] File format of `train` and `test` data.
+#     Options include: "csv", "mm", "text", and "binary".
+#
+# Outputs:
+#  - W: File containing the trained weights of the model.
+#  - b: File containing the trained biases of the model.
+#  - accuracy: File containing the final accuracy on the test data.
+#
+# Data:
+# The MNIST dataset contains labeled images of handwritten digits,
+# where each example is a 28x28 pixel image of grayscale values in
+# the range [0,255] stretched out as 784 pixels, and each label is
+# one of 10 possible digits in [0,9].
+#
+# Sample Invocation (running from within the `SystemML-NN` base directory, so the `nn/examples/...` paths resolve):
+# 1. Download data (60,000 training examples, and 10,000 test examples)
+#   ```
+#   nn/examples/get_mnist_data.sh
+#   ```
+#
+# 2. Execute using Spark
+#   ```
+#   spark-submit --master local[*] --driver-memory 10G
+#   --conf spark.driver.maxResultSize=0 --conf spark.rpc.message.maxSize=128
+#   $SYSTEMML_HOME/target/SystemML.jar -f nn/examples/mnist_softmax-train.dml
+#   -nvargs train=nn/examples/data/mnist/mnist_train.csv test=nn/examples/data/mnist/mnist_test.csv
+#   epochs=1 out_dir=nn/examples/model/mnist_softmax
+#   ```
+#
+source("nn/examples/mnist_softmax.dml") as mnist_softmax
+
+# Read training data
+fmt = ifdef($fmt, "csv")
+train = read($train, format=fmt)
+test = read($test, format=fmt)
+epochs = ifdef($epochs, 1)
+out_dir = ifdef($out_dir, ".")
+
+# Extract images and labels
+images = train[,2:ncol(train)]
+labels = train[,1]
+X_test = test[,2:ncol(test)]
+y_test = test[,1]
+
+# Scale images to [0,1], and one-hot encode the labels
+n = nrow(train)
+n_test = nrow(test)
+classes = 10
+images = images / 255.0
+labels = table(seq(1, n), labels+1, n, classes)
+X_test = X_test / 255.0
+y_test = table(seq(1, n_test), y_test+1, n_test, classes)
+
+# Split into training (55,000 examples) and validation (5,000 examples)
+X = images[5001:nrow(images),]
+X_val = images[1:5000,]
+y = labels[5001:nrow(images),]
+y_val = labels[1:5000,]
+
+# Train
+[W, b] = mnist_softmax::train(X, y, X_val, y_val, epochs)
+
+# Write model out
+write(W, out_dir+"/W")
+write(b, out_dir+"/b")
+
+# Eval on test set
+probs = mnist_softmax::predict(X_test, W, b)
+[loss, accuracy] = mnist_softmax::eval(probs, y_test)
+
+# Output results
+print("Test Accuracy: " + accuracy)
+write(accuracy, out_dir+"/accuracy")
+
+print("")
+print("")
+
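
The `table(seq(1, n), labels+1, n, classes)` calls above perform the one-hot encoding: row i receives a 1 in the column of its label (shifted by one, since digits run 0-9 but columns are 1-based), and the explicit output dimensions guarantee one column per class even if a digit never occurs. A tiny illustration:

```
# Illustration: table()-based one-hot encoding of three example digits.
labels = matrix("2 0 9", rows=3, cols=1)
Y = table(seq(1, 3), labels+1, 3, 10)  # 3x10: row 1 -> col 3, row 2 -> col 1, row 3 -> col 10
print(toString(Y))
```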

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1f5cf697/scripts/staging/SystemML-NN/nn/examples/mnist_softmax.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/examples/mnist_softmax.dml b/scripts/staging/SystemML-NN/nn/examples/mnist_softmax.dml
new file mode 100644
index 0000000..a529a12
--- /dev/null
+++ b/scripts/staging/SystemML-NN/nn/examples/mnist_softmax.dml
@@ -0,0 +1,178 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * MNIST Softmax Example
+ */
+# Imports
+source("nn/layers/affine.dml") as affine
+source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
+source("nn/layers/softmax.dml") as softmax
+source("nn/optim/sgd_nesterov.dml") as sgd_nesterov
+
+train = function(matrix[double] X, matrix[double] y,
+                 matrix[double] X_val, matrix[double] y_val,
+                 int epochs)
+    return (matrix[double] W, matrix[double] b) {
+  /*
+   * Trains a softmax classifier.
+   *
+   * The input matrix, X, has N examples, each with D features.
+   * The targets, y, have K classes, and are one-hot encoded.
+   *
+   * Inputs:
+   *  - X: Input data matrix, of shape (N, D).
+   *  - y: Target matrix, of shape (N, K).
+   *  - X_val: Input validation data matrix, of shape (N, D).
+   *  - y_val: Target validation matrix, of shape (N, K).
+   *  - epochs: Total number of full training loops over the full data set.
+   *
+   * Outputs:
+   *  - W: Weights (parameters) matrix, of shape (D, K).
+   *  - b: Biases vector, of shape (1, K).
+   */
+  N = nrow(X)  # num examples
+  D = ncol(X)  # num features
+  K = ncol(y)  # num classes
+
+  # Create softmax classifier:
+  # affine -> softmax
+  [W, b] = affine::init(D, K)
+  W = W / sqrt(2.0/(D)) * sqrt(1/(D))  # rescale init from sqrt(2/D) (relu) to sqrt(1/D), since the output feeds a softmax
+
+  # Initialize SGD w/ Nesterov momentum optimizer
+  lr = 0.2  # learning rate
+  mu = 0  # momentum
+  decay = 0.99  # learning rate decay constant
+  vW = sgd_nesterov::init(W)  # optimizer momentum state for W
+  vb = sgd_nesterov::init(b)  # optimizer momentum state for b
+
+  # Optimize
+  print("Starting optimization")
+  batch_size = 50
+  iters = 1000 #ceil(N / batch_size)
+  for (e in 1:epochs) {
+    for(i in 1:iters) {
+      # Get next batch
+      beg = ((i-1) * batch_size) %% N + 1
+      end = min(N, beg + batch_size - 1)
+      X_batch = X[beg:end,]
+      y_batch = y[beg:end,]
+
+      # Compute forward pass
+      ## affine & softmax:
+      out = affine::forward(X_batch, W, b)
+      probs = softmax::forward(out)
+
+      # Compute loss & accuracy for training & validation data
+      loss = cross_entropy_loss::forward(probs, y_batch)
+      accuracy = mean(rowIndexMax(probs) == rowIndexMax(y_batch))
+      probs_val = predict(X_val, W, b)
+      loss_val = cross_entropy_loss::forward(probs_val, y_val)
+      accuracy_val = mean(rowIndexMax(probs_val) == rowIndexMax(y_val))
+      print("Epoch: " + e + ", Iter: " + i + ", Train Loss: " + loss + ", 
Train Accuracy: " +
+            accuracy + ", Val Loss: " + loss_val + ", Val Accuracy: " + 
accuracy_val)
+
+      # Compute backward pass
+      ## loss:
+      dprobs = cross_entropy_loss::backward(probs, y_batch)
+      ## affine & softmax:
+      dout = softmax::backward(dprobs, out)
+      [dX_batch, dW, db] = affine::backward(dout, X_batch, W, b)
+
+      # Optimize with SGD w/ Nesterov momentum
+      [W, vW] = sgd_nesterov::update(W, dW, lr, mu, vW)
+      [b, vb] = sgd_nesterov::update(b, db, lr, mu, vb)
+    }
+    # Anneal momentum towards 0.999
+    mu = mu + (0.999 - mu)/(1+epochs-e)
+    # Decay learning rate
+    lr = lr * decay
+  }
+}
+
+predict = function(matrix[double] X, matrix[double] W, matrix[double] b)
+    return (matrix[double] probs) {
+  /*
+   * Computes the class probability predictions of a softmax classifier.
+   *
+   * The input matrix, X, has N examples, each with D features.
+   *
+   * Inputs:
+   *  - X: Input data matrix, of shape (N, D).
+   *  - W: Weights (parameters) matrix, of shape (D, K).
+   *  - b: Biases vector, of shape (1, K).
+   *
+   * Outputs:
+   *  - probs: Class probabilities, of shape (N, K).
+   */
+  # Compute forward pass
+  ## affine & softmax:
+  out = affine::forward(X, W, b)
+  probs = softmax::forward(out)
+}
+
+eval = function(matrix[double] probs, matrix[double] y)
+    return (double loss, double accuracy) {
+  /*
+   * Evaluates a softmax classifier.
+   *
+   * The probs matrix contains the class probability predictions
+   * of K classes over N examples.  The targets, y, have K classes,
+   * and are one-hot encoded.
+   *
+   * Inputs:
+   *  - probs: Class probabilities, of shape (N, K).
+   *  - y: Target matrix, of shape (N, K).
+   *
+   * Outputs:
+   *  - loss: Scalar loss, of shape (1).
+   *  - accuracy: Scalar accuracy, of shape (1).
+   */
+  # Compute loss & accuracy
+  loss = cross_entropy_loss::forward(probs, y)
+  correct_pred = rowIndexMax(probs) == rowIndexMax(y)
+  accuracy = mean(correct_pred)
+}
+
+generate_dummy_data = function()
+    return (matrix[double] X, matrix[double] y, int C, int Hin, int Win) {
+  /*
+   * Generate a dummy dataset similar to the MNIST dataset.
+   *
+   * Outputs:
+   *  - X: Input data matrix, of shape (N, D).
+   *  - y: Target matrix, of shape (N, K).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   */
+  # Generate dummy input data
+  N = 1024  # num examples
+  C = 1  # num input channels
+  Hin = 28  # input height
+  Win = 28  # input width
+  T = 10  # num targets
+  X = rand(rows=N, cols=C*Hin*Win, pdf="normal")
+  classes = round(rand(rows=N, cols=1, min=1, max=T, pdf="uniform"))
+  y = table(seq(1, N), classes, N, T)  # one-hot encoding, with explicit dims to guarantee T columns
+}
+
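
Both training loops above select mini-batches with `beg = ((i-1) * batch_size) %% N + 1`, which wraps around modulo N once `(i-1)*batch_size` reaches the data set size; this matters in `mnist_softmax.dml`, where `iters` is fixed at 1000 rather than `ceil(N / batch_size)`. A small sketch of how the indices behave, assuming N=1024 and batch_size=50:

```
# Sketch: mini-batch index arithmetic used in the training loops above.
N = 1024
batch_size = 50
for (i in 1:25) {
  beg = ((i-1) * batch_size) %% N + 1  # wraps around once (i-1)*batch_size reaches N
  end = min(N, beg + batch_size - 1)   # the last batch of a sweep may be shorter
  print("iter " + i + ": rows " + beg + " to " + end)
}
```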
