[SYSTEMML-618] SystemML-NN: Adding an MNIST "LeNet" neural net example, 
including a DML script and a Jupyter PySpark notebook.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/3a3c1659
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/3a3c1659
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/3a3c1659

Branch: refs/heads/master
Commit: 3a3c16599433fb9a0a7b00cc2d8b7f8a0ca53b15
Parents: d47fe8f
Author: Mike Dusenberry <[email protected]>
Authored: Fri Jun 24 17:34:31 2016 -0700
Committer: Mike Dusenberry <[email protected]>
Committed: Fri Jun 24 17:34:32 2016 -0700

----------------------------------------------------------------------
 .../SystemML-NN/Example - MNIST LeNet.ipynb     | 231 ++++++++++++
 .../SystemML-NN/examples/mnist_lenet.dml        | 360 +++++++++++++++++++
 2 files changed, 591 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/3a3c1659/scripts/staging/SystemML-NN/Example - MNIST LeNet.ipynb
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/Example - MNIST LeNet.ipynb b/scripts/staging/SystemML-NN/Example - MNIST LeNet.ipynb
new file mode 100644
index 0000000..1926f3b
--- /dev/null
+++ b/scripts/staging/SystemML-NN/Example - MNIST LeNet.ipynb   
@@ -0,0 +1,231 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Quick Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# Add SystemML PySpark API file.\n",
+    "sc.addPyFile(\"https://raw.githubusercontent.com/apache/incubator-systemml/3d5f9b11741f6d6ecc6af7cbaa1069cde32be838/src/main/java/org/apache/sysml/api/python/SystemML.py\")\n",
+    "\n",
+    "# Create a SystemML MLContext object\n",
+    "from SystemML import MLContext\n",
+    "ml = MLContext(sc)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Download Data - MNIST"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The MNIST dataset contains labeled images of handwritten digits, where 
each example is a 28x28 pixel image of grayscale values in the range [0,255] 
stretched out as 784 pixels, and each label is one of 10 possible digits in 
[0,9].  Here, we download 60,000 training examples, and 10,000 test examples, 
where the format is \"label, pixel_1, pixel_2, ..., pixel_n\"."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "%%sh\n",
+    "mkdir -p examples/data/mnist/\n",
+    "cd examples/data/mnist/\n",
+    "curl -O http://pjreddie.com/media/files/mnist_train.csv\n";,
+    "curl -O http://pjreddie.com/media/files/mnist_test.csv";
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## SystemML \"LeNet\" Neural Network"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "script = \"\"\"\n",
+    "source(\"examples/mnist_lenet.dml\") as mnist_lenet\n",
+    "\n",
+    "# Read training data\n",
+    "data = read($data, format=\"csv\")\n",
+    "n = nrow(data)\n",
+    "C = $C\n",
+    "Hin = $Hin\n",
+    "Win = $Win\n",
+    "\n",
+    "# Extract images and labels\n",
+    "images = data[,2:ncol(data)]\n",
+    "labels = data[,1]\n",
+    "\n",
+    "# Scale images to [-1,1], and one-hot encode the labels\n",
+    "images = (images / 255.0) * 2 - 1\n",
+    "labels = table(seq(1, n), labels+1, n, 10)\n",
+    "\n",
+    "# Split into training (55,000 examples) and validation (5,000 
examples)\n",
+    "X = images[5001:nrow(images),]\n",
+    "X_val = images[1:5000,]\n",
+    "y = labels[5001:nrow(images),]\n",
+    "y_val = labels[1:5000,]\n",
+    "\n",
+    "# Train\n",
+    "[W1, b1, W2, b2, W3, b3, W4, b4] = mnist_lenet::train(X, y, X_val, y_val, 
C, Hin, Win)\n",
+    "\n",
+    "# Write model out\n",
+    "write(W1, $W1out)\n",
+    "write(b1, $b1out)\n",
+    "write(W2, $W2out)\n",
+    "write(b2, $b2out)\n",
+    "write(W3, $W3out)\n",
+    "write(b3, $b3out)\n",
+    "write(W4, $W4out)\n",
+    "write(b4, $b4out)\n",
+    "\n",
+    "print(\"\")\n",
+    "print(\"\")\n",
+    "\"\"\"\n",
+    "ml.reset()\n",
+    "out = ml.executeScript(script, {\"data\": 
\"examples/data/mnist/mnist_train.csv\",\n",
+    "                                \"C\": 1, \"Hin\": 28, \"Win\": 28},\n",
+    "                       outputs=[\"W1\", \"b1\", \"W2\", \"b2\", \"W3\", 
\"b3\", \"W4\", \"b4\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Extract model from SystemML back into PySpark"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# Extract variables\n",
+    "W1 = out.getDF(sqlContext, \"W1\").sort(\"ID\").drop(\"ID\")\n",
+    "b1 = out.getDF(sqlContext, \"b1\").sort(\"ID\").drop(\"ID\")\n",
+    "W2 = out.getDF(sqlContext, \"W2\").sort(\"ID\").drop(\"ID\")\n",
+    "b2 = out.getDF(sqlContext, \"b2\").sort(\"ID\").drop(\"ID\")\n",
+    "W3 = out.getDF(sqlContext, \"W3\").sort(\"ID\").drop(\"ID\")\n",
+    "b3 = out.getDF(sqlContext, \"b3\").sort(\"ID\").drop(\"ID\")\n",
+    "W4 = out.getDF(sqlContext, \"W4\").sort(\"ID\").drop(\"ID\")\n",
+    "b4 = out.getDF(sqlContext, \"b4\").sort(\"ID\").drop(\"ID\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Compute Test Accuracy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "script = \"\"\"\n",
+    "source(\"examples/mnist_lenet.dml\") as mnist_lenet\n",
+    "\n",
+    "# Read test data\n",
+    "data = read($data, format=\"csv\")\n",
+    "n = nrow(data)\n",
+    "C = $C\n",
+    "Hin = $Hin\n",
+    "Win = $Win\n",
+    "\n",
+    "# Extract images and labels\n",
+    "X_test = data[,2:ncol(data)]\n",
+    "y_test = data[,1]\n",
+    "\n",
+    "# Scale images to [-1,1], and one-hot encode the labels\n",
+    "X_test = (X_test / 255.0) * 2 - 1\n",
+    "y_test = table(seq(1, n), y_test+1, n, 10)\n",
+    "\n",
+    "# Read model coefficients\n",
+    "W1 = read($W1)\n",
+    "b1 = read($b1)\n",
+    "W2 = read($W2)\n",
+    "b2 = read($b2)\n",
+    "W3 = read($W3)\n",
+    "b3 = read($b3)\n",
+    "W4 = read($W4)\n",
+    "b4 = read($b4)\n",
+    "\n",
+    "# Eval on test set\n",
+    "[loss, accuracy] = mnist_lenet::eval(X_test, y_test, C, Hin, Win, W1, b1, 
W2, b2, W3, b3, W4, b4)\n",
+    "\n",
+    "print(\"Test ;Accuracy: \" + accuracy)\n",
+    "\n",
+    "print(\"\")\n",
+    "print(\"\")\n",
+    "\"\"\"\n",
+    "ml.reset()\n",
+    "ml.executeScript(script, {\"data\": 
\"examples/data/mnist/mnist_train.csv\",\n",
+    "                          \"C\": 1, \"Hin\": 28, \"Win\": 28,\n",
+    "                          \"W1\": W1, \"b1\": b1,\n",
+    "                          \"W2\": W2, \"b2\": b2,\n",
+    "                          \"W3\": W3, \"b3\": b3,\n",
+    "                          \"W4\": W4, \"b4\": b4})"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
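
For reference, the DML data prep embedded in the notebook above leans on two idioms: rescaling pixels from [0,255] to [-1,1], and one-hot encoding labels via table(). A minimal standalone DML sketch of both (toy values, not from the commit):

    # Toy data: three labels in [0,9] and three grayscale values in [0,255]
    n = 3
    labels = matrix("2 0 9", rows=n, cols=1)
    pixels = matrix("0 127.5 255", rows=1, cols=3)

    # Rescale [0,255] -> [0,1] -> [-1,1]: yields -1.0, 0.0, 1.0
    pixels_scaled = (pixels / 255.0) * 2 - 1

    # One-hot encode: row i gets a 1 in column labels[i]+1 of an n x 10 matrix,
    # so Y[1,3] = 1, Y[2,1] = 1, Y[3,10] = 1
    Y = table(seq(1, n), labels + 1, n, 10)

    print(toString(pixels_scaled))
    print(toString(Y))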

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/3a3c1659/scripts/staging/SystemML-NN/examples/mnist_lenet.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/examples/mnist_lenet.dml 
b/scripts/staging/SystemML-NN/examples/mnist_lenet.dml
new file mode 100644
index 0000000..bd6361f
--- /dev/null
+++ b/scripts/staging/SystemML-NN/examples/mnist_lenet.dml
@@ -0,0 +1,360 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * MNIST LeNet Example
+ */
+# Imports
+source("nn/layers/affine.dml") as affine
+source("nn/layers/conv_builtin.dml") as conv
+source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
+source("nn/layers/l2_reg.dml") as l2_reg
+source("nn/layers/max_pool_builtin.dml") as max_pool
+source("nn/layers/relu.dml") as relu
+source("nn/layers/softmax.dml") as softmax
+source("nn/optim/sgd_nesterov.dml") as sgd_nesterov
+
+train = function(matrix[double] X, matrix[double] y,
+                 matrix[double] X_val, matrix[double] y_val,
+                 int C, int Hin, int Win)
+    return (matrix[double] W1, matrix[double] b1,
+            matrix[double] W2, matrix[double] b2,
+            matrix[double] W3, matrix[double] b3,
+            matrix[double] W4, matrix[double] b4) {
+  /*
+   * Trains a convolutional net using the "LeNet" architecture.
+   *
+   * The input matrix, X, has N examples, each represented as a 3D
+   * volume unrolled into a single vector.  The targets, y, have K
+   * classes, and are one-hot encoded.
+   *
+   * Inputs:
+   *  - X: Input data matrix, of shape (N, C*Hin*Win).
+   *  - y: Target matrix, of shape (N, K).
+   *  - X_val: Input validation data matrix, of shape (N, C*Hin*Win).
+   *  - y_val: Target validation matrix, of shape (N, K).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   *
+   * Outputs:
+   *  - W1: 1st layer weights (parameters) matrix, of shape (F1, C*Hf*Wf).
+   *  - b1: 1st layer biases vector, of shape (F1, 1).
+   *  - W2: 2nd layer weights (parameters) matrix, of shape (F2, F1*Hf*Wf).
+   *  - b2: 2nd layer biases vector, of shape (F2, 1).
+   *  - W3: 3rd layer weights (parameters) matrix, of shape (F2*(Hin/4)*(Win/4), N3).
+   *  - b3: 3rd layer biases vector, of shape (1, N3).
+   *  - W4: 4th layer weights (parameters) matrix, of shape (N3, K).
+   *  - b4: 4th layer biases vector, of shape (1, K).
+   */
+  N = nrow(X)
+  K = ncol(y)
+
+  # Create network:
+  # conv1 -> relu1 -> pool1 -> conv2 -> relu2 -> pool2 -> affine3 -> relu3 -> affine4 -> softmax
+  Hf = 5  # filter height
+  Wf = 5  # filter width
+  stride = 1
+  pad = 2  # For same dimensions, (Hf - stride) / 2
+
+  F1 = 32  # num conv filters in conv1
+  F2 = 64  # num conv filters in conv2
+  N3 = 512  # num nodes in affine3
+  # Note: affine4 has K nodes, which is equal to the number of target dimensions (num classes)
+
+  [W1, b1] = conv::init(F1, C, Hf, Wf)  # inputs: (N, C*Hin*Win)
+  [W2, b2] = conv::init(F2, F1, Hf, Wf)  # inputs: (N, F1*(Hin/2)*(Win/2))
+  [W3, b3] = affine::init(F2*(Hin/2/2)*(Win/2/2), N3)  # inputs: (N, F2*(Hin/2/2)*(Win/2/2))
+  [W4, b4] = affine::init(N3, K)  # inputs: (N, N3)
+  W4 = W4 / sqrt(2)  # different initialization, since it feeds into softmax instead of relu
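+  # Shape check (a sketch of the arithmetic, assuming Hin = Win = 28): with Hf = 5,
+  # stride = 1, pad = 2, each conv layer preserves spatial dims, and each 2x2 max
+  # pool halves them: 28x28 -> 14x14 -> 7x7.  Hence affine3 receives
+  # F2*(Hin/2/2)*(Win/2/2) = 64*7*7 = 3136 features per example.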
+
+  # Initialize SGD w/ Nesterov momentum optimizer
+  lr = 0.01  # learning rate
+  mu = 0.9  # momentum
+  decay = 0.95  # learning rate decay constant
+  vW1 = sgd_nesterov::init(W1); vb1 = sgd_nesterov::init(b1)
+  vW2 = sgd_nesterov::init(W2); vb2 = sgd_nesterov::init(b2)
+  vW3 = sgd_nesterov::init(W3); vb3 = sgd_nesterov::init(b3)
+  vW4 = sgd_nesterov::init(W4); vb4 = sgd_nesterov::init(b4)
+
+  # Regularization
+  lambda = 5e-04
+
+  # Optimize
+  print("Starting optimization")
+  batch_size = 64
+  epochs = 10
+  iters = ceil(N / batch_size)
+  for (e in 1:epochs) {
+    for(i in 1:iters) {
+      # Get next batch
+      beg = ((i-1) * batch_size) %% N + 1
+      end = min(N, beg + batch_size - 1)
+      X_batch = X[beg:end,]
+      y_batch = y[beg:end,]
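+      # Note: (i-1)*batch_size < N for all i in 1:iters, so the %% N wrap is a
+      # no-op within an epoch; min(N, ...) simply clamps the final, smaller batch.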
+
+      # Compute forward pass
+      ## layer 1: conv1 -> relu1 -> pool1
+      [outc1, Houtc1, Woutc1] = conv::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+      outr1 = relu::forward(outc1)
+      [outp1, Houtp1, Woutp1] = max_pool::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2, strideh=2, stridew=2)
+      ## layer 2: conv2 -> relu2 -> pool2
+      [outc2, Houtc2, Woutc2] = conv::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf, stride, stride, pad, pad)
+      outr2 = relu::forward(outc2)
+      [outp2, Houtp2, Woutp2] = max_pool::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2, strideh=2, stridew=2)
+      ## layer 3:  affine3 -> relu3
+      outa3 = affine::forward(outp2, W3, b3)
+      outr3 = relu::forward(outa3)
+      ## layer 4:  affine4 -> softmax
+      outa4 = affine::forward(outr3, W4, b4)
+      probs = softmax::forward(outa4)
+
+      # Compute loss & accuracy for training & validation data every 100 
iterations.
+      if (i %% 100 == 0) {
+        # Compute training loss & accuracy
+        loss_data = cross_entropy_loss::forward(probs, y_batch)
+        loss_reg_W1 = l2_reg::forward(W1, lambda)
+        loss_reg_W2 = l2_reg::forward(W2, lambda)
+        loss_reg_W3 = l2_reg::forward(W3, lambda)
+        loss_reg_W4 = l2_reg::forward(W4, lambda)
+        loss = loss_data + loss_reg_W1 + loss_reg_W2 + loss_reg_W3 + loss_reg_W4
+        accuracy = mean(rowIndexMax(probs) == rowIndexMax(y_batch))
+
+        # Compute validation loss & accuracy
+        [loss_val, accuracy_val] = eval(X_val, y_val, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4)
+
+        # Output results
+        print("Epoch: " + e + ", Iter: " + i + ", Train Loss: " + loss + ", Train Accuracy: " + accuracy + ", Val Loss: " + loss_val + ", Val Accuracy: " + accuracy_val)
+      }
+
+      # Compute data backward pass
+      ## loss:
+      dprobs = cross_entropy_loss::backward(probs, y_batch)
+      ## layer 4:  affine4 -> softmax
+      douta4 = softmax::backward(dprobs, outa4)
+      [doutr3, dW4, db4] = affine::backward(douta4, outr3, W4, b4)
+      ## layer 3:  affine3 -> relu3
+      douta3 = relu::backward(doutr3, outa3)
+      [doutp2, dW3, db3] = affine::backward(douta3, outp2, W3, b3)
+      ## layer 2: conv2 -> relu2 -> pool2
+      doutr2 = max_pool::backward(doutp2, Houtp2, Woutp2, outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2, strideh=2, stridew=2)
+      doutc2 = relu::backward(doutr2, outc2)
+      [doutp1, dW2, db2] = conv::backward(doutc2, Houtc2, Woutc2, outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf, stride, stride, pad, pad)
+      ## layer 1: conv1 -> relu1 -> pool1
+      doutr1 = max_pool::backward(doutp1, Houtp1, Woutp1, outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2, strideh=2, stridew=2)
+      doutc1 = relu::backward(doutr1, outc1)
+      [dX_batch, dW1, db1] = conv::backward(doutc1, Houtc1, Woutc1, X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+
+      # Compute regularization backward pass
+      dW1_reg = l2_reg::backward(W1, lambda)
+      dW2_reg = l2_reg::backward(W2, lambda)
+      dW3_reg = l2_reg::backward(W3, lambda)
+      dW4_reg = l2_reg::backward(W4, lambda)
+      dW1 = dW1 + dW1_reg
+      dW2 = dW2 + dW2_reg
+      dW3 = dW3 + dW3_reg
+      dW4 = dW4 + dW4_reg
+
+      # Optimize with SGD w/ Nesterov momentum
+      [W1, vW1] = sgd_nesterov::update(W1, dW1, lr, mu, vW1)
+      [b1, vb1] = sgd_nesterov::update(b1, db1, lr, mu, vb1)
+      [W2, vW2] = sgd_nesterov::update(W2, dW2, lr, mu, vW2)
+      [b2, vb2] = sgd_nesterov::update(b2, db2, lr, mu, vb2)
+      [W3, vW3] = sgd_nesterov::update(W3, dW3, lr, mu, vW3)
+      [b3, vb3] = sgd_nesterov::update(b3, db3, lr, mu, vb3)
+      [W4, vW4] = sgd_nesterov::update(W4, dW4, lr, mu, vW4)
+      [b4, vb4] = sgd_nesterov::update(b4, db4, lr, mu, vb4)
+    }
+    # Anneal momentum towards 0.999
+    #mu = mu + (0.999 - mu)/(1+epochs-e)
+    # Decay learning rate
+    lr = lr * decay
+  }
+}
+
+eval = function(matrix[double] X, matrix[double] y, int C, int Hin, int Win,
+                matrix[double] W1, matrix[double] b1,
+                matrix[double] W2, matrix[double] b2,
+                matrix[double] W3, matrix[double] b3,
+                matrix[double] W4, matrix[double] b4)
+    return (double loss, double accuracy) {
+  /*
+   * Evaluates a convolutional net using the "LeNet" architecture.
+   *
+   * The input matrix, X, has N examples, each represented as a 3D
+   * volume unrolled into a single vector.  The targets, y, have K
+   * classes, and are one-hot encoded.
+   *
+   * Inputs:
+   *  - X: Input data matrix, of shape (N, C*Hin*Win).
+   *  - y: Target matrix, of shape (N, K).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   *  - W1: 1st layer weights (parameters) matrix, of shape (F1, C*Hf*Wf).
+   *  - b1: 1st layer biases vector, of shape (F1, 1).
+   *  - W2: 2nd layer weights (parameters) matrix, of shape (F2, F1*Hf*Wf).
+   *  - b2: 2nd layer biases vector, of shape (F2, 1).
+   *  - W3: 3rd layer weights (parameters) matrix, of shape (F2*(Hin/4)*(Win/4), N3).
+   *  - b3: 3rd layer biases vector, of shape (1, N3).
+   *  - W4: 4th layer weights (parameters) matrix, of shape (N3, K).
+   *  - b4: 4th layer biases vector, of shape (1, K).
+   *
+   * Outputs:
+   *  - loss: Scalar loss, of shape (1).
+   *  - accuracy: Scalar accuracy, of shape (1).
+   */
+  # Eval network:
+  # conv1 -> relu1 -> pool1 -> conv2 -> relu2 -> pool2 -> affine3 -> relu3 -> affine4 -> softmax
+  Hf = 5  # filter height
+  Wf = 5  # filter width
+  stride = 1
+  pad = 2  # For same dimensions, (Hf - stride) / 2
+
+  F1 = nrow(W1)  # num conv filters in conv1
+  F2 = nrow(W2)  # num conv filters in conv2
+  N3 = ncol(W3)  # num nodes in affine3
+  K = ncol(W4)  # num nodes in affine4, equal to number of target dimensions (num classes)
+
+  # Compute forward pass
+  ## layer 1: conv1 -> relu1 -> pool1
+  [outc1, Houtc1, Woutc1] = conv::forward(X, W1, b1, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+  outr1 = relu::forward(outc1)
+  [outp1, Houtp1, Woutp1] = max_pool::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2, strideh=2, stridew=2)
+  ## layer 2: conv2 -> relu2 -> pool2
+  [outc2, Houtc2, Woutc2] = conv::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf, stride, stride, pad, pad)
+  outr2 = relu::forward(outc2)
+  [outp2, Houtp2, Woutp2] = max_pool::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2, strideh=2, stridew=2)
+  ## layer 3:  affine3 -> relu3
+  outa3 = affine::forward(outp2, W3, b3)
+  outr3 = relu::forward(outa3)
+  ## layer 4:  affine4 -> softmax
+  outa4 = affine::forward(outr3, W4, b4)
+  probs = softmax::forward(outa4)
+
+  # Compute loss & accuracy
+  loss = cross_entropy_loss::forward(probs, y)
+  correct_pred = rowIndexMax(probs) == rowIndexMax(y)
+  accuracy = mean(correct_pred)
+}
+
+generate_dummy_data = function()
+    return (matrix[double] X, matrix[double] y, int C, int Hin, int Win) {
+  /*
+   * Generate a dummy dataset similar to the MNIST dataset.
+   *
+   * Outputs:
+   *  - X: Input data matrix, of shape (N, D).
+   *  - y: Target matrix, of shape (N, K).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   */
+  # Generate dummy input data
+  N = 1024  # num examples
+  C = 1  # num input channels
+  Hin = 28  # input height
+  Win = 28  # input width
+  K = 10  # num target classes
+  X = rand(rows=N, cols=C*Hin*Win, pdf="normal")
+  classes = round(rand(rows=N, cols=1, min=1, max=K, pdf="uniform"))
+  y = table(seq(1, N), classes, N, K)  # one-hot encoding, with explicit dims in case some class in [1,K] is unsampled
+}
+
+
+#
+# Main
+#
+# This runs if called as a script.
+#
+# The MNIST dataset contains labeled images of handwritten digits,
+# where each example is a 28x28 pixel image of grayscale values in
+# the range [0,255] stretched out as 784 pixels, and each label is
+# one of 10 possible digits in [0,9].
+#
+# Here, we assume 60,000 training examples, and 10,000 test examples,
+# where the format is "label, pixel_1, pixel_2, ..., pixel_n".
+#
+# 1. Download data
+#   ```
+#   examples/get_mnist_data.sh
+#   ```
+#
+# 2. Execute using Spark
+#   ```
+#   $SPARK_HOME/bin/spark-submit --master local[*] --driver-memory 10G
+#   --conf spark.driver.maxResultSize=0 --conf spark.akka.frameSize=128
+#   $SYSTEMML_HOME/target/SystemML.jar -f examples/mnist_lenet.dml
+#   -nvargs train=examples/data/mnist/mnist_train.csv
+#   test=examples/data/mnist/mnist_test.csv C=1 Hin=28 Win=28 out_dir=examples/model/mnist_lenet
+#   ```
+#
+
+# Read training data
+train = read($train, format="csv")
+test = read($test, format="csv")
+C = $C
+Hin = $Hin
+Win = $Win
+
+# Extract images and labels
+images = train[,2:ncol(train)]
+labels = train[,1]
+X_test = test[,2:ncol(test)]
+y_test = test[,1]
+
+# Scale images to [-1,1], and one-hot encode the labels
+n = nrow(train)
+n_test = nrow(test)
+images = (images / 255.0) * 2 - 1
+labels = table(seq(1, n), labels+1, n, 10)
+X_test = (X_test / 255.0) * 2 - 1
+y_test = table(seq(1, n_test), y_test+1, n_test, 10)
+
+# Split into training (55,000 examples) and validation (5,000 examples)
+X = images[5001:nrow(images),]
+X_val = images[1:5000,]
+y = labels[5001:nrow(images),]
+y_val = labels[1:5000,]
+
+# Train
+[W1, b1, W2, b2, W3, b3, W4, b4] = train(X, y, X_val, y_val, C, Hin, Win)
+
+# Write model out
+write(W1, $out_dir+"/W1")
+write(b1, $out_dir+"/b1")
+write(W2, $out_dir+"/W2")
+write(b2, $out_dir+"/b2")
+write(W3, $out_dir+"/W3")
+write(b3, $out_dir+"/b3")
+write(W4, $out_dir+"/W4")
+write(b4, $out_dir+"/b4")
+
+# Eval on test set
+[loss, accuracy] = eval(X_test, y_test, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4)
+
+# Output results
+print("Test Accuracy: " + accuracy)
+write(accuracy, $out_dir+"/accuracy")
+
+print("")
+print("")
+
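
Note that mnist_lenet.dml also defines generate_dummy_data(), which the main block never calls. A quick smoke test of train/eval without downloading MNIST could look like the following DML sketch (a hypothetical driver, assuming it is run from the SystemML-NN directory; not part of this commit):

    source("examples/mnist_lenet.dml") as mnist_lenet

    # Small synthetic train/validation sets with MNIST-like shapes
    [X, y, C, Hin, Win] = mnist_lenet::generate_dummy_data()
    [X_val, y_val, C, Hin, Win] = mnist_lenet::generate_dummy_data()

    # Train on the dummy data, then evaluate on the dummy validation set
    [W1, b1, W2, b2, W3, b3, W4, b4] = mnist_lenet::train(X, y, X_val, y_val, C, Hin, Win)
    [loss, accuracy] = mnist_lenet::eval(X_val, y_val, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4)
    print("Dummy-data accuracy: " + accuracy)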
