[SYSTEMML-618] SystemML-NN: Updating the MNIST softmax classifier example to 
remove unnecessary dependencies, and to simplify the invocation.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/d47fe8fc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/d47fe8fc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/d47fe8fc

Branch: refs/heads/master
Commit: d47fe8fc5ca39ce6c50944a2fbaaca37ba3ba134
Parents: 60f3fe6
Author: Mike Dusenberry <[email protected]>
Authored: Fri Jun 24 17:28:57 2016 -0700
Committer: Mike Dusenberry <[email protected]>
Committed: Fri Jun 24 17:28:57 2016 -0700

----------------------------------------------------------------------
 .../Example - MNIST Softmax Classifier.ipynb    | 84 +++++++++-----------
 .../SystemML-NN/examples/get_mnist_data.sh      | 28 +++++++
 .../SystemML-NN/examples/mnist_softmax.dml      | 58 +++++++++-----
 3 files changed, 107 insertions(+), 63 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d47fe8fc/scripts/staging/SystemML-NN/Example
 - MNIST Softmax Classifier.ipynb
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/Example - MNIST Softmax 
Classifier.ipynb b/scripts/staging/SystemML-NN/Example - MNIST Softmax 
Classifier.ipynb
index 5c1de15..d7da0d2 100644
--- a/scripts/staging/SystemML-NN/Example - MNIST Softmax Classifier.ipynb      
+++ b/scripts/staging/SystemML-NN/Example - MNIST Softmax Classifier.ipynb      
@@ -15,8 +15,6 @@
    },
    "outputs": [],
    "source": [
-    "import numpy as np\n",
-    "\n",
     "# Add SystemML PySpark API file.\n",
     
"sc.addPyFile(\"https://raw.githubusercontent.com/apache/incubator-systemml/3d5f9b11741f6d6ecc6af7cbaa1069cde32be838/src/main/java/org/apache/sysml/api/python/SystemML.py\")\n",
     "\n",
@@ -36,43 +34,22 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "The MNIST dataset contains labeled images of handwritten digits, where 
each example is a 28x28 pixel image of grayscale values scaled to [0,1] and 
stretched out as 784 pixels, and each label is a one-hot encoding over 10 
possible digits.  Here, we use TensorFlow's API for accessing the data, and 
retrieve 50,000 training examples, 5,000 validation examples, and 10,000 test 
examples.  [Note: TensorFlow can easily be installed via [these 
instructions](https://www.tensorflow.org/versions/r0.9/get_started/os_setup.html#pip-installation).]"
+    "The MNIST dataset contains labeled images of handwritten digits, where 
each example is a 28x28 pixel image of grayscale values in the range [0,255] 
stretched out as 784 pixels, and each label is one of 10 possible digits in 
[0,9].  Here, we download 60,000 training examples, and 10,000 test examples, 
where the format is \"label, pixel_1, pixel_2, ..., pixel_n\"."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": true
+    "collapsed": false
    },
    "outputs": [],
    "source": [
     "%%sh\n",
-    "mkdir -p examples/data/mnist/"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false,
-    "scrolled": false
-   },
-   "outputs": [],
-   "source": [
-    "import tensorflow as tf\n",
-    "from tensorflow.examples.tutorials.mnist import input_data\n",
-    "\n",
-    "# Get MNIST data\n",
-    "mnist = input_data.read_data_sets('MNIST_data', one_hot=True)\n",
-    "\n",
-    "# Save to CSV\n",
-    "np.savetxt(\"examples/data/mnist/train_images.csv\", mnist.train.images, 
delimiter=\",\")\n",
-    "np.savetxt(\"examples/data/mnist/train_labels.csv\", mnist.train.labels, 
delimiter=\",\")\n",
-    "np.savetxt(\"examples/data/mnist/val_images.csv\", 
mnist.validation.images, delimiter=\",\")\n",
-    "np.savetxt(\"examples/data/mnist/val_labels.csv\", 
mnist.validation.labels, delimiter=\",\")\n",
-    "np.savetxt(\"examples/data/mnist/test_images.csv\", mnist.test.images, 
delimiter=\",\")\n",
-    "np.savetxt(\"examples/data/mnist/test_labels.csv\", mnist.test.labels, 
delimiter=\",\")"
+    "mkdir -p examples/data/mnist/\n",
+    "cd examples/data/mnist/\n",
+    "curl -O http://pjreddie.com/media/files/mnist_train.csv\n",
+    "curl -O http://pjreddie.com/media/files/mnist_test.csv"
    ]
   },
   {
@@ -100,17 +77,28 @@
     "script = \"\"\"\n",
     "source(\"examples/mnist_softmax.dml\") as mnist_softmax\n",
     "\n",
-    "# Read data\n",
-    "X = read($X, format=\"csv\")\n",
-    "y = read($y, format=\"csv\")\n",
+    "# Read training data\n",
+    "data = read($data, format=\"csv\")\n",
+    "n = nrow(data)\n",
+    "\n",
+    "# Extract images and labels\n",
+    "images = data[,2:ncol(data)]\n",
+    "labels = data[,1]\n",
     "\n",
-    "X_val = read($X_val, format=\"csv\")\n",
-    "y_val = read($y_val, format=\"csv\")\n",
+    "# Scale images to [0,1], and one-hot encode the labels\n",
+    "images = images / 255.0\n",
+    "labels = table(seq(1, n), labels+1, n, 10)\n",
+    "\n",
+    "# Split into training (55,000 examples) and validation (5,000 
examples)\n",
+    "X = images[5001:nrow(images),]\n",
+    "X_val = images[1:5000,]\n",
+    "y = labels[5001:nrow(images),]\n",
+    "y_val = labels[1:5000,]\n",
     "\n",
     "# Train\n",
     "[W, b] = mnist_softmax::train(X, y, X_val, y_val)\n",
     "\n",
-    "# Write model out\n",
+    "# Write model out (we will extract these back into PySpark)\n",
     "write(W, $Wout)\n",
     "write(b, $bout)\n",
     "\n",
@@ -118,10 +106,7 @@
     "print(\"\")\n",
     "\"\"\"\n",
     "ml.reset()\n",
-    "out = ml.executeScript(script, {\"X\": 
\"examples/data/mnist/train_images.csv\", \n",
-    "                                \"y\": 
\"examples/data/mnist/train_labels.csv\",\n",
-    "                                \"X_val\": 
\"examples/data/mnist/val_images.csv\", \n",
-    "                                \"y_val\": 
\"examples/data/mnist/val_labels.csv\"},\n",
+    "out = ml.executeScript(script, {\"data\": 
\"examples/data/mnist/mnist_train.csv\"},\n",
     "                       outputs=[\"W\", \"b\"])"
    ]
   },
@@ -162,23 +147,32 @@
     "script = \"\"\"\n",
     "source(\"examples/mnist_softmax.dml\") as mnist_softmax\n",
     "\n",
-    "# Read data & coefficients\n",
-    "X_test = read($X_test, format=\"csv\")\n",
-    "y_test = read($y_test, format=\"csv\")\n",
+    "# Read test data\n",
+    "data = read($data, format=\"csv\")\n",
+    "n = nrow(data)\n",
+    "\n",
+    "# Extract images and labels\n",
+    "X_test = data[,2:ncol(data)]\n",
+    "y_test = data[,1]\n",
+    "\n",
+    "# Scale images to [0,1], and one-hot encode the labels\n",
+    "X_test = X_test / 255.0\n",
+    "y_test = table(seq(1, n), y_test+1, n, 10)\n",
+    "\n",
+    "# Read model coefficients\n",
     "W = read($W)\n",
     "b = read($b)\n",
     "\n",
     "# Eval on test set\n",
     "[loss, accuracy] = mnist_softmax::eval(X_test, y_test, W, b)\n",
     "\n",
-    "print(\"Accuracy: \" + accuracy)\n",
+    "print(\"Test Accuracy: \" + accuracy)\n",
     "\n",
     "print(\"\")\n",
     "print(\"\")\n",
     "\"\"\"\n",
     "ml.reset()\n",
-    "out = ml.executeScript(script, {\"X_test\": 
\"examples/data/mnist/test_images.csv\",\n",
-    "                                \"y_test\": 
\"examples/data/mnist/test_labels.csv\",\n",
+    "out = ml.executeScript(script, {\"data\": 
\"examples/data/mnist/mnist_test.csv\",\n",
     "                                \"W\": W, \"b\": b})"
    ]
   }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d47fe8fc/scripts/staging/SystemML-NN/examples/get_mnist_data.sh
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/examples/get_mnist_data.sh 
b/scripts/staging/SystemML-NN/examples/get_mnist_data.sh
new file mode 100755
index 0000000..6fed70b
--- /dev/null
+++ b/scripts/staging/SystemML-NN/examples/get_mnist_data.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+DIR="$(cd "$(dirname "$0")" && pwd)"  # absolute path of the directory containing this script
+mkdir -p "$DIR/data/mnist/"  # quoted so a checkout path containing spaces is not word-split
+cd "$DIR/data/mnist/" || exit 1  # abort instead of downloading into the caller's directory
+curl -O http://pjreddie.com/media/files/mnist_train.csv
+curl -O http://pjreddie.com/media/files/mnist_test.csv
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d47fe8fc/scripts/staging/SystemML-NN/examples/mnist_softmax.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/examples/mnist_softmax.dml 
b/scripts/staging/SystemML-NN/examples/mnist_softmax.dml
index f3c47e9..8ba24cb 100644
--- a/scripts/staging/SystemML-NN/examples/mnist_softmax.dml
+++ b/scripts/staging/SystemML-NN/examples/mnist_softmax.dml
@@ -159,30 +159,52 @@ generate_dummy_data = function()
 #
 # This runs if called as a script.
 #
-# Ex:
+# The MNIST dataset contains labeled images of handwritten digits,
+# where each example is a 28x28 pixel image of grayscale values in
+# the range [0,255] stretched out as 784 pixels, and each label is
+# one of 10 possible digits in [0,9].
+#
+# Here, we assume 60,000 training examples, and 10,000 test examples,
+# where the format is "label, pixel_1, pixel_2, ..., pixel_n".
+#
+# 1. Download data
+#   ```
+#   examples/get_mnist_data.sh
+#   ```
+#
+# 2. Execute using Spark
 #   ```
 #   $SPARK_HOME/bin/spark-submit --master local[*] --driver-memory 5G
 #   --conf spark.driver.maxResultSize=0 --conf spark.akka.frameSize=128
 #   $SYSTEMML_HOME/target/SystemML.jar -f examples/mnist_softmax.dml
-#   -nvargs X=examples/data/mnist/train_images.csv 
y=examples/data/mnist/train_labels.csv
-#   X_val=examples/data/mnist/val_images.csv 
y_val=examples/data/mnist/val_labels.csv
-#   X_test=examples/data/mnist/test_images.csv 
y_test=examples/data/mnist/test_labels.csv
-#   out_dir=examples/model/mnist_softmax
+#   -nvargs train=examples/data/mnist/mnist_train.csv
+#   test=examples/data/mnist/mnist_test.csv 
out_dir=examples/model/mnist_softmax
 #   ```
 #
-# The MNIST dataset contains labeled images of handwritten digits,
-# where each example is a 28x28 pixel image of grayscale values
-# scaled to [0,1] and stretched out as 784 pixels, and each label
-# is a one-hot encoding over 10 possible digits.
-#
 
-# Read data
-X = read($X, format="csv")
-y = read($y, format="csv")
-X_val = read($X_val, format="csv")
-y_val = read($y_val, format="csv")
-X_test = read($X_test, format="csv")
-y_test = read($y_test, format="csv")
+# Read training and test data
+train = read($train, format="csv")
+test = read($test, format="csv")
+
+# Extract images and labels
+images = train[,2:ncol(train)]
+labels = train[,1]
+X_test = test[,2:ncol(test)]
+y_test = test[,1]
+
+# Scale images to [0,1], and one-hot encode the labels
+n = nrow(train)
+n_test = nrow(test)
+images = images / 255.0
+labels = table(seq(1, n), labels+1, n, 10)
+X_test = X_test / 255.0
+y_test = table(seq(1, n_test), y_test+1, n_test, 10)
+
+# Split into training (55,000 examples) and validation (5,000 examples)
+X = images[5001:nrow(images),]
+X_val = images[1:5000,]
+y = labels[5001:nrow(images),]
+y_val = labels[1:5000,]
 
 # Train
 [W, b] = train(X, y, X_val, y_val)
@@ -195,7 +217,7 @@ write(b, $out_dir+"/b")
 [loss, accuracy] = eval(X_test, y_test, W, b)
 
 # Output results
-print("Accuracy: " + accuracy)
+print("Test Accuracy: " + accuracy)
 write(accuracy, $out_dir+"/accuracy")
 
 print("")

Reply via email to