Repository: incubator-systemml Updated Branches: refs/heads/master 2f3ab9880 -> 9f12b5c66
[SYSTEMML-867] Updating the deep learning notebook examples to use the new Python MLContext API. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/9f12b5c6 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/9f12b5c6 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/9f12b5c6 Branch: refs/heads/master Commit: 9f12b5c6650a74749cfc17bf0318f3f8d699e490 Parents: 2f3ab98 Author: Mike Dusenberry <[email protected]> Authored: Wed Aug 24 17:52:50 2016 -0700 Committer: Mike Dusenberry <[email protected]> Committed: Wed Aug 24 17:52:50 2016 -0700 ---------------------------------------------------------------------- .../examples/Example - MNIST LeNet.ipynb | 117 +++++++------------ .../Example - MNIST Softmax Classifier.ipynb | 78 +++++-------- 2 files changed, 75 insertions(+), 120 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9f12b5c6/scripts/staging/SystemML-NN/examples/Example - MNIST LeNet.ipynb ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/examples/Example - MNIST LeNet.ipynb b/scripts/staging/SystemML-NN/examples/Example - MNIST LeNet.ipynb index 18a94b5..4ed79d5 100644 --- a/scripts/staging/SystemML-NN/examples/Example - MNIST LeNet.ipynb +++ b/scripts/staging/SystemML-NN/examples/Example - MNIST LeNet.ipynb @@ -16,10 +16,10 @@ "outputs": [], "source": [ "# Add SystemML PySpark API file.\n", - "sc.addPyFile(\"https://raw.githubusercontent.com/apache/incubator-systemml/branch-0.10/src/main/java/org/apache/sysml/api/python/SystemML.py\")\n", + "sc.addPyFile(\"https://raw.githubusercontent.com/apache/incubator-systemml/2f3ab98804dace8b1ac1fef70d841c8df88290ab/src/main/python/SystemML.py\")\n", "\n", "# Create a SystemML MLContext object\n", - "from SystemML import MLContext\n", + "from SystemML import MLContext, dml\n", "ml = MLContext(sc)" ] }, @@ -74,15 +74,12 @@ }, "outputs": [], "source": [ - "script = \"\"\"\n", + "script_string = \"\"\"\n", "source(\"mnist_lenet.dml\") as mnist_lenet\n", "\n", "# Read training data\n", "data = read($data, format=\"csv\")\n", "n = nrow(data)\n", - "C = $C\n", - "Hin = $Hin\n", - "Win = $Win\n", "\n", "# Extract images and labels\n", "images = data[,2:ncol(data)]\n", @@ -100,57 +97,19 @@ "\n", "# Train\n", "[W1, b1, W2, b2, W3, b3, W4, b4] = mnist_lenet::train(X, y, X_val, y_val, C, Hin, Win)\n", - "\n", - "# Write model out\n", - "write(W1, $W1out)\n", - "write(b1, $b1out)\n", - "write(W2, $W2out)\n", - "write(b2, $b2out)\n", - "write(W3, $W3out)\n", - "write(b3, $b3out)\n", - "write(W4, $W4out)\n", - "write(b4, $b4out)\n", - "\n", - "print(\"\")\n", - "print(\"\")\n", "\"\"\"\n", - "ml.reset()\n", - "out = ml.executeScript(script, {\"data\": \"data/mnist/mnist_train.csv\",\n", - " \"C\": 1, \"Hin\": 28, \"Win\": 28},\n", - " outputs=[\"W1\", \"b1\", \"W2\", \"b2\", \"W3\", \"b3\", \"W4\", \"b4\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Extract model from SystemML back into PySpark" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# Extract variables\n", - "W1 = out.getDF(sqlContext, \"W1\").sort(\"ID\").drop(\"ID\")\n", - "b1 = out.getDF(sqlContext, \"b1\").sort(\"ID\").drop(\"ID\")\n", - "W2 = out.getDF(sqlContext, \"W2\").sort(\"ID\").drop(\"ID\")\n", - "b2 = out.getDF(sqlContext, \"b2\").sort(\"ID\").drop(\"ID\")\n", - "W3 = out.getDF(sqlContext, \"W3\").sort(\"ID\").drop(\"ID\")\n", - "b3 = out.getDF(sqlContext, \"b3\").sort(\"ID\").drop(\"ID\")\n", - "W4 = out.getDF(sqlContext, \"W4\").sort(\"ID\").drop(\"ID\")\n", - "b4 = out.getDF(sqlContext, \"b4\").sort(\"ID\").drop(\"ID\")" + "script = (dml(script_string).input(\"$data\", \"data/mnist/mnist_train.csv\")\n", + " .input(C=1, Hin=28, Win=28)\n", + " .out(\"W1\", \"b1\", \"W2\", \"b2\", \"W3\", \"b3\", \"W4\", \"b4\"))\n", + "W1, b1, W2, b2, W3, b3, W4, b4 = (ml.execute(script)\n", + " .get(\"W1\", \"b1\", \"W2\", \"b2\", \"W3\", \"b3\", \"W4\", \"b4\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### 3. Compute Test Accuracy" + "### 2. Compute Test Accuracy" ] }, { @@ -161,15 +120,12 @@ }, "outputs": [], "source": [ - "script = \"\"\"\n", + "script_string = \"\"\"\n", "source(\"mnist_lenet.dml\") as mnist_lenet\n", "\n", "# Read test data\n", "data = read($data, format=\"csv\")\n", "n = nrow(data)\n", - "C = $C\n", - "Hin = $Hin\n", - "Win = $Win\n", "\n", "# Extract images and labels\n", "X_test = data[,2:ncol(data)]\n", @@ -179,32 +135,45 @@ "X_test = (X_test / 255.0) * 2 - 1\n", "y_test = table(seq(1, n), y_test+1, n, 10)\n", "\n", - "# Read model coefficients\n", - "W1 = read($W1)\n", - "b1 = read($b1)\n", - "W2 = read($W2)\n", - "b2 = read($b2)\n", - "W3 = read($W3)\n", - "b3 = read($b3)\n", - "W4 = read($W4)\n", - "b4 = read($b4)\n", - "\n", "# Eval on test set\n", "probs = mnist_lenet::predict(X_test, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4)\n", "[loss, accuracy] = mnist_lenet::eval(probs, y_test)\n", "\n", "print(\"Test Accuracy: \" + accuracy)\n", - "\n", - "print(\"\")\n", - "print(\"\")\n", "\"\"\"\n", - "ml.reset()\n", - "ml.executeScript(script, {\"data\": \"data/mnist/mnist_train.csv\",\n", - " \"C\": 1, \"Hin\": 28, \"Win\": 28,\n", - " \"W1\": W1, \"b1\": b1,\n", - " \"W2\": W2, \"b2\": b2,\n", - " \"W3\": W3, \"b3\": b3,\n", - " \"W4\": W4, \"b4\": b4})" + "script = dml(script_string).input(**{\"$data\": \"data/mnist/mnist_train.csv\",\n", + " \"C\": 1, \"Hin\": 28, \"Win\": 28,\n", + " \"W1\": W1, \"b1\": b1,\n", + " \"W2\": W2, \"b2\": b2,\n", + " \"W3\": W3, \"b3\": b3,\n", + " \"W4\": W4, \"b4\": b4})\n", + "ml.execute(script)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Extract Model Into Spark DataFrames For Future Use" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "W1_df = W1.toDF()\n", + "b1_df = b1.toDF()\n", + "W2_df = W2.toDF()\n", + "b2_df = b2.toDF()\n", + "W3_df = W3.toDF()\n", + "b3_df = b3.toDF()\n", + "W4_df = W4.toDF()\n", + "b4_df = b4.toDF()\n", + "W1_df, b1_df, W2_df, b2_df, W3_df, b3_df, W4_df, b4_df" ] } ], http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9f12b5c6/scripts/staging/SystemML-NN/examples/Example - MNIST Softmax Classifier.ipynb ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/examples/Example - MNIST Softmax Classifier.ipynb b/scripts/staging/SystemML-NN/examples/Example - MNIST Softmax Classifier.ipynb index 1a73294..f383ef8 100644 --- a/scripts/staging/SystemML-NN/examples/Example - MNIST Softmax Classifier.ipynb +++ b/scripts/staging/SystemML-NN/examples/Example - MNIST Softmax Classifier.ipynb @@ -11,15 +11,16 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "scrolled": false }, "outputs": [], "source": [ "# Add SystemML PySpark API file.\n", - "sc.addPyFile(\"https://raw.githubusercontent.com/apache/incubator-systemml/branch-0.10/src/main/java/org/apache/sysml/api/python/SystemML.py\")\n", + "sc.addPyFile(\"https://raw.githubusercontent.com/apache/incubator-systemml/2f3ab98804dace8b1ac1fef70d841c8df88290ab/src/main/python/SystemML.py\")\n", "\n", "# Create a SystemML MLContext object\n", - "from SystemML import MLContext\n", + "from SystemML import MLContext, dml\n", "ml = MLContext(sc)" ] }, @@ -41,7 +42,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": true }, "outputs": [], "source": [ @@ -74,7 +75,7 @@ }, "outputs": [], "source": [ - "script = \"\"\"\n", + "training = \"\"\"\n", "source(\"mnist_softmax.dml\") as mnist_softmax\n", "\n", "# Read training data\n", @@ -97,43 +98,16 @@ "\n", "# Train\n", "[W, b] = mnist_softmax::train(X, y, X_val, y_val)\n", - "\n", - "# Write model out (we will extract these back into PySpark)\n", - "write(W, $Wout)\n", - "write(b, $bout)\n", - "\n", - "print(\"\")\n", - "print(\"\")\n", "\"\"\"\n", - "ml.reset()\n", - "out = ml.executeScript(script, {\"data\": \"data/mnist/mnist_train.csv\"},\n", - " outputs=[\"W\", \"b\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Extract model from SystemML back into PySpark" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "W = out.getDF(sqlContext, \"W\").sort(\"ID\").drop(\"ID\")\n", - "b = out.getDF(sqlContext, \"b\").sort(\"ID\").drop(\"ID\")" + "script = dml(training).input(\"$data\", \"data/mnist/mnist_train.csv\").out(\"W\", \"b\")\n", + "W, b = ml.execute(script).get(\"W\", \"b\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### 3. Compute Test Accuracy" + "### 2. Compute Test Accuracy" ] }, { @@ -144,7 +118,7 @@ }, "outputs": [], "source": [ - "script = \"\"\"\n", + "testing = \"\"\"\n", "source(\"mnist_softmax.dml\") as mnist_softmax\n", "\n", "# Read test data\n", @@ -159,22 +133,34 @@ "X_test = X_test / 255.0\n", "y_test = table(seq(1, n), y_test+1, n, 10)\n", "\n", - "# Read model coefficients\n", - "W = read($W)\n", - "b = read($b)\n", - "\n", "# Eval on test set\n", "probs = mnist_softmax::predict(X_test, W, b)\n", "[loss, accuracy] = mnist_softmax::eval(probs, y_test)\n", "\n", "print(\"Test Accuracy: \" + accuracy)\n", - "\n", - "print(\"\")\n", - "print(\"\")\n", "\"\"\"\n", - "ml.reset()\n", - "out = ml.executeScript(script, {\"data\": \"data/mnist/mnist_test.csv\",\n", - " \"W\": W, \"b\": b})" + "script = dml(testing).input(\"$data\", \"data/mnist/mnist_test.csv\", W=W, b=b)\n", + "ml.execute(script)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Extract Model Into Spark DataFrames For Future Use" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "W_df = W.toDF()\n", + "b_df = b.toDF()\n", + "W_df, b_df" ] } ],
