[SYSTEMML-618] SystemML-NN: Adding an MNIST "LeNet" neural net example, including a DML script and a Jupyter PySpark notebook.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/3a3c1659 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/3a3c1659 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/3a3c1659 Branch: refs/heads/master Commit: 3a3c16599433fb9a0a7b00cc2d8b7f8a0ca53b15 Parents: d47fe8f Author: Mike Dusenberry <[email protected]> Authored: Fri Jun 24 17:34:31 2016 -0700 Committer: Mike Dusenberry <[email protected]> Committed: Fri Jun 24 17:34:32 2016 -0700 ---------------------------------------------------------------------- .../SystemML-NN/Example - MNIST LeNet.ipynb | 231 ++++++++++++ .../SystemML-NN/examples/mnist_lenet.dml | 360 +++++++++++++++++++ 2 files changed, 591 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/3a3c1659/scripts/staging/SystemML-NN/Example - MNIST LeNet.ipynb ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/Example - MNIST LeNet.ipynb b/scripts/staging/SystemML-NN/Example - MNIST LeNet.ipynb new file mode 100644 index 0000000..1926f3b --- /dev/null +++ b/scripts/staging/SystemML-NN/Example - MNIST LeNet.ipynb @@ -0,0 +1,231 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Quick Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Add SystemML PySpark API file.\n", + "sc.addPyFile(\"https://raw.githubusercontent.com/apache/incubator-systemml/3d5f9b11741f6d6ecc6af7cbaa1069cde32be838/src/main/java/org/apache/sysml/api/python/SystemML.py\")\n", + "\n", + "# Create a SystemML MLContext object\n", + "from SystemML import MLContext\n", + "ml = MLContext(sc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 
Download Data - MNIST" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The MNIST dataset contains labeled images of handwritten digits, where each example is a 28x28 pixel image of grayscale values in the range [0,255] stretched out as 784 pixels, and each label is one of 10 possible digits in [0,9]. Here, we download 60,000 training examples, and 10,000 test examples, where the format is \"label, pixel_1, pixel_2, ..., pixel_n\"." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "%%sh\n", + "mkdir -p examples/data/mnist/\n", + "cd examples/data/mnist/\n", + "curl -O http://pjreddie.com/media/files/mnist_train.csv\n", + "curl -O http://pjreddie.com/media/files/mnist_test.csv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## SystemML \"LeNet\" Neural Network" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Train" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "script = \"\"\"\n", + "source(\"examples/mnist_lenet.dml\") as mnist_lenet\n", + "\n", + "# Read training data\n", + "data = read($data, format=\"csv\")\n", + "n = nrow(data)\n", + "C = $C\n", + "Hin = $Hin\n", + "Win = $Win\n", + "\n", + "# Extract images and labels\n", + "images = data[,2:ncol(data)]\n", + "labels = data[,1]\n", + "\n", + "# Scale images to [-1,1], and one-hot encode the labels\n", + "images = (images / 255.0) * 2 - 1\n", + "labels = table(seq(1, n), labels+1, n, 10)\n", + "\n", + "# Split into training (55,000 examples) and validation (5,000 examples)\n", + "X = images[5001:nrow(images),]\n", + "X_val = images[1:5000,]\n", + "y = labels[5001:nrow(images),]\n", + "y_val = labels[1:5000,]\n", + "\n", + "# Train\n", + "[W1, b1, W2, b2, W3, b3, W4, b4] = mnist_lenet::train(X, y, X_val, y_val, C, Hin, Win)\n", + "\n", + "# Write 
model out\n", + "write(W1, $W1out)\n", + "write(b1, $b1out)\n", + "write(W2, $W2out)\n", + "write(b2, $b2out)\n", + "write(W3, $W3out)\n", + "write(b3, $b3out)\n", + "write(W4, $W4out)\n", + "write(b4, $b4out)\n", + "\n", + "print(\"\")\n", + "print(\"\")\n", + "\"\"\"\n", + "ml.reset()\n", + "out = ml.executeScript(script, {\"data\": \"examples/data/mnist/mnist_train.csv\",\n", + " \"C\": 1, \"Hin\": 28, \"Win\": 28},\n", + " outputs=[\"W1\", \"b1\", \"W2\", \"b2\", \"W3\", \"b3\", \"W4\", \"b4\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Extract model from SystemML back into PySpark" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Extract variables\n", + "W1 = out.getDF(sqlContext, \"W1\").sort(\"ID\").drop(\"ID\")\n", + "b1 = out.getDF(sqlContext, \"b1\").sort(\"ID\").drop(\"ID\")\n", + "W2 = out.getDF(sqlContext, \"W2\").sort(\"ID\").drop(\"ID\")\n", + "b2 = out.getDF(sqlContext, \"b2\").sort(\"ID\").drop(\"ID\")\n", + "W3 = out.getDF(sqlContext, \"W3\").sort(\"ID\").drop(\"ID\")\n", + "b3 = out.getDF(sqlContext, \"b3\").sort(\"ID\").drop(\"ID\")\n", + "W4 = out.getDF(sqlContext, \"W4\").sort(\"ID\").drop(\"ID\")\n", + "b4 = out.getDF(sqlContext, \"b4\").sort(\"ID\").drop(\"ID\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. 
Compute Test Accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "script = \"\"\"\n", + "source(\"examples/mnist_lenet.dml\") as mnist_lenet\n", + "\n", + "# Read test data\n", + "data = read($data, format=\"csv\")\n", + "n = nrow(data)\n", + "C = $C\n", + "Hin = $Hin\n", + "Win = $Win\n", + "\n", + "# Extract images and labels\n", + "X_test = data[,2:ncol(data)]\n", + "y_test = data[,1]\n", + "\n", + "# Scale images to [-1,1], and one-hot encode the labels\n", + "X_test = (X_test / 255.0) * 2 - 1\n", + "y_test = table(seq(1, n), y_test+1, n, 10)\n", + "\n", + "# Read model coefficients\n", + "W1 = read($W1)\n", + "b1 = read($b1)\n", + "W2 = read($W2)\n", + "b2 = read($b2)\n", + "W3 = read($W3)\n", + "b3 = read($b3)\n", + "W4 = read($W4)\n", + "b4 = read($b4)\n", + "\n", + "# Eval on test set\n", + "[loss, accuracy] = mnist_lenet::eval(X_test, y_test, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4)\n", + "\n", + "print(\"Test Accuracy: \" + accuracy)\n", + "\n", + "print(\"\")\n", + "print(\"\")\n", + "\"\"\"\n", + "ml.reset()\n", + "ml.executeScript(script, {\"data\": \"examples/data/mnist/mnist_test.csv\",\n", + " \"C\": 1, \"Hin\": 28, \"Win\": 28,\n", + " \"W1\": W1, \"b1\": b1,\n", + " \"W2\": W2, \"b2\": b2,\n", + " \"W3\": W3, \"b3\": b3,\n", + " \"W4\": W4, \"b4\": b4})" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/3a3c1659/scripts/staging/SystemML-NN/examples/mnist_lenet.dml 
---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/examples/mnist_lenet.dml b/scripts/staging/SystemML-NN/examples/mnist_lenet.dml new file mode 100644 index 0000000..bd6361f --- /dev/null +++ b/scripts/staging/SystemML-NN/examples/mnist_lenet.dml @@ -0,0 +1,360 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + +/* + * MNIST LeNet Example + */ +# Imports +source("nn/layers/affine.dml") as affine +source("nn/layers/conv_builtin.dml") as conv +source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss +source("nn/layers/l2_reg.dml") as l2_reg +source("nn/layers/max_pool_builtin.dml") as max_pool +source("nn/layers/relu.dml") as relu +source("nn/layers/softmax.dml") as softmax +source("nn/optim/sgd_nesterov.dml") as sgd_nesterov + +train = function(matrix[double] X, matrix[double] y, + matrix[double] X_val, matrix[double] y_val, + int C, int Hin, int Win) + return (matrix[double] W1, matrix[double] b1, + matrix[double] W2, matrix[double] b2, + matrix[double] W3, matrix[double] b3, + matrix[double] W4, matrix[double] b4) { + /* + * Trains a convolutional net using the "LeNet" architecture. + * + * The input matrix, X, has N examples, each represented as a 3D + * volume unrolled into a single vector. The targets, y, have K + * classes, and are one-hot encoded. + * + * Inputs: + * - X: Input data matrix, of shape (N, C*Hin*Win). + * - y: Target matrix, of shape (N, K). + * - X_val: Input validation data matrix, of shape (N, C*Hin*Win). + * - y_val: Target validation matrix, of shape (N, K). + * - C: Number of input channels (dimensionality of input depth). + * - Hin: Input height. + * - Win: Input width. + * + * Outputs: + * - W1: 1st layer weights (parameters) matrix, of shape (F1, C*Hf*Wf). + * - b1: 1st layer biases vector, of shape (F1, 1). + * - W2: 2nd layer weights (parameters) matrix, of shape (F2, F1*Hf*Wf). + * - b2: 2nd layer biases vector, of shape (F2, 1). + * - W3: 3rd layer weights (parameters) matrix, of shape (F2*(Hin/4)*(Win/4), N3). + * - b3: 3rd layer biases vector, of shape (1, N3). + * - W4: 4th layer weights (parameters) matrix, of shape (N3, K). + * - b4: 4th layer biases vector, of shape (1, K). 
+ */ + N = nrow(X) + K = ncol(y) + + # Create network: + # conv1 -> relu1 -> pool1 -> conv2 -> relu2 -> pool2 -> affine3 -> relu3 -> affine4 -> softmax + Hf = 5 # filter height + Wf = 5 # filter width + stride = 1 + pad = 2 # For same dimensions, (Hf - stride) / 2 + + F1 = 32 # num conv filters in conv1 + F2 = 64 # num conv filters in conv2 + N3 = 512 # num nodes in affine3 + # Note: affine4 has K nodes, which is equal to the number of target dimensions (num classes) + + [W1, b1] = conv::init(F1, C, Hf, Wf) # inputs: (N, C*Hin*Win) + [W2, b2] = conv::init(F2, F1, Hf, Wf) # inputs: (N, F1*(Hin/2)*(Win/2)) + [W3, b3] = affine::init(F2*(Hin/2/2)*(Win/2/2), N3) # inputs: (N, F2*(Hin/2/2)*(Win/2/2)) + [W4, b4] = affine::init(N3, K) # inputs: (N, N3) + W4 = W4 / sqrt(2) # different initialization, since being fed into softmax, instead of relu + + # Initialize SGD w/ Nesterov momentum optimizer + lr = 0.01 # learning rate + mu = 0.9 #0.5 # momentum + decay = 0.95 # learning rate decay constant + vW1 = sgd_nesterov::init(W1); vb1 = sgd_nesterov::init(b1) + vW2 = sgd_nesterov::init(W2); vb2 = sgd_nesterov::init(b2) + vW3 = sgd_nesterov::init(W3); vb3 = sgd_nesterov::init(b3) + vW4 = sgd_nesterov::init(W4); vb4 = sgd_nesterov::init(b4) + + # Regularization + lambda = 5e-04 + + # Optimize + print("Starting optimization") + batch_size = 64 + epochs = 10 + iters = ceil(N / batch_size) + for (e in 1:epochs) { + for(i in 1:iters) { + # Get next batch + beg = ((i-1) * batch_size) %% N + 1 + end = min(N, beg + batch_size - 1) + X_batch = X[beg:end,] + y_batch = y[beg:end,] + + # Compute forward pass + ## layer 1: conv1 -> relu1 -> pool1 + [outc1, Houtc1, Woutc1] = conv::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + outr1 = relu::forward(outc1) + [outp1, Houtp1, Woutp1] = max_pool::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2, strideh=2, stridew=2) + ## layer 2: conv2 -> relu2 -> pool2 + [outc2, Houtc2, Woutc2] = conv::forward(outp1, W2, b2, F1, Houtp1, 
Woutp1, Hf, Wf, stride, stride, pad, pad) + outr2 = relu::forward(outc2) + [outp2, Houtp2, Woutp2] = max_pool::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2, strideh=2, stridew=2) + ## layer 3: affine3 -> relu3 + outa3 = affine::forward(outp2, W3, b3) + outr3 = relu::forward(outa3) + ## layer 4: affine4 -> softmax + outa4 = affine::forward(outr3, W4, b4) + probs = softmax::forward(outa4) + + # Compute loss & accuracy for training & validation data every 100 iterations. + if (i %% 100 == 0) { + # Compute training loss & accuracy + loss_data = cross_entropy_loss::forward(probs, y_batch) + loss_reg_W1 = l2_reg::forward(W1, lambda) + loss_reg_W2 = l2_reg::forward(W2, lambda) + loss_reg_W3 = l2_reg::forward(W3, lambda) + loss_reg_W4 = l2_reg::forward(W4, lambda) + loss = loss_data + loss_reg_W1 + loss_reg_W2 + loss_reg_W3 + loss_reg_W4 + accuracy = mean(rowIndexMax(probs) == rowIndexMax(y_batch)) + + # Compute validation loss & accuracy + [loss_val, accuracy_val] = eval(X_val, y_val, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4) + + # Output results + print("Epoch: " + e + ", Iter: " + i + ", Train Loss: " + loss + ", Train Accuracy: " + accuracy + ", Val Loss: " + loss_val + ", Val Accuracy: " + accuracy_val) + } + + # Compute data backward pass + ## loss: + dprobs = cross_entropy_loss::backward(probs, y_batch) + ## layer 4: affine4 -> softmax + douta4 = softmax::backward(dprobs, outa4) + [doutr3, dW4, db4] = affine::backward(douta4, outr3, W4, b4) + ## layer 3: affine3 -> relu3 + douta3 = relu::backward(doutr3, outa3) + [doutp2, dW3, db3] = affine::backward(douta3, outp2, W3, b3) + ## layer 2: conv2 -> relu2 -> pool2 + doutr2 = max_pool::backward(doutp2, Houtp2, Woutp2, outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2, strideh=2, stridew=2) + doutc2 = relu::backward(doutr2, outc2) + [doutp1, dW2, db2] = conv::backward(doutc2, Houtc2, Woutc2, outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf, stride, stride, pad, pad) + ## layer 1: conv1 -> relu1 -> pool1 + doutr1 = 
max_pool::backward(doutp1, Houtp1, Woutp1, outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2, strideh=2, stridew=2) + doutc1 = relu::backward(doutr1, outc1) + [dX_batch, dW1, db1] = conv::backward(doutc1, Houtc1, Woutc1, X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + + # Compute regularization backward pass + dW1_reg = l2_reg::backward(W1, lambda) + dW2_reg = l2_reg::backward(W2, lambda) + dW3_reg = l2_reg::backward(W3, lambda) + dW4_reg = l2_reg::backward(W4, lambda) + dW1 = dW1 + dW1_reg + dW2 = dW2 + dW2_reg + dW3 = dW3 + dW3_reg + dW4 = dW4 + dW4_reg + + # Optimize with SGD w/ Nesterov momentum + [W1, vW1] = sgd_nesterov::update(W1, dW1, lr, mu, vW1) + [b1, vb1] = sgd_nesterov::update(b1, db1, lr, mu, vb1) + [W2, vW2] = sgd_nesterov::update(W2, dW2, lr, mu, vW2) + [b2, vb2] = sgd_nesterov::update(b2, db2, lr, mu, vb2) + [W3, vW3] = sgd_nesterov::update(W3, dW3, lr, mu, vW3) + [b3, vb3] = sgd_nesterov::update(b3, db3, lr, mu, vb3) + [W4, vW4] = sgd_nesterov::update(W4, dW4, lr, mu, vW4) + [b4, vb4] = sgd_nesterov::update(b4, db4, lr, mu, vb4) + } + # Anneal momentum towards 0.999 + #mu = mu + (0.999 - mu)/(1+epochs-e) + # Decay learning rate + lr = lr * decay + } +} + +eval = function(matrix[double] X, matrix[double] y, int C, int Hin, int Win, + matrix[double] W1, matrix[double] b1, + matrix[double] W2, matrix[double] b2, + matrix[double] W3, matrix[double] b3, + matrix[double] W4, matrix[double] b4) + return (double loss, double accuracy) { + /* + * Evaluates a convolutional net using the "LeNet" architecture. + * + * The input matrix, X, has N examples, each represented as a 3D + * volume unrolled into a single vector. The targets, y, have K + * classes, and are one-hot encoded. + * + * Inputs: + * - X: Input data matrix, of shape (N, C*Hin*Win). + * - y: Target matrix, of shape (N, K). + * - C: Number of input channels (dimensionality of input depth). + * - Hin: Input height. + * - Win: Input width. 
+ * - W1: 1st layer weights (parameters) matrix, of shape (F1, C*Hf*Wf). + * - b1: 1st layer biases vector, of shape (F1, 1). + * - W2: 2nd layer weights (parameters) matrix, of shape (F2, F1*Hf*Wf). + * - b2: 2nd layer biases vector, of shape (F2, 1). + * - W3: 3rd layer weights (parameters) matrix, of shape (F2*(Hin/4)*(Win/4), N3). + * - b3: 3rd layer biases vector, of shape (1, N3). + * - W4: 4th layer weights (parameters) matrix, of shape (N3, K). + * - b4: 4th layer biases vector, of shape (1, K). + * + * Outputs: + * - loss: Scalar loss, of shape (1). + * - accuracy: Scalar accuracy, of shape (1). + */ + # Eval network: + # conv1 -> relu1 -> pool1 -> conv2 -> relu2 -> pool2 -> affine3 -> relu3 -> affine4 -> softmax + Hf = 5 # filter height + Wf = 5 # filter width + stride = 1 + pad = 2 # For same dimensions, (Hf - stride) / 2 + + F1 = nrow(W1) # num conv filters in conv1 + F2 = nrow(W2) # num conv filters in conv2 + N3 = ncol(W3) # num nodes in affine3 + K = ncol(W4) # num nodes in affine4, equal to number of target dimensions (num classes) + + # Compute forward pass + ## layer 1: conv1 -> relu1 -> pool1 + [outc1, Houtc1, Woutc1] = conv::forward(X, W1, b1, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + outr1 = relu::forward(outc1) + [outp1, Houtp1, Woutp1] = max_pool::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2, strideh=2, stridew=2) + ## layer 2: conv2 -> relu2 -> pool2 + [outc2, Houtc2, Woutc2] = conv::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf, stride, stride, pad, pad) + outr2 = relu::forward(outc2) + [outp2, Houtp2, Woutp2] = max_pool::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2, strideh=2, stridew=2) + ## layer 3: affine3 -> relu3 + outa3 = affine::forward(outp2, W3, b3) + outr3 = relu::forward(outa3) + ## layer 4: affine4 -> softmax + outa4 = affine::forward(outr3, W4, b4) + probs = softmax::forward(outa4) + + # Compute loss & accuracy + loss = cross_entropy_loss::forward(probs, y) + correct_pred = rowIndexMax(probs) == rowIndexMax(y) + 
accuracy = mean(correct_pred) +} + +generate_dummy_data = function() + return (matrix[double] X, matrix[double] y, int C, int Hin, int Win) { + /* + * Generate a dummy dataset similar to the MNIST dataset. + * + * Outputs: + * - X: Input data matrix, of shape (N, D). + * - y: Target matrix, of shape (N, K). + * - C: Number of input channels (dimensionality of input depth). + * - Hin: Input height. + * - Win: Input width. + */ + # Generate dummy input data + N = 1024 # num examples + C = 1 # num input channels + Hin = 28 # input height + Win = 28 # input width + K = 10 # num target classes + X = rand(rows=N, cols=C*Hin*Win, pdf="normal") + classes = round(rand(rows=N, cols=1, min=1, max=K, pdf="uniform")) + y = table(seq(1, N), classes) # one-hot encoding +} + + +# +# Main +# +# This runs if called as a script. +# +# The MNIST dataset contains labeled images of handwritten digits, +# where each example is a 28x28 pixel image of grayscale values in +# the range [0,255] stretched out as 784 pixels, and each label is +# one of 10 possible digits in [0,9]. +# +# Here, we assume 60,000 training examples, and 10,000 test examples, +# where the format is "label, pixel_1, pixel_2, ..., pixel_n". +# +# 1. Download data +# ``` +# examples/get_mnist_data.sh +# ``` +# +# 2. 
Execute using Spark +# ``` +# $SPARK_HOME/bin/spark-submit --master local[*] --driver-memory 10G +# --conf spark.driver.maxResultSize=0 --conf spark.akka.frameSize=128 +# $SYSTEMML_HOME/target/SystemML.jar -f examples/mnist_lenet.dml +# -nvargs train=examples/data/mnist/mnist_train.csv +# test=examples/data/mnist/mnist_test.csv C=1 Hin=28 Win=28 out_dir=examples/model/mnist_lenet +# ``` +# + +# Read training data +train = read($train, format="csv") +test = read($test, format="csv") +C = $C +Hin = $Hin +Win = $Win + +# Extract images and labels +images = train[,2:ncol(train)] +labels = train[,1] +X_test = test[,2:ncol(test)] +y_test = test[,1] + +# Scale images to [-1,1], and one-hot encode the labels +n = nrow(train) +n_test = nrow(test) +images = (images / 255.0) * 2 - 1 +labels = table(seq(1, n), labels+1, n, 10) +X_test = (X_test / 255.0) * 2 - 1 +y_test = table(seq(1, n_test), y_test+1, n_test, 10) + +# Split into training (55,000 examples) and validation (5,000 examples) +X = images[5001:nrow(images),] +X_val = images[1:5000,] +y = labels[5001:nrow(images),] +y_val = labels[1:5000,] + +# Train +[W1, b1, W2, b2, W3, b3, W4, b4] = train(X, y, X_val, y_val, C, Hin, Win) + +# Write model out +write(W1, $out_dir+"/W1") +write(b1, $out_dir+"/b1") +write(W2, $out_dir+"/W2") +write(b2, $out_dir+"/b2") +write(W3, $out_dir+"/W3") +write(b3, $out_dir+"/b3") +write(W4, $out_dir+"/W4") +write(b4, $out_dir+"/b4") + +# Eval on test set +[loss, accuracy] = eval(X_test, y_test, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4) + +# Output results +print("Test Accuracy: " + accuracy) +write(accuracy, $out_dir+"/accuracy") + +print("") +print("")
