Repository: incubator-systemml
Updated Branches:
  refs/heads/master aa2211ac0 -> 43c321d18
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1f5cf697/scripts/staging/SystemML-NN/nn/examples/mnist_lenet.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/examples/mnist_lenet.dml b/scripts/staging/SystemML-NN/nn/examples/mnist_lenet.dml
new file mode 100644
index 0000000..e5755c4
--- /dev/null
+++ b/scripts/staging/SystemML-NN/nn/examples/mnist_lenet.dml
@@ -0,0 +1,331 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * MNIST LeNet Example
+ */
+# Imports
+source("nn/layers/affine.dml") as affine
+source("nn/layers/conv2d_builtin.dml") as conv2d
+source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
+source("nn/layers/dropout.dml") as dropout
+source("nn/layers/l2_reg.dml") as l2_reg
+source("nn/layers/max_pool2d_builtin.dml") as max_pool2d
+source("nn/layers/relu.dml") as relu
+source("nn/layers/softmax.dml") as softmax
+source("nn/optim/sgd_nesterov.dml") as sgd_nesterov
+
+train = function(matrix[double] X, matrix[double] y,
+                 matrix[double] X_val, matrix[double] y_val,
+                 int C, int Hin, int Win, int epochs)
+    return (matrix[double] W1, matrix[double] b1,
+            matrix[double] W2, matrix[double] b2,
+            matrix[double] W3, matrix[double] b3,
+            matrix[double] W4, matrix[double] b4) {
+  /*
+   * Trains a convolutional net using the "LeNet" architecture.
+   *
+   * The input matrix, X, has N examples, each represented as a 3D
+   * volume unrolled into a single vector. The targets, y, have K
+   * classes, and are one-hot encoded.
+   *
+   * Inputs:
+   *  - X: Input data matrix, of shape (N, C*Hin*Win).
+   *  - y: Target matrix, of shape (N, K).
+   *  - X_val: Input validation data matrix, of shape (N, C*Hin*Win).
+   *  - y_val: Target validation matrix, of shape (N, K).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   *  - epochs: Total number of full training loops over the full data set.
+   *
+   * Outputs:
+   *  - W1: 1st layer weights (parameters) matrix, of shape (F1, C*Hf*Wf).
+   *  - b1: 1st layer biases vector, of shape (F1, 1).
+   *  - W2: 2nd layer weights (parameters) matrix, of shape (F2, F1*Hf*Wf).
+   *  - b2: 2nd layer biases vector, of shape (F2, 1).
+   *  - W3: 3rd layer weights (parameters) matrix, of shape (F2*(Hin/4)*(Win/4), N3).
+   *  - b3: 3rd layer biases vector, of shape (1, N3).
+   *  - W4: 4th layer weights (parameters) matrix, of shape (N3, K).
+   *  - b4: 4th layer biases vector, of shape (1, K).
+   */
+  N = nrow(X)
+  K = ncol(y)
+
+  # Create network:
+  # conv1 -> relu1 -> pool1 -> conv2 -> relu2 -> pool2 -> affine3 -> relu3 -> affine4 -> softmax
+  Hf = 5  # filter height
+  Wf = 5  # filter width
+  stride = 1
+  pad = 2  # For same dimensions, (Hf - stride) / 2
+
+  F1 = 32  # num conv filters in conv1
+  F2 = 64  # num conv filters in conv2
+  N3 = 512  # num nodes in affine3
+  # Note: affine4 has K nodes, which is equal to the number of target dimensions (num classes)
+
+  [W1, b1] = conv2d::init(F1, C, Hf, Wf)  # inputs: (N, C*Hin*Win)
+  [W2, b2] = conv2d::init(F2, F1, Hf, Wf)  # inputs: (N, F1*(Hin/2)*(Win/2))
+  [W3, b3] = affine::init(F2*(Hin/2/2)*(Win/2/2), N3)  # inputs: (N, F2*(Hin/2/2)*(Win/2/2))
+  [W4, b4] = affine::init(N3, K)  # inputs: (N, N3)
+  W4 = W4 / sqrt(2)  # different initialization, since being fed into softmax, instead of relu
+
+  # Initialize SGD w/ Nesterov momentum optimizer
+  lr = 0.01  # learning rate
+  mu = 0.9  # momentum
+  decay = 0.95  # learning rate decay constant
+  vW1 = sgd_nesterov::init(W1); vb1 = sgd_nesterov::init(b1)
+  vW2 = sgd_nesterov::init(W2); vb2 = sgd_nesterov::init(b2)
+  vW3 = sgd_nesterov::init(W3); vb3 = sgd_nesterov::init(b3)
+  vW4 = sgd_nesterov::init(W4); vb4 = sgd_nesterov::init(b4)
+
+  # Regularization
+  lambda = 5e-04
+
+  # Optimize
+  print("Starting optimization")
+  batch_size = 64
+  iters = ceil(N / batch_size)
+  for (e in 1:epochs) {
+    for (i in 1:iters) {
+      # Get next batch
+      beg = ((i-1) * batch_size) %% N + 1
+      end = min(N, beg + batch_size - 1)
+      X_batch = X[beg:end,]
+      y_batch = y[beg:end,]
+
+      # Compute forward pass
+      ## layer 1: conv1 -> relu1 -> pool1
+      [outc1, Houtc1, Woutc1] = conv2d::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride,
+                                                pad, pad)
+      outr1 = relu::forward(outc1)
+      [outp1, Houtp1, Woutp1] = max_pool2d::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,
+                                                    strideh=2, stridew=2, padh=0, padw=0)
+      ## layer 2: conv2 -> relu2 -> pool2
+      [outc2, Houtc2, Woutc2] = conv2d::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf,
+                                                stride, stride, pad, pad)
+      outr2 = relu::forward(outc2)
+      [outp2, Houtp2, Woutp2] = max_pool2d::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,
+                                                    strideh=2, stridew=2, padh=0, padw=0)
+      ## layer 3: affine3 -> relu3 -> dropout
+      outa3 = affine::forward(outp2, W3, b3)
+      outr3 = relu::forward(outa3)
+      [outd3, maskd3] = dropout::forward(outr3, 0.5, -1)
+      ## layer 4: affine4 -> softmax
+      outa4 = affine::forward(outd3, W4, b4)
+      probs = softmax::forward(outa4)
+
+      # Compute loss & accuracy for training & validation data every 100 iterations.
+      if (i %% 100 == 0) {
+        # Compute training loss & accuracy
+        loss_data = cross_entropy_loss::forward(probs, y_batch)
+        loss_reg_W1 = l2_reg::forward(W1, lambda)
+        loss_reg_W2 = l2_reg::forward(W2, lambda)
+        loss_reg_W3 = l2_reg::forward(W3, lambda)
+        loss_reg_W4 = l2_reg::forward(W4, lambda)
+        loss = loss_data + loss_reg_W1 + loss_reg_W2 + loss_reg_W3 + loss_reg_W4
+        accuracy = mean(rowIndexMax(probs) == rowIndexMax(y_batch))
+
+        # Compute validation loss & accuracy
+        probs_val = predict(X_val, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4)
+        loss_val = cross_entropy_loss::forward(probs_val, y_val)
+        accuracy_val = mean(rowIndexMax(probs_val) == rowIndexMax(y_val))
+
+        # Output results
+        print("Epoch: " + e + ", Iter: " + i + ", Train Loss: " + loss + ", Train Accuracy: "
+              + accuracy + ", Val Loss: " + loss_val + ", Val Accuracy: " + accuracy_val)
+      }
+
+      # Compute data backward pass
+      ## loss:
+      dprobs = cross_entropy_loss::backward(probs, y_batch)
+      ## layer 4: affine4 -> softmax
+      douta4 = softmax::backward(dprobs, outa4)
+      [doutd3, dW4, db4] = affine::backward(douta4, outd3, W4, b4)  # outd3 (post-dropout) was the input to affine4
+      ## layer 3: affine3 -> relu3 -> dropout
+      doutr3 = dropout::backward(doutd3, outr3, 0.5, maskd3)
+      douta3 = relu::backward(doutr3, outa3)
+      [doutp2, dW3, db3] = affine::backward(douta3, outp2, W3, b3)
+      ## layer 2: conv2 -> relu2 -> pool2
+      doutr2 = max_pool2d::backward(doutp2, Houtp2, Woutp2, outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,
+                                    strideh=2, stridew=2, padh=0, padw=0)
+      doutc2 = relu::backward(doutr2, outc2)
+      [doutp1, dW2, db2] = conv2d::backward(doutc2, Houtc2, Woutc2, outp1, W2, b2, F1,
+                                            Houtp1, Woutp1, Hf, Wf, stride, stride, pad, pad)
+      ## layer 1: conv1 -> relu1 -> pool1
+      doutr1 = max_pool2d::backward(doutp1, Houtp1, Woutp1, outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,
+                                    strideh=2, stridew=2, padh=0, padw=0)
+      doutc1 = relu::backward(doutr1, outc1)
+      [dX_batch, dW1, db1] = conv2d::backward(doutc1, Houtc1, Woutc1, X_batch, W1, b1, C, Hin, Win,
+                                              Hf, Wf, stride, stride, pad, pad)
+
+      # Compute regularization backward pass
+      dW1_reg = l2_reg::backward(W1, lambda)
+      dW2_reg = l2_reg::backward(W2, lambda)
+      dW3_reg = l2_reg::backward(W3, lambda)
+      dW4_reg = l2_reg::backward(W4, lambda)
+      dW1 = dW1 + dW1_reg
+      dW2 = dW2 + dW2_reg
+      dW3 = dW3 + dW3_reg
+      dW4 = dW4 + dW4_reg
+
+      # Optimize with SGD w/ Nesterov momentum
+      [W1, vW1] = sgd_nesterov::update(W1, dW1, lr, mu, vW1)
+      [b1, vb1] = sgd_nesterov::update(b1, db1, lr, mu, vb1)
+      [W2, vW2] = sgd_nesterov::update(W2, dW2, lr, mu, vW2)
+      [b2, vb2] = sgd_nesterov::update(b2, db2, lr, mu, vb2)
+      [W3, vW3] = sgd_nesterov::update(W3, dW3, lr, mu, vW3)
+      [b3, vb3] = sgd_nesterov::update(b3, db3, lr, mu, vb3)
+      [W4, vW4] = sgd_nesterov::update(W4, dW4, lr, mu, vW4)
+      [b4, vb4] = sgd_nesterov::update(b4, db4, lr, mu, vb4)
+    }
+    # Anneal momentum towards 0.999
+    #mu = mu + (0.999 - mu)/(1+epochs-e)
+    # Decay learning rate
+    lr = lr * decay
+  }
+}
+
+predict = function(matrix[double] X, int C, int Hin, int Win,
+                   matrix[double] W1, matrix[double] b1,
+                   matrix[double] W2, matrix[double] b2,
+                   matrix[double] W3, matrix[double] b3,
+                   matrix[double] W4, matrix[double] b4)
+    return (matrix[double] probs) {
+  /*
+   * Computes the class probability predictions of a convolutional
+   * net using the "LeNet" architecture.
+   *
+   * The input matrix, X, has N examples, each represented as a 3D
+   * volume unrolled into a single vector.
+   *
+   * Inputs:
+   *  - X: Input data matrix, of shape (N, C*Hin*Win).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   *  - W1: 1st layer weights (parameters) matrix, of shape (F1, C*Hf*Wf).
+   *  - b1: 1st layer biases vector, of shape (F1, 1).
+   *  - W2: 2nd layer weights (parameters) matrix, of shape (F2, F1*Hf*Wf).
+   *  - b2: 2nd layer biases vector, of shape (F2, 1).
+   *  - W3: 3rd layer weights (parameters) matrix, of shape (F2*(Hin/4)*(Win/4), N3).
+   *  - b3: 3rd layer biases vector, of shape (1, N3).
+   *  - W4: 4th layer weights (parameters) matrix, of shape (N3, K).
+   *  - b4: 4th layer biases vector, of shape (1, K).
+   *
+   * Outputs:
+   *  - probs: Class probabilities, of shape (N, K).
+   */
+  N = nrow(X)
+
+  # Network:
+  # conv1 -> relu1 -> pool1 -> conv2 -> relu2 -> pool2 -> affine3 -> relu3 -> affine4 -> softmax
+  Hf = 5  # filter height
+  Wf = 5  # filter width
+  stride = 1
+  pad = 2  # For same dimensions, (Hf - stride) / 2
+
+  F1 = nrow(W1)  # num conv filters in conv1
+  F2 = nrow(W2)  # num conv filters in conv2
+  N3 = ncol(W3)  # num nodes in affine3
+  K = ncol(W4)  # num nodes in affine4, equal to number of target dimensions (num classes)
+
+  # Compute predictions over mini-batches
+  probs = matrix(0, rows=N, cols=K)
+  batch_size = 64
+  iters = ceil(N / batch_size)
+  for (i in 1:iters) {
+    # Get next batch
+    beg = ((i-1) * batch_size) %% N + 1
+    end = min(N, beg + batch_size - 1)
+    X_batch = X[beg:end,]
+
+    # Compute forward pass
+    ## layer 1: conv1 -> relu1 -> pool1
+    [outc1, Houtc1, Woutc1] = conv2d::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride,
+                                              pad, pad)
+    outr1 = relu::forward(outc1)
+    [outp1, Houtp1, Woutp1] = max_pool2d::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,
+                                                  strideh=2, stridew=2, padh=0, padw=0)
+    ## layer 2: conv2 -> relu2 -> pool2
+    [outc2, Houtc2, Woutc2] = conv2d::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf,
+                                              stride, stride, pad, pad)
+    outr2 = relu::forward(outc2)
+    [outp2, Houtp2, Woutp2] = max_pool2d::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,
+                                                  strideh=2, stridew=2, padh=0, padw=0)
+    ## layer 3: affine3 -> relu3
+    outa3 = affine::forward(outp2, W3, b3)
+    outr3 = relu::forward(outa3)
+    ## layer 4: affine4 -> softmax
+    outa4 = affine::forward(outr3, W4, b4)
+    probs_batch = softmax::forward(outa4)
+
+    # Store predictions
+    probs[beg:end,] = probs_batch
+  }
+}
+
+eval = function(matrix[double] probs, matrix[double] y)
+    return (double loss, double accuracy) {
+  /*
+   * Evaluates a convolutional net using the "LeNet" architecture.
+   *
+   * The probs matrix contains the class probability predictions
+   * of K classes over N examples. The targets, y, have K classes,
+   * and are one-hot encoded.
+   *
+   * Inputs:
+   *  - probs: Class probabilities, of shape (N, K).
+   *  - y: Target matrix, of shape (N, K).
+   *
+   * Outputs:
+   *  - loss: Scalar loss, of shape (1).
+   *  - accuracy: Scalar accuracy, of shape (1).
+   */
+  # Compute loss & accuracy
+  loss = cross_entropy_loss::forward(probs, y)
+  correct_pred = rowIndexMax(probs) == rowIndexMax(y)
+  accuracy = mean(correct_pred)
+}
+
+generate_dummy_data = function()
+    return (matrix[double] X, matrix[double] y, int C, int Hin, int Win) {
+  /*
+   * Generate a dummy dataset similar to the MNIST dataset.
+   *
+   * Outputs:
+   *  - X: Input data matrix, of shape (N, D).
+   *  - y: Target matrix, of shape (N, K).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   */
+  # Generate dummy input data
+  N = 1024  # num examples
+  C = 1  # num input channels
+  Hin = 28  # input height
+  Win = 28  # input width
+  K = 10  # num target classes
+  X = rand(rows=N, cols=C*Hin*Win, pdf="normal")
+  classes = round(rand(rows=N, cols=1, min=1, max=K, pdf="uniform"))
+  y = table(seq(1, N), classes)  # one-hot encoding
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1f5cf697/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-predict.dml b/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-predict.dml
new file mode 100644
index 0000000..4c8c434
--- /dev/null
+++ b/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-predict.dml
@@ -0,0 +1,77 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# MNIST Softmax - Predict
+#
+# This script computes the class probability predictions of a
+# trained softmax classifier on images of handwritten digits.
+#
+# Inputs:
+#  - X: File containing images to classify.
+#     The format is "pixel_1, pixel_2, ..., pixel_n".
+#  - model_dir: Directory containing the trained weights and biases
+#     of the model.
+#  - out_dir: Directory to store class probability predictions for
+#     each image.
+#  - fmt: [DEFAULT: "csv"] File format of `X` and output predictions.
+#     Options include: "csv", "mm", "text", and "binary".
+#
+# Outputs:
+#  - probs: File containing class probability predictions for each
+#     image.
+#
+# Data:
+# The X file should contain images of handwritten digits,
+# where each example is a 28x28 pixel image of grayscale values in
+# the range [0,255] stretched out as 784 pixels.
+#
+# Sample Invocation:
+# 1. Download images.
+#
+#    For example, save images to `nn/examples/data/mnist/images.csv`.
+#
+# 2. Execute using Spark
+#   ```
+#   spark-submit --master local[*] --driver-memory 5G
+#   --conf spark.driver.maxResultSize=0 --conf spark.rpc.message.maxSize=128
+#   $SYSTEMML_HOME/target/SystemML.jar -f nn/examples/mnist_softmax-predict.dml
+#   -nvargs X=nn/examples/data/mnist/images.csv
+#   model_dir=nn/examples/model/mnist_softmax out_dir=nn/examples/data/mnist
+#   ```
+#
+source("nn/examples/mnist_softmax.dml") as mnist_softmax

+# Read data
+fmt = ifdef($fmt, "csv")
+X = read($X, format=fmt)
+
+# Scale images to [0,1]
+X = X / 255.0
+
+# Read model coefficients
+W = read($model_dir+"/W")
+b = read($model_dir+"/b")
+
+# Predict classes
+probs = mnist_softmax::predict(X, W, b)
+
+# Output results
+write(probs, $out_dir+"/probs."+fmt, format=fmt)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1f5cf697/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-train.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-train.dml b/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-train.dml
new file mode 100644
index 0000000..09970f0
--- /dev/null
+++ b/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-train.dml
@@ -0,0 +1,110 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# MNIST Softmax - Train
+#
+# This script trains a softmax classifier on images of handwritten
+# digits.
+#
+# Inputs:
+#  - train: File containing labeled MNIST training images.
+#     The format is "label, pixel_1, pixel_2, ..., pixel_n".
+#  - test: File containing labeled MNIST test images.
+#     The format is "label, pixel_1, pixel_2, ..., pixel_n".
+#  - epochs: [DEFAULT: 1] Total number of full training loops over
+#     the full data set.
+#  - out_dir: Directory to store weights and bias matrices of
+#     trained model, as well as final test accuracy.
+#  - fmt: [DEFAULT: "csv"] File format of `train` and `test` data.
+#     Options include: "csv", "mm", "text", and "binary".
+#
+# Outputs:
+#  - W: File containing the trained weights of the model.
+#  - b: File containing the trained biases of the model.
+#  - accuracy: File containing the final accuracy on the test data.
+#
+# Data:
+# The MNIST dataset contains labeled images of handwritten digits,
+# where each example is a 28x28 pixel image of grayscale values in
+# the range [0,255] stretched out as 784 pixels, and each label is
+# one of 10 possible digits in [0,9].
+#
+# Sample Invocation (running from within the `examples` folder):
+# 1. Download data (60,000 training examples, and 10,000 test examples)
+#   ```
+#   nn/examples/get_mnist_data.sh
+#   ```
+#
+# 2. Execute using Spark
+#   ```
+#   spark-submit --master local[*] --driver-memory 10G
+#   --conf spark.driver.maxResultSize=0 --conf spark.rpc.message.maxSize=128
+#   $SYSTEMML_HOME/target/SystemML.jar -f nn/examples/mnist_softmax-train.dml
+#   -nvargs train=nn/examples/data/mnist/mnist_train.csv test=nn/examples/data/mnist/mnist_test.csv
+#   epochs=1 out_dir=nn/examples/model/mnist_softmax
+#   ```
+#
+source("nn/examples/mnist_softmax.dml") as mnist_softmax
+
+# Read training and test data
+fmt = ifdef($fmt, "csv")
+train = read($train, format=fmt)
+test = read($test, format=fmt)
+epochs = ifdef($epochs, 1)
+out_dir = ifdef($out_dir, ".")
+
+# Extract images and labels
+images = train[,2:ncol(train)]
+labels = train[,1]
+X_test = test[,2:ncol(test)]
+y_test = test[,1]
+
+# Scale images to [0,1], and one-hot encode the labels
+n = nrow(train)
+n_test = nrow(test)
+classes = 10
+images = images / 255.0
+labels = table(seq(1, n), labels+1, n, classes)
+X_test = X_test / 255.0
+y_test = table(seq(1, n_test), y_test+1, n_test, classes)
+
+# Split into training (55,000 examples) and validation (5,000 examples)
+X = images[5001:nrow(images),]
+X_val = images[1:5000,]
+y = labels[5001:nrow(labels),]
+y_val = labels[1:5000,]
+
+# Train
+[W, b] = mnist_softmax::train(X, y, X_val, y_val, epochs)
+
+# Write model out
+write(W, out_dir+"/W")
+write(b, out_dir+"/b")
+
+# Eval on test set
+probs = mnist_softmax::predict(X_test, W, b)
+[loss, accuracy] = mnist_softmax::eval(probs, y_test)
+
+# Output results
+print("Test Accuracy: " + accuracy)
+write(accuracy, out_dir+"/accuracy")
+
+print("")
+print("")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/1f5cf697/scripts/staging/SystemML-NN/nn/examples/mnist_softmax.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/examples/mnist_softmax.dml b/scripts/staging/SystemML-NN/nn/examples/mnist_softmax.dml
new file mode 100644
index 0000000..a529a12
--- /dev/null
+++ b/scripts/staging/SystemML-NN/nn/examples/mnist_softmax.dml
@@ -0,0 +1,178 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * MNIST Softmax Example
+ */
+# Imports
+source("nn/layers/affine.dml") as affine
+source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
+source("nn/layers/softmax.dml") as softmax
+source("nn/optim/sgd_nesterov.dml") as sgd_nesterov
+
+train = function(matrix[double] X, matrix[double] y,
+                 matrix[double] X_val, matrix[double] y_val,
+                 int epochs)
+    return (matrix[double] W, matrix[double] b) {
+  /*
+   * Trains a softmax classifier.
+   *
+   * The input matrix, X, has N examples, each with D features.
+   * The targets, y, have K classes, and are one-hot encoded.
+   *
+   * Inputs:
+   *  - X: Input data matrix, of shape (N, D).
+   *  - y: Target matrix, of shape (N, K).
+   *  - X_val: Input validation data matrix, of shape (N, D).
+   *  - y_val: Target validation matrix, of shape (N, K).
+   *  - epochs: Total number of full training loops over the full data set.
+   *
+   * Outputs:
+   *  - W: Weights (parameters) matrix, of shape (D, K).
+   *  - b: Biases vector, of shape (1, K).
+   */
+  N = nrow(X)  # num examples
+  D = ncol(X)  # num features
+  K = ncol(y)  # num classes
+
+  # Create softmax classifier:
+  # affine -> softmax
+  [W, b] = affine::init(D, K)
+  W = W / sqrt(2.0/(D)) * sqrt(1/(D))  # rescale from the sqrt(2/D) init in affine::init to sqrt(1/D),
+                                       # since the layer feeds a softmax instead of a relu
+
+  # Initialize SGD w/ Nesterov momentum optimizer
+  lr = 0.2  # learning rate
+  mu = 0  # momentum
+  decay = 0.99  # learning rate decay constant
+  vW = sgd_nesterov::init(W)  # optimizer momentum state for W
+  vb = sgd_nesterov::init(b)  # optimizer momentum state for b
+
+  # Optimize
+  print("Starting optimization")
+  batch_size = 50
+  iters = 1000  # fixed number of iterations per epoch, rather than ceil(N / batch_size)
+  for (e in 1:epochs) {
+    for (i in 1:iters) {
+      # Get next batch
+      beg = ((i-1) * batch_size) %% N + 1
+      end = min(N, beg + batch_size - 1)
+      X_batch = X[beg:end,]
+      y_batch = y[beg:end,]
+
+      # Compute forward pass
+      ## affine & softmax:
+      out = affine::forward(X_batch, W, b)
+      probs = softmax::forward(out)
+
+      # Compute loss & accuracy for training & validation data
+      loss = cross_entropy_loss::forward(probs, y_batch)
+      accuracy = mean(rowIndexMax(probs) == rowIndexMax(y_batch))
+      probs_val = predict(X_val, W, b)
+      loss_val = cross_entropy_loss::forward(probs_val, y_val)
+      accuracy_val = mean(rowIndexMax(probs_val) == rowIndexMax(y_val))
+      print("Epoch: " + e + ", Iter: " + i + ", Train Loss: " + loss + ", Train Accuracy: "
+            + accuracy + ", Val Loss: " + loss_val + ", Val Accuracy: " + accuracy_val)
+
+      # Compute backward pass
+      ## loss:
+      dprobs = cross_entropy_loss::backward(probs, y_batch)
+      ## affine & softmax:
+      dout = softmax::backward(dprobs, out)
+      [dX_batch, dW, db] = affine::backward(dout, X_batch, W, b)
+
+      # Optimize with SGD w/ Nesterov momentum
+      [W, vW] = sgd_nesterov::update(W, dW, lr, mu, vW)
+      [b, vb] = sgd_nesterov::update(b, db, lr, mu, vb)
+    }
+    # Anneal momentum towards 0.999
+    mu = mu + (0.999 - mu)/(1+epochs-e)
+    # Decay learning rate
+    lr = lr * decay
+  }
+}
+
+predict = function(matrix[double] X, matrix[double] W, matrix[double] b)
+    return (matrix[double] probs) {
+  /*
+   * Computes the class probability predictions of a softmax classifier.
+   *
+   * The input matrix, X, has N examples, each with D features.
+   *
+   * Inputs:
+   *  - X: Input data matrix, of shape (N, D).
+   *  - W: Weights (parameters) matrix, of shape (D, K).
+   *  - b: Biases vector, of shape (1, K).
+   *
+   * Outputs:
+   *  - probs: Class probabilities, of shape (N, K).
+   */
+  # Compute forward pass
+  ## affine & softmax:
+  out = affine::forward(X, W, b)
+  probs = softmax::forward(out)
+}
+
+eval = function(matrix[double] probs, matrix[double] y)
+    return (double loss, double accuracy) {
+  /*
+   * Evaluates a softmax classifier.
+   *
+   * The probs matrix contains the class probability predictions
+   * of K classes over N examples. The targets, y, have K classes,
+   * and are one-hot encoded.
+   *
+   * Inputs:
+   *  - probs: Class probabilities, of shape (N, K).
+   *  - y: Target matrix, of shape (N, K).
+   *
+   * Outputs:
+   *  - loss: Scalar loss, of shape (1).
+   *  - accuracy: Scalar accuracy, of shape (1).
+   */
+  # Compute loss & accuracy
+  loss = cross_entropy_loss::forward(probs, y)
+  correct_pred = rowIndexMax(probs) == rowIndexMax(y)
+  accuracy = mean(correct_pred)
+}
+
+generate_dummy_data = function()
+    return (matrix[double] X, matrix[double] y, int C, int Hin, int Win) {
+  /*
+   * Generate a dummy dataset similar to the MNIST dataset.
+   *
+   * Outputs:
+   *  - X: Input data matrix, of shape (N, D).
+   *  - y: Target matrix, of shape (N, K).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   */
+  # Generate dummy input data
+  N = 1024  # num examples
+  C = 1  # num input channels
+  Hin = 28  # input height
+  Win = 28  # input width
+  T = 10  # num targets
+  X = rand(rows=N, cols=C*Hin*Win, pdf="normal")
+  classes = round(rand(rows=N, cols=1, min=1, max=T, pdf="uniform"))
+  y = table(seq(1, N), classes)  # one-hot encoding
+}
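
To exercise the new `mnist_softmax` API end to end, here is a minimal smoke-test sketch. It is not part of this commit and the driver script itself is hypothetical; it assumes it is run from the `SystemML-NN` root so the `source()` path resolves, and it uses only the functions defined in `mnist_softmax.dml` above. Since the dummy labels are random over 10 classes, accuracy should hover near 0.1.

```
# Hypothetical smoke-test driver for mnist_softmax.dml (not part of this commit).
source("nn/examples/mnist_softmax.dml") as mnist_softmax

# Generate dummy training and validation sets
[X, y, C, Hin, Win] = mnist_softmax::generate_dummy_data()
[X_val, y_val, C_val, Hin_val, Win_val] = mnist_softmax::generate_dummy_data()

# Train for one epoch, then predict and evaluate on the training data
[W, b] = mnist_softmax::train(X, y, X_val, y_val, 1)
probs = mnist_softmax::predict(X, W, b)
[loss, accuracy] = mnist_softmax::eval(probs, y)
print("Dummy-data loss: " + loss + ", accuracy: " + accuracy)
```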

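The LeNet example can be smoke-tested the same way. This is a sketch under the same assumptions (hypothetical driver, run from the `SystemML-NN` root), wiring `generate_dummy_data` into `train`, `predict`, and `eval` from `mnist_lenet.dml`:

```
# Hypothetical smoke-test driver for mnist_lenet.dml (not part of this commit).
source("nn/examples/mnist_lenet.dml") as mnist_lenet

# Dummy data: 1024 examples of C x Hin x Win images with one-hot labels
[X, y, C, Hin, Win] = mnist_lenet::generate_dummy_data()
[X_val, y_val, C_val, Hin_val, Win_val] = mnist_lenet::generate_dummy_data()

# Train for one epoch (16 mini-batches of 64), then evaluate on the training data
[W1, b1, W2, b2, W3, b3, W4, b4] = mnist_lenet::train(X, y, X_val, y_val, C, Hin, Win, 1)
probs = mnist_lenet::predict(X, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4)
[loss, accuracy] = mnist_lenet::eval(probs, y)
print("Dummy-data loss: " + loss + ", accuracy: " + accuracy)
```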