http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/nn/test/test.dml ---------------------------------------------------------------------- diff --git a/scripts/nn/test/test.dml b/scripts/nn/test/test.dml new file mode 100644 index 0000000..a5cb497 --- /dev/null +++ b/scripts/nn/test/test.dml @@ -0,0 +1,549 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +/* + * Various tests, not including gradient checks. + */ +source("nn/layers/batch_norm1d.dml") as batch_norm1d +source("nn/layers/batch_norm2d.dml") as batch_norm2d +source("nn/layers/conv2d.dml") as conv2d +source("nn/layers/conv2d_builtin.dml") as conv2d_builtin +source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss +source("nn/layers/max_pool2d.dml") as max_pool2d +source("nn/layers/max_pool2d_builtin.dml") as max_pool2d_builtin +source("nn/layers/tanh.dml") as tanh +source("nn/test/conv2d_simple.dml") as conv2d_simple +source("nn/test/max_pool2d_simple.dml") as max_pool2d_simple +source("nn/test/util.dml") as test_util +source("nn/util.dml") as util + +batch_norm1d = function() { + /* + * Test for the 1D batch normalization function. + */ + print("Testing the 1D batch normalization function.") + + # Generate data + N = 4 # Number of examples + D = 4 # Number of features + mode = 'train' # execution mode + mu = 0.9 # momentum of moving averages + eps = 1e-5 # smoothing term + X = matrix(seq(1,16), rows=N, cols=D) + + # Create layer + [gamma, beta, ema_mean, ema_var] = batch_norm1d::init(D) + + # Forward + [out, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] = + batch_norm1d::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps) + + # Equivalency check + target = matrix("-1.34160721 -1.34160721 -1.34160733 -1.34160709 + -0.44720244 -0.44720244 -0.44720244 -0.44720232 + 0.44720244 0.44720232 0.44720244 0.44720244 + 1.34160733 1.34160721 1.34160733 1.34160733", rows=1, cols=N*D) + out = matrix(out, rows=1, cols=N*D) + for (i in 1:length(out)) { + rel_error = test_util::check_rel_error(as.scalar(out[1,i]), + as.scalar(target[1,i]), 1e-3, 1e-4) + } +} + +conv2d = function() { + /* + * Test for the 2D convolution functions. 
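+ * + * Checks the `conv2d` implementation for equivalency against the naive `conv2d_simple` implementation and against `conv2d_builtin`.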
+ */ + print("Testing the 2D convolution functions.") + + # Generate data + N = 2 # num examples + C = 3 # num channels + Hin = 5 # input height + Win = 5 # input width + F = 2 # num filters + Hf = 3 # filter height + Wf = 3 # filter width + stride = 1 + pad = 1 + X = rand(rows=N, cols=C*Hin*Win, pdf="normal") + + # Create layer + [W, b] = conv2d::init(F, C, Hf, Wf) + + # Forward + [out, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [out_simple, Hout_simple, Wout_simple] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) + [out_builtin, Hout_builtin, Wout_builtin] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) + + # Equivalency check + out = matrix(out, rows=1, cols=N*F*Hout*Wout) + out_simple = matrix(out_simple, rows=1, cols=N*F*Hout*Wout) + out_builtin = matrix(out_builtin, rows=1, cols=N*F*Hout*Wout) + for (i in 1:length(out)) { + rel_error = test_util::check_rel_error(as.scalar(out[1,i]), + as.scalar(out_simple[1,i]), 1e-10, 1e-12) + rel_error = test_util::check_rel_error(as.scalar(out[1,i]), + as.scalar(out_builtin[1,i]), 1e-10, 1e-12) + } +} + +cross_entropy_loss = function() { + /* + * Test for the cross-entropy loss function. + * + * Here we make sure that the cross-entropy loss function does + * not propagate `infinity` values in the case that a prediction is +` * exactly equal to 0. + */ + print("Testing the cross-entropy loss function with zero-valued predictions.") + + # Generate data + N = 3 # num examples + K = 10 # num targets + pred = matrix(0, rows=N, cols=K) + y = rand(rows=N, cols=K, min=0, max=1, pdf="uniform") + y = y / rowSums(y) # normalized probs + + loss = cross_entropy_loss::forward(pred, y) + + inf = 1/0 + if (loss == inf) { + print("ERROR: The cross-entropy loss function ouptuts infinity for all-zero predictions.") + } +} + +im2col = function() { + /* + * Test for the `im2col` and `col2im` functions. + */ + print("Testing the im2col and col2im functions.") + + # Generate data + C = 3 # num channels + Hin = 5 # input height + Win = 5 # input width + Hf = 3 # filter height + Wf = 3 # filter width + stride = 2 + pad = (Hin * stride - Hin + Hf - stride) / 2 + Hout = as.integer(floor((Hin + 2*pad - Hf)/stride + 1)) + Wout = as.integer(floor((Win + 2*pad - Wf)/stride + 1)) + x = rand(rows=C, cols=Hin*Win) + + # pad + x_pad = util::pad_image(x, Hin, Win, pad, pad, 0) + + # im2col + x_cols = util::im2col(x_pad, Hin+2*pad, Win+2*pad, Hf, Wf, stride, stride) + + if (ncol(x_cols) != Hout*Wout) { + print("ERROR: im2col does not yield the correct output size: " + + ncol(x_cols)+" (actual) vs. "+Hout*Wout+" (correct).") + } + + # col2im + x_pad2 = util::col2im(x_cols, C, Hin+2*pad, Win+2*pad, Hf, Wf, stride, stride, "none") + + # Equivalency check + equivalent = test_util::all_equal(x_pad, x_pad2) + if (!equivalent) { + print("ERROR: im2col and then col2im does not yield the original image.") + } +} + +padding = function() { + /* + * Test for the `pad_image` and `unpad_image` functions. 
+ */ + print("Testing the padding and unpadding functions.") + + # Generate data + C = 3 # num channels + Hin = 5 # input height + Win = 5 # input width + pad = 3 # padding + x = rand(rows=C, cols=Hin*Win) + + # Pad image + x_pad = util::pad_image(x, Hin, Win, pad, pad, 0) + + # Check for padded rows & columns + for (c in 1:C) { + x_pad_slice = matrix(x_pad[c,], rows=Hin+2*pad, cols=Win+2*pad) + for (i in 1:pad) { + rowsum = sum(x_pad_slice[i,]) + colsum = sum(x_pad_slice[,i]) + if (rowsum != 0) + print("ERROR: Padding was not applied to row " + i + ".") + if (colsum != 0) + print("ERROR: Padding was not applied to column " + i + ".") + } + } + + # Unpad image + x1 = util::unpad_image(x_pad, Hin, Win, pad, pad) + + # Equivalency check + equivalent = test_util::all_equal(x, x1) + if (!equivalent) { + print("ERROR: Padding and then unpadding does not yield the original image.") + } +} + +max_pool2d = function() { + /* + * Test for the 2D max pooling functions. + */ + print("Testing the 2D max pooling functions.") + + # Generate data + N = 2 # num examples + C = 3 # num channels + Hin = 8 # input height + Win = 8 # input width + Hf = 2 # filter height + Wf = 2 # filter width + stride = 2 + X = rand(rows=N, cols=C*Hin*Win, pdf="normal") + + for (padh in 0:3) { + for (padw in 0:3) { + print(" - Testing w/ padh="+padh+" & padw="+padw+".") + #if (1==1) {} # force correct printing + #print(" - Testing forward") + [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, padh, padw) + [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, + stride, stride, + padh, padw) + [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, + Hf, Wf, + stride, stride, + padh, padw) + + # Equivalency check + out = matrix(out, rows=1, cols=N*C*Hout*Wout) + out_simple = matrix(out_simple, rows=1, cols=N*C*Hout*Wout) + out_builtin = matrix(out_builtin, rows=1, cols=N*C*Hout*Wout) + for (i in 1:length(out)) { + rel_error = test_util::check_rel_error(as.scalar(out[1,i]), + as.scalar(out_simple[1,i]), 1e-10, 1e-12) + rel_error = test_util::check_rel_error(as.scalar(out[1,i]), + as.scalar(out_builtin[1,i]), 1e-10, 1e-12) + } + + #print(" - Testing backward") + dout = rand(rows=N, cols=C*Hout*Wout, pdf="normal") + dX = max_pool2d::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride, + padh, padw) + dX_simple = max_pool2d_simple::backward(dout, Hout_simple, Wout_simple, X, C, Hin, Win, + Hf, Wf, stride, stride, padh, padw) + dX_builtin = max_pool2d_builtin::backward(dout, Hout_builtin, Wout_builtin, X, C, Hin, Win, + Hf, Wf, stride, stride, padh, padw) + + # Equivalency check + dX = matrix(dX, rows=1, cols=N*C*Hin*Win) + dX_simple = matrix(dX_simple, rows=1, cols=N*C*Hin*Win) + dX_builtin = matrix(dX_builtin, rows=1, cols=N*C*Hin*Win) + for (i in 1:length(dX)) { + rel_error = test_util::check_rel_error(as.scalar(dX[1,i]), + as.scalar(dX_simple[1,i]), 1e-10, 1e-12) + rel_error = test_util::check_rel_error(as.scalar(dX[1,i]), + as.scalar(dX_builtin[1,i]), 1e-10, 1e-12) + } + } + } + + # --- + print(" - Testing for correct behavior against known answer w/ pad=0.") + # generate data + # -- channel 1 + # 1 2 3 4 + # 5 6 7 8 + # 9 10 11 12 + # 13 14 15 16 + # -- channel 2 + # 1 5 9 13 + # 2 6 10 14 + # 3 7 11 15 + # 4 8 12 16 + C = 2 # num channels + Hin = 4 # input height + Win = 4 # input width + X = matrix(seq(1,16,1), rows=Hin, cols=Win) + X = matrix(rbind(X, t(X)), rows=1, cols=C*Hin*Win) # C=2 + X = rbind(X, X) # n=2 + 
pad = 0 + + # forward + [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) + [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) + + # equivalency check + # -- channel 1 + # 6 8 + # 14 16 + # -- channel 2 + # 6 14 + # 8 16 + target = matrix("6 8 14 16 6 14 8 16", rows=1, cols=C*Hout*Wout) + target = rbind(target, target) # n=2 + tmp = test_util::check_all_equal(out, target) + tmp = test_util::check_all_equal(out_simple, target) + tmp = test_util::check_all_equal(out_builtin, target) + + print(" - Testing for correct behavior against known answer w/ pad=1.") + # generate data + # -- channel 1 + # 0 0 0 0 0 0 + # 0 1 2 3 4 0 + # 0 5 6 7 8 0 + # 0 9 10 11 12 0 + # 0 13 14 15 16 0 + # 0 0 0 0 0 0 + # -- channel 2 + # 0 0 0 0 0 0 + # 0 1 5 9 13 0 + # 0 2 6 10 14 0 + # 0 3 7 11 15 0 + # 0 4 8 12 16 0 + # 0 0 0 0 0 0 + pad = 1 + + # forward + [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) + [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) + + # equivalency check + # -- channel 1 + # 1 3 4 + # 9 11 12 + # 13 15 16 + # -- channel 2 + # 1 9 13 + # 3 11 15 + # 4 12 16 + target = matrix("1 3 4 9 11 12 13 15 16 1 9 13 3 11 15 4 12 16", rows=1, cols=C*Hout*Wout) + target = rbind(target, target) # n=2 + tmp = test_util::check_all_equal(out, target) + tmp = test_util::check_all_equal(out_simple, target) + tmp = test_util::check_all_equal(out_builtin, target) + + print(" - Testing for correct behavior against known answer w/ all negative matrix w/ pad=0.") + # generate data + # -- channel 1 + # -1 -2 -3 -4 + # -5 -6 -7 -8 + # -9 -10 -11 -12 + # -13 -14 -15 -16 + # -- channel 2 + # -1 -5 -9 -13 + # -2 -6 -10 -14 + # -3 -7 -11 -15 + # -4 -8 -12 -16 + X = X * -1 + pad = 0 + + # forward + [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) + [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) + + # equivalency check + # -- channel 1 + # -1 -3 + # -9 -11 + # -- channel 2 + # -1 -9 + # -3 -11 + target = matrix("-1 -3 -9 -11 -1 -9 -3 -11", rows=1, cols=C*Hout*Wout) + target = rbind(target, target) # n=2 + tmp = test_util::check_all_equal(out, target) + tmp = test_util::check_all_equal(out_simple, target) + tmp = test_util::check_all_equal(out_builtin, target) + + + print(" - Testing for correct behavior against known answer w/ all negative matrix w/ pad=1.") + # generate data + # -- channel 1 + # 0 0 0 0 0 0 + # 0 -1 -2 -3 -4 0 + # 0 -5 -6 -7 -8 0 + # 0 -9 -10 -11 -12 0 + # 0 -13 -14 -15 -16 0 + # 0 0 0 0 0 0 + # -- channel 2 + # 0 0 0 0 0 0 + # 0 -1 -5 -9 -13 0 + # 0 -2 -6 -10 -14 0 + # 0 -3 -7 -11 -15 0 + # 0 -4 -8 -12 -16 0 + # 0 0 0 0 0 0 + pad = 1 + + # forward + [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) + [out_builtin, Hout_builtin, Wout_builtin] = 
max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) + + # equivalency check + # Note: padding must act as -infinity (rather than 0) for max pooling, so the padded border never wins. + # -- channel 1 + # -1 -2 -4 + # -5 -6 -8 + # -13 -14 -16 + # -- channel 2 + # -1 -5 -13 + # -2 -6 -14 + # -4 -8 -16 + target = matrix("-1 -2 -4 -5 -6 -8 -13 -14 -16 -1 -5 -13 -2 -6 -14 -4 -8 -16", + rows=1, cols=C*Hout*Wout) + target = rbind(target, target) # n=2 + tmp = test_util::check_all_equal(out, target) + tmp = test_util::check_all_equal(out_simple, target) + tmp = test_util::check_all_equal(out_builtin, target) +} + +batch_norm2d = function() { + /* + * Test for the 2D (spatial) batch normalization function. + */ + print("Testing the 2D (spatial) batch normalization function.") + + # Generate data + N = 2 # Number of examples + C = 3 # num channels + Hin = 4 # input height + Win = 5 # input width + mode = 'train' # execution mode + mu = 0.9 # momentum of moving averages + eps = 1e-5 # smoothing term + X = matrix("70 29 23 55 72 + 42 98 68 48 39 + 34 73 44 6 40 + 74 18 18 53 53 + + 63 85 72 61 72 + 32 36 23 29 63 + 9 43 43 49 43 + 31 43 89 94 50 + + 62 12 32 41 87 + 25 48 99 52 61 + 12 83 60 55 34 + 30 42 68 88 51 + + + 67 59 62 67 84 + 8 76 24 19 57 + 10 89 63 72 2 + 59 56 16 15 70 + + 32 69 55 39 93 + 84 36 4 30 40 + 70 100 36 76 59 + 69 15 40 24 34 + + 51 67 11 13 32 + 66 85 55 85 38 + 32 35 17 83 34 + 55 58 52 0 99", rows=N, cols=C*Hin*Win) + + # Create layer + [gamma, beta, ema_mean, ema_var] = batch_norm2d::init(C) + + # Forward + [out, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] = + batch_norm2d::forward(X, gamma, beta, C, Hin, Win, mode, ema_mean, ema_var, mu, eps) + + # Equivalency check + target = matrix("0.86215019 -0.76679718 -1.00517964 0.26619387 0.94161105 + -0.25030172 1.97460198 0.78268933 -0.01191914 -0.36949289 + -0.56814504 0.98134136 -0.17084086 -1.68059683 -0.32976246 + 1.02107191 -1.20383179 -1.20383179 0.18673301 0.18673301 + + 0.50426388 1.41921711 0.87856293 0.42108631 0.87856293 + -0.78498828 -0.61863315 -1.15928721 -0.90975463 0.50426388 + -1.74153018 -0.32751167 -0.32751167 -0.07797909 -0.32751167 + -0.82657707 -0.32751167 1.58557224 1.79351616 -0.0363903 + + 0.4607178 -1.49978399 -0.71558321 -0.36269283 1.44096887 + -0.99005347 -0.08822262 1.91148913 0.06861746 0.42150795 + -1.49978399 1.28412855 0.38229787 0.18624771 -0.63716316 + -0.79400325 -0.32348287 0.69597805 1.48017895 0.0294075 + + + 0.74295878 0.42511559 0.54430676 0.74295878 1.41837597 + -1.60113597 1.10053277 -0.96544927 -1.16410136 0.34565473 + -1.52167511 1.61702824 0.5840373 0.94161105 -1.83951855 + 0.42511559 0.30592418 -1.28329265 -1.32302308 0.86215019 + + -0.78498828 0.75379658 0.17155361 -0.4938668 1.75192738 + 1.37762833 -0.61863315 -1.9494741 -0.86816585 -0.45227802 + 0.79538536 2.04304862 -0.61863315 1.04491806 0.33790874 + 0.75379658 -1.49199748 -0.45227802 -1.11769855 -0.70181072 + + 0.0294075 0.65676796 -1.53899395 -1.46057391 -0.71558321 + 0.61755812 1.36254871 0.18624771 1.36254871 -0.48032296 + -0.71558321 -0.59795308 -1.30373383 1.28412855 -0.63716316 + 0.18624771 0.30387771 0.06861746 -1.97030437 1.91148913", + rows=1, cols=N*C*Hin*Win) + out = matrix(out, rows=1, cols=N*C*Hin*Win) + for (i in 1:length(out)) { + rel_error = test_util::check_rel_error(as.scalar(out[1,i]), + as.scalar(target[1,i]), 1e-3, 1e-4) + } +} + +tanh = function() { + /* + * Test for the `tanh` forward function.
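+ * + * Compares the layer implementation against a direct computation of (exp(X)-exp(-X)) / (exp(X)+exp(-X)).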
+ */ + print("Testing the tanh forward function.") + + # Generate data + N = 2 # num examples + C = 3 # num channels + X = rand(rows=N, cols=C, pdf="normal") + + out = tanh::forward(X) + out_ref = (exp(X) - exp(-X)) / (exp(X) + exp(-X)) + + # Equivalency check + for (i in 1:nrow(out)) { + for (j in 1:ncol(out)) { + rel_error = test_util::check_rel_error(as.scalar(out[i,j]), as.scalar(out_ref[i,j]), + 1e-10, 1e-12) + } + } +} +
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/nn/test/util.dml ---------------------------------------------------------------------- diff --git a/scripts/nn/test/util.dml b/scripts/nn/test/util.dml new file mode 100644 index 0000000..e32a885 --- /dev/null +++ b/scripts/nn/test/util.dml @@ -0,0 +1,155 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +/* + * Test utility functions. + */ + +all_equal = function(matrix[double] X1, matrix[double] X2) + return(boolean equivalent) { + /* + * Determine if two matrices are equivalent. + * + * Inputs: + * - X1: Inputs, of shape (any, any). + * - X2: Inputs, of same shape as X1. + * + * Outputs: + * - equivalent: Whether or not the two matrices are equivalent. + */ + equivalent = as.logical(prod(X1 == X2)) +} + +check_all_equal = function(matrix[double] X1, matrix[double] X2) + return(boolean equivalent) { + /* + * Check if two matrices are equivalent, and report any issues. + * + * Issues an "ERROR" statement if elements of the two matrices are + * not equal. + * + * Inputs: + * - X1: Inputs, of shape (any, any). + * - X2: Inputs, of same shape as X1. + * + * Outputs: + * - equivalent: Whether or not the two matrices are equivalent. + */ + # Determine if matrices are equivalent + equivalent = all_equal(X1, X2) + + # Report any issues + if (!equivalent) { + print("ERROR: The two matrices are not equivalent.") + } +} + +compute_rel_error = function(double x1, double x2) + return (double rel_error) { + /* + * Relative error measure between two values. + * + * Uses smoothing to avoid divide-by-zero errors. + * + * Inputs: + * - x1: First value. + * - x2: Second value. + * + * Outputs: + * - rel_error: Relative error measure between the two values. + */ + rel_error = abs(x1-x2) / max(1e-8, abs(x1)+abs(x2)) +} + +check_rel_error = function(double x1, double x2, double thresh_error, double thresh_warn) + return (double rel_error) { + /* + * Check and report any issues with the relative error measure between + * two values. + * + * Issues an "ERROR" statement for relative errors > thresh_error, + * indicating that the implementation is likely incorrect. + * + * Issues a "WARNING" statement for relative errors < thresh_error + * but > thresh_warn, indicating that the implementation may be + * incorrect. + * + * Inputs: + * - x1: First value. + * - x2: Second value. + * - thresh_error: Error threshold. + * - thresh_warn: Warning threshold. + * + * Outputs: + * - rel_error: Relative error measure between the two values.
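+ * + * For example, check_rel_error(1.001, 1.0, 1e-2, 1e-4) issues a WARNING: the relative error is abs(1.001-1.0) / (abs(1.001)+abs(1.0)) ~= 5e-4, which lies between thresh_warn and thresh_error.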
+ */ + # Compute relative error + rel_error = compute_rel_error(x1, x2) + + # Evaluate relative error + if (rel_error > thresh_error) { + print("ERROR: Relative error " + rel_error + " > " + thresh_error + " with " + x1 + + " vs " + x2 + ".") + } + else if (rel_error > thresh_warn & rel_error <= thresh_error) { + print("WARNING: Relative error " + rel_error + " > " + thresh_warn + " & <= " + thresh_error + + " with " + x1 + " vs " + x2 + ".") + } +} + +check_rel_grad_error = function(double dw_a, double dw_n, double lossph, double lossmh) + return (double rel_error) { + /* + * Check and report any issues with the relative error measure between + * the analytical and numerical partial derivatives. + * + * - Issues an "ERROR" statement for relative errors > 1e-2, + * indicating that the gradient is likely incorrect. + * - Issues a "WARNING" statement for relative errors < 1e-2 + * but > 1e-4, indicating that the gradient may be incorrect. + * + * Inputs: + * - dw_a: Analytical partial derivative wrt w. + * - dw_n: Numerical partial derivative wrt w. + * - lossph: Loss evaluated with w set to w+h. + * - lossmh: Loss evaluated with w set to w-h. + * + * Outputs: + * - rel_error: Relative error measure between the two derivatives. + */ + # Compute relative error + rel_error = compute_rel_error(dw_a, dw_n) + + # Evaluate relative error + thresh_error = 1e-2 + thresh_warn = 1e-4 + if (rel_error > thresh_error) { + print("ERROR: Relative error " + rel_error + " > " + thresh_error + " with " + dw_a + + " analytical vs " + dw_n + " numerical, with lossph " + lossph + + " and lossmh " + lossmh) + } + else if (rel_error > thresh_warn & rel_error <= thresh_error) { + print("WARNING: Relative error " + rel_error + " > " + thresh_warn + " & <= " + thresh_error + + " with " + dw_a + " analytical vs " + dw_n + " numerical, with lossph " + lossph + + " and lossmh " + lossmh) + } +} + http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/nn/util.dml ---------------------------------------------------------------------- diff --git a/scripts/nn/util.dml b/scripts/nn/util.dml new file mode 100644 index 0000000..3a73f08 --- /dev/null +++ b/scripts/nn/util.dml @@ -0,0 +1,202 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +/* + * Utility functions. + */ + +channel_sums = function(matrix[double] X, int C, int Hin, int Win) + return (matrix[double] out) { + /* + * Computes a channel-wise summation over a 4D input. + * + * Inputs: + * - X: Inputs, of shape (N, C*Hin*Win). + * - C: Number of input channels (dimensionality of input depth). + * - Hin: Input height. + * - Win: Input width.
+ * + * Outputs: + * - out: Outputs, of shape (C, 1). + */ + # Here we sum each column, reshape to (C, Hin*Win), and sum each row to result in the summation + # for each channel. + out = rowSums(matrix(colSums(X), rows=C, cols=Hin*Win)) # shape (C, 1) +} + +im2col = function(matrix[double] img, int Hin, int Win, int Hf, int Wf, int strideh, int stridew) + return (matrix[double] img_cols) { + /* + * Rearrange local image regions (patches) into columns. + * + * Assumes image has already been padded as necessary. + * + * Inputs: + * - img: Input image, of shape (C, Hin*Win), where C is the number + * of input channels (depth). + * - Hin: Input height, including padding. + * - Win: Input width, including padding. + * - Hf: Filter height. + * - Wf: Filter width. + * - strideh: Stride over height. + * - stridew: Stride over width. + * + * Outputs: + * - img_cols: Local spatial regions (patches) of the image stretched + * out into columns, of shape (C*Hf*Wf, Hout*Wout). + */ + C = nrow(img) + Hout = as.integer(floor((Hin-Hf)/strideh + 1)) + Wout = as.integer(floor((Win-Wf)/stridew + 1)) + + # Note: We start with `img_cols` transposed to allow for row-major + # left-indexing inside the loop, which is more performant. + img_cols = matrix(0, rows=Hout*Wout, cols=C*Hf*Wf) # zeros + parfor (hout in 1:Hout, check=0) { # all output rows + hin = (hout-1)*strideh + 1 + parfor (wout in 1:Wout, check=0) { # all output columns + win = (wout-1)*stridew + 1 + # Extract a local patch of the input image corresponding spatially to the filter sizes. + img_patch = matrix(0, rows=C, cols=Hf*Wf) # zeros + parfor (c in 1:C) { # all channels + img_slice = matrix(img[c,], rows=Hin, cols=Win) # reshape + img_patch[c,] = matrix(img_slice[hin:hin+Hf-1, win:win+Wf-1], rows=1, cols=Hf*Wf) + } + img_cols[(hout-1)*Wout + wout,] = t(matrix(img_patch, rows=C*Hf*Wf, cols=1)) # reshape + } + } + img_cols = t(img_cols) +} + +col2im = function(matrix[double] img_cols, int C, int Hin, int Win, int Hf, int Wf, + int strideh, int stridew, string reduction) + return (matrix[double] img) { + /* + * Create an image from columns of local image regions (patches). + * + * The reduction strategy determines how to deal with overlapping + * patches. If it is set to "add", any overlapping patches will be + * added together when creating the image. This is useful when + * computing gradients on the original image given gradients on the + * patches. Otherwise, if "none" is provided, any overlapping + * patches will just override previous ones when creating the image. + * This is useful when recreating an image from the output of + * `im2col`. + * + * Assumes original image was already padded as necessary. + * + * Inputs: + * - img_cols: Local spatial regions (patches) of the image stretched + * out into columns, of shape (C*Hf*Wf, Hout*Wout). + * - C: Number of input channels (dimensionality of input depth). + * - Hin: Input height, including padding. + * - Win: Input width, including padding. + * - Hf: Filter height. + * - Wf: Filter width. + * - strideh: Stride over height. + * - stridew: Stride over width. + * - reduction: The reduction strategy to use for overlapping + * patches. Valid options are "add" and "none". + * + * Outputs: + * - img: Input image, of shape (C, Hin*Win). 
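+ * + * For example, col2im(im2col(img, Hin, Win, Hf, Wf, strideh, stridew), nrow(img), Hin, Win, Hf, Wf, strideh, stridew, "none") reconstructs a (padded) image, as exercised by the `im2col` test in nn/test/test.dml.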
+ */ + Hout = as.integer(floor((Hin-Hf)/strideh + 1)) + Wout = as.integer(floor((Win-Wf)/stridew + 1)) + + img = matrix(0, rows=C, cols=Hin*Win) # zeros + for (hout in 1:Hout) { # all output rows + hin = (hout-1)*strideh + 1 + for (wout in 1:Wout) { # all output columns + win = (wout-1)*stridew + 1 + # Extract the local patch (column of img_cols) corresponding spatially to this output position. + img_patch = matrix(img_cols[,(hout-1)*Wout + wout], rows=C, cols=Hf*Wf) # reshape + parfor (c in 1:C) { # all channels + img_patch_slice = matrix(img_patch[c,], rows=Hf, cols=Wf) # reshape + if (reduction == "add") { + img_slice = matrix(0, rows=Hin, cols=Win) + img_slice[hin:hin+Hf-1, win:win+Wf-1] = img_patch_slice + img[c,] = img[c,] + matrix(img_slice, rows=1, cols=Hin*Win) + } else { + img_slice = matrix(img[c,], rows=Hin, cols=Win) + img_slice[hin:hin+Hf-1, win:win+Wf-1] = img_patch_slice + img[c,] = matrix(img_slice, rows=1, cols=Hin*Win) + } + } + } + } +} + +pad_image = function(matrix[double] img, int Hin, int Win, int padh, int padw, double pad_value) + return (matrix[double] img_padded) { + /* + * Pads an image along the height and width dimensions with a + * constant value (typically zero). + * + * Inputs: + * - img: Input image, of shape (C, Hin*Win), where C is the number + * of input channels (depth). + * - Hin: Input height. + * - Win: Input width. + * - padh: Padding for top and bottom sides. + * - padw: Padding for left and right sides. + * - pad_value: Value to use for the padding. + * A typical value is 0. + * + * Outputs: + * - img_padded: The input image padded along the height and width + * dimensions, of shape (C, (Hin+2*padh)*(Win+2*padw)). + */ + C = nrow(img) + img_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros + parfor (c in 1:C) { + img_slice = matrix(img[c,], rows=Hin, cols=Win) # depth slice C reshaped + img_padded_slice = matrix(pad_value, rows=Hin+2*padh, cols=Win+2*padw) + img_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = img_slice + img_padded[c,] = matrix(img_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape + } +} + +unpad_image = function(matrix[double] img_padded, int Hin, int Win, int padh, int padw) + return (matrix[double] img) { + /* + * Unpads an image along the height and width dimensions. + * + * Inputs: + * - img_padded: The input image padded along the height and width + * dimensions, of shape (C, (Hin+2*padh)*(Win+2*padw)). + * - Hin: Input height of unpadded image. + * - Win: Input width of unpadded image. + * - padh: Padding for top and bottom sides. + * - padw: Padding for left and right sides. + * + * Outputs: + * - img: Input image, of shape (C, Hin*Win), where C is the number + * of input channels (depth). + */ + C = nrow(img_padded) + img = matrix(0, rows=C, cols=Hin*Win) + parfor (c in 1:C) { + img_padded_slice = matrix(img_padded[c,], rows=(Hin+2*padh), cols=(Win+2*padw)) + img_slice = img_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] + img[c,] = matrix(img_slice, rows=1, cols=Hin*Win) + } +} + http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/README.md ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/README.md b/scripts/staging/SystemML-NN/README.md deleted file mode 100644 index b80f2c6..0000000 --- a/scripts/staging/SystemML-NN/README.md +++ /dev/null @@ -1,183 +0,0 @@ -<!-- -{% comment %} -Licensed to the Apache Software Foundation (ASF) under one or more -contributor license agreements.
See the NOTICE file distributed with -this work for additional information regarding copyright ownership. -The ASF licenses this file to you under the Apache License, Version 2.0 -(the "License"); you may not use this file except in compliance with -the License. You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -{% endcomment %} ---> - -# SystemML-NN - -### A deep learning library for [Apache SystemML](https://github.com/apache/incubator-systemml). - -## Examples: -#### Please see the [`examples`](nn/examples) folder for more detailed examples, or view the following two quick examples. -### Neural net for regression with vanilla SGD: -```python -# Imports -source("nn/layers/affine.dml") as affine -source("nn/layers/l2_loss.dml") as l2_loss -source("nn/layers/relu.dml") as relu -source("nn/optim/sgd.dml") as sgd - -# Generate input data -N = 1024 # num examples -D = 100 # num features -t = 1 # num targets -X = rand(rows=N, cols=D, pdf="normal") -y = rand(rows=N, cols=t) - -# Create 2-layer network: -## affine1 -> relu1 -> affine2 -M = 64 # number of neurons -[W1, b1] = affine::init(D, M) -[W2, b2] = affine::init(M, t) - -# Initialize optimizer -lr = 0.05 # learning rate -mu = 0.9 # momentum -decay = 0.99 # learning rate decay constant - -# Optimize -print("Starting optimization") -batch_size = 32 -epochs = 5 -iters = 1024 / batch_size -for (e in 1:epochs) { - for(i in 1:iters) { - # Get next batch - X_batch = X[i:i+batch_size-1,] - y_batch = y[i:i+batch_size-1,] - - # Compute forward pass - out1 = affine::forward(X_batch, W1, b1) - outr1 = relu::forward(out1) - out2 = affine::forward(outr1, W2, b2) - - # Compute loss - loss = l2_loss::forward(out2, y_batch) - print("L2 loss: " + loss) - - # Compute backward pass - dout2 = l2_loss::backward(out2, y_batch) - [doutr1, dW2, db2] = affine::backward(dout2, outr1, W2, b2) - dout1 = relu::backward(doutr1, out1) - [dX_batch, dW1, db1] = affine::backward(dout1, X_batch, W1, b1) - - # Optimize with vanilla SGD - W1 = sgd::update(W1, dW1, lr) - b1 = sgd::update(b1, db1, lr) - W2 = sgd::update(W2, dW2, lr) - b2 = sgd::update(b2, db2, lr) - } - # Decay learning rate - lr = lr * decay -} -``` - -### Neural net for multi-class classification with dropout and SGD w/ Nesterov momentum: -```python -# Imports -source("nn/layers/affine.dml") as affine -source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss -source("nn/layers/dropout.dml") as dropout -source("nn/layers/relu.dml") as relu -source("nn/layers/softmax.dml") as softmax -source("nn/optim/sgd_nesterov.dml") as sgd_nesterov - -# Generate input data -N = 1024 # num examples -D = 100 # num features -t = 5 # num targets -X = rand(rows=N, cols=D, pdf="normal") -classes = round(rand(rows=N, cols=1, min=1, max=t, pdf="uniform")) -y = matrix(0, rows=N, cols=t) -parfor (i in 1:N) { - y[i, as.scalar(classes[i,1])] = 1 # one-hot encoding -} - -# Create network: -# affine1 -> relu1 -> dropout1 -> affine2 -> relu2 -> dropout2 -> affine3 -> softmax -H1 = 64 # number of neurons in 1st hidden layer -H2 = 64 # number of neurons in 2nd hidden layer -p = 0.5 # dropout probability -[W1, b1] = affine::init(D, H1) -[W2, b2] = affine::init(H1, H2) -[W3, b3] = 
affine::init(H2, t) - -# Initialize SGD w/ Nesterov momentum optimizer -lr = 0.05 # learning rate -mu = 0.5 # momentum -decay = 0.99 # learning rate decay constant -vW1 = sgd_nesterov::init(W1); vb1 = sgd_nesterov::init(b1) -vW2 = sgd_nesterov::init(W2); vb2 = sgd_nesterov::init(b2) -vW3 = sgd_nesterov::init(W3); vb3 = sgd_nesterov::init(b3) - -# Optimize -print("Starting optimization") -batch_size = 64 -epochs = 10 -iters = 1024 / batch_size -for (e in 1:epochs) { - for(i in 1:iters) { - # Get next batch - X_batch = X[i:i+batch_size-1,] - y_batch = y[i:i+batch_size-1,] - - # Compute forward pass - ## layer 1: - out1 = affine::forward(X_batch, W1, b1) - outr1 = relu::forward(out1) - [outd1, maskd1] = dropout::forward(outr1, p, -1) - ## layer 2: - out2 = affine::forward(outd1, W2, b2) - outr2 = relu::forward(out2) - [outd2, maskd2] = dropout::forward(outr2, p, -1) - ## layer 3: - out3 = affine::forward(outd2, W3, b3) - probs = softmax::forward(out3) - - # Compute loss - loss = cross_entropy_loss::forward(probs, y_batch) - print("Cross entropy loss: " + loss) - - # Compute backward pass - ## loss: - dprobs = cross_entropy_loss::backward(probs, y_batch) - ## layer 3: - dout3 = softmax::backward(dprobs, out3) - [doutd2, dW3, db3] = affine::backward(dout3, outd2, W3, b3) - ## layer 2: - doutr2 = dropout::backward(doutd2, outr2, p, maskd2) - dout2 = relu::backward(doutr2, out2) - [doutd1, dW2, db2] = affine::backward(dout2, outd1, W2, b2) - ## layer 1: - doutr1 = dropout::backward(doutd1, outr1, p, maskd1) - dout1 = relu::backward(doutr1, out1) - [dX_batch, dW1, db1] = affine::backward(dout1, X_batch, W1, b1) - - # Optimize with SGD w/ Nesterov momentum - [W1, vW1] = sgd_nesterov::update(W1, dW1, lr, mu, vW1) - [b1, vb1] = sgd_nesterov::update(b1, db1, lr, mu, vb1) - [W2, vW2] = sgd_nesterov::update(W2, dW2, lr, mu, vW2) - [b2, vb2] = sgd_nesterov::update(b2, db2, lr, mu, vb2) - [W3, vW3] = sgd_nesterov::update(W3, dW3, lr, mu, vW3) - [b3, vb3] = sgd_nesterov::update(b3, db3, lr, mu, vb3) - } - # Anneal momentum towards 0.999 - mu = mu + (0.999 - mu)/(1+epochs-e) - # Decay learning rate - lr = lr * decay -} -``` http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/examples/Example - MNIST LeNet.ipynb ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/examples/Example - MNIST LeNet.ipynb b/scripts/staging/SystemML-NN/nn/examples/Example - MNIST LeNet.ipynb deleted file mode 100644 index 0423269..0000000 --- a/scripts/staging/SystemML-NN/nn/examples/Example - MNIST LeNet.ipynb +++ /dev/null @@ -1,189 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Quick Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Create a SystemML MLContext object\n", - "from systemml import MLContext, dml\n", - "ml = MLContext(sc)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Download Data - MNIST" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The MNIST dataset contains labeled images of handwritten digits, where each example is a 28x28 pixel image of grayscale values in the range [0,255] stretched out as 784 pixels, and each label is one of 10 possible digits in [0,9]. Here, we download 60,000 training examples, and 10,000 test examples, where the format is \"label, pixel_1, pixel_2, ..., pixel_n\"." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%sh\n", - "mkdir -p data/mnist/\n", - "cd data/mnist/\n", - "curl -O https://pjreddie.com/media/files/mnist_train.csv\n", - "curl -O https://pjreddie.com/media/files/mnist_test.csv" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## SystemML \"LeNet\" Neural Network" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. Train" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "script_string = \"\"\"\n", - "source(\"nn/examples/mnist_lenet.dml\") as mnist_lenet\n", - "\n", - "# Read training data\n", - "data = read($data, format=\"csv\")\n", - "n = nrow(data)\n", - "\n", - "# Extract images and labels\n", - "images = data[,2:ncol(data)]\n", - "labels = data[,1]\n", - "\n", - "# Scale images to [-1,1], and one-hot encode the labels\n", - "images = (images / 255.0) * 2 - 1\n", - "labels = table(seq(1, n), labels+1, n, 10)\n", - "\n", - "# Split into training (55,000 examples) and validation (5,000 examples)\n", - "X = images[5001:nrow(images),]\n", - "X_val = images[1:5000,]\n", - "y = labels[5001:nrow(images),]\n", - "y_val = labels[1:5000,]\n", - "\n", - "# Train\n", - "epochs = 10\n", - "[W1, b1, W2, b2, W3, b3, W4, b4] = mnist_lenet::train(X, y, X_val, y_val, C, Hin, Win, epochs)\n", - "\"\"\"\n", - "script = (dml(script_string).input(\"$data\", \"data/mnist/mnist_train.csv\")\n", - " .input(C=1, Hin=28, Win=28)\n", - " .output(\"W1\", \"b1\", \"W2\", \"b2\", \"W3\", \"b3\", \"W4\", \"b4\"))\n", - "W1, b1, W2, b2, W3, b3, W4, b4 = (ml.execute(script)\n", - " .get(\"W1\", \"b1\", \"W2\", \"b2\", \"W3\", \"b3\", \"W4\", \"b4\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Compute Test Accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "script_string = \"\"\"\n", - "source(\"nn/examples/mnist_lenet.dml\") as mnist_lenet\n", - "\n", - "# Read test data\n", - "data = read($data, format=\"csv\")\n", - "n = nrow(data)\n", - "\n", - "# Extract images and labels\n", - "X_test = data[,2:ncol(data)]\n", - "y_test = data[,1]\n", - "\n", - "# Scale images to [-1,1], and one-hot encode the labels\n", - "X_test = (X_test / 255.0) * 2 - 1\n", - "y_test = table(seq(1, n), y_test+1, n, 10)\n", - "\n", - "# Eval on test set\n", - "probs = mnist_lenet::predict(X_test, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4)\n", - "[loss, accuracy] = mnist_lenet::eval(probs, y_test)\n", - "\n", - "print(\"Test Accuracy: \" + accuracy)\n", - "\"\"\"\n", - "script = dml(script_string).input(**{\"$data\": \"data/mnist/mnist_train.csv\",\n", - " \"C\": 1, \"Hin\": 28, \"Win\": 28,\n", - " \"W1\": W1, \"b1\": b1,\n", - " \"W2\": W2, \"b2\": b2,\n", - " \"W3\": W3, \"b3\": b3,\n", - " \"W4\": W4, \"b4\": b4})\n", - "ml.execute(script)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. 
Extract Model Into Spark DataFrames For Future Use" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "W1_df = W1.toDF()\n", - "b1_df = b1.toDF()\n", - "W2_df = W2.toDF()\n", - "b2_df = b2.toDF()\n", - "W3_df = W3.toDF()\n", - "b3_df = b3.toDF()\n", - "W4_df = W4.toDF()\n", - "b4_df = b4.toDF()\n", - "W1_df, b1_df, W2_df, b2_df, W3_df, b3_df, W4_df, b4_df" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 + Spark 2.x + SystemML", - "language": "python", - "name": "pyspark3_2.x" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/examples/Example - MNIST Softmax Classifier.ipynb ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/examples/Example - MNIST Softmax Classifier.ipynb b/scripts/staging/SystemML-NN/nn/examples/Example - MNIST Softmax Classifier.ipynb deleted file mode 100644 index 5e7182a..0000000 --- a/scripts/staging/SystemML-NN/nn/examples/Example - MNIST Softmax Classifier.ipynb +++ /dev/null @@ -1,179 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Quick Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "# Create a SystemML MLContext object\n", - "from systemml import MLContext, dml\n", - "ml = MLContext(sc)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Download Data - MNIST" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The MNIST dataset contains labeled images of handwritten digits, where each example is a 28x28 pixel image of grayscale values in the range [0,255] stretched out as 784 pixels, and each label is one of 10 possible digits in [0,9]. Here, we download 60,000 training examples, and 10,000 test examples, where the format is \"label, pixel_1, pixel_2, ..., pixel_n\"." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "%%sh\n", - "mkdir -p data/mnist/\n", - "cd data/mnist/\n", - "curl -O https://pjreddie.com/media/files/mnist_train.csv\n", - "curl -O https://pjreddie.com/media/files/mnist_test.csv" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## SystemML Softmax Model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. 
Train" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "training = \"\"\"\n", - "source(\"nn/examples/mnist_softmax.dml\") as mnist_softmax\n", - "\n", - "# Read training data\n", - "data = read($data, format=\"csv\")\n", - "n = nrow(data)\n", - "\n", - "# Extract images and labels\n", - "images = data[,2:ncol(data)]\n", - "labels = data[,1]\n", - "\n", - "# Scale images to [0,1], and one-hot encode the labels\n", - "images = images / 255.0\n", - "labels = table(seq(1, n), labels+1, n, 10)\n", - "\n", - "# Split into training (55,000 examples) and validation (5,000 examples)\n", - "X = images[5001:nrow(images),]\n", - "X_val = images[1:5000,]\n", - "y = labels[5001:nrow(images),]\n", - "y_val = labels[1:5000,]\n", - "\n", - "# Train\n", - "epochs = 1\n", - "[W, b] = mnist_softmax::train(X, y, X_val, y_val, epochs)\n", - "\"\"\"\n", - "script = dml(training).input(\"$data\", \"data/mnist/mnist_train.csv\").output(\"W\", \"b\")\n", - "W, b = ml.execute(script).get(\"W\", \"b\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Compute Test Accuracy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "testing = \"\"\"\n", - "source(\"nn/examples/mnist_softmax.dml\") as mnist_softmax\n", - "\n", - "# Read test data\n", - "data = read($data, format=\"csv\")\n", - "n = nrow(data)\n", - "\n", - "# Extract images and labels\n", - "X_test = data[,2:ncol(data)]\n", - "y_test = data[,1]\n", - "\n", - "# Scale images to [0,1], and one-hot encode the labels\n", - "X_test = X_test / 255.0\n", - "y_test = table(seq(1, n), y_test+1, n, 10)\n", - "\n", - "# Eval on test set\n", - "probs = mnist_softmax::predict(X_test, W, b)\n", - "[loss, accuracy] = mnist_softmax::eval(probs, y_test)\n", - "\n", - "print(\"Test Accuracy: \" + accuracy)\n", - "\"\"\"\n", - "script = dml(testing).input(\"$data\", \"data/mnist/mnist_test.csv\", W=W, b=b)\n", - "ml.execute(script)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Extract Model Into Spark DataFrames For Future Use" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "W_df = W.toDF()\n", - "b_df = b.toDF()\n", - "W_df, b_df" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.1" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/examples/README.md ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/examples/README.md b/scripts/staging/SystemML-NN/nn/examples/README.md deleted file mode 100644 index d5e9d04..0000000 --- a/scripts/staging/SystemML-NN/nn/examples/README.md +++ /dev/null @@ -1,74 +0,0 @@ -<!-- -{% comment %} -Licensed to the Apache Software Foundation (ASF) under one or more -contributor license agreements. See the NOTICE file distributed with -this work for additional information regarding copyright ownership. 
-The ASF licenses this file to you under the Apache License, Version 2.0 -(the "License"); you may not use this file except in compliance with -the License. You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -{% endcomment %} ---> - -# SystemML-NN Examples - -#### This folder contains scripts and PySpark Jupyter notebooks serving as examples of using the *SystemML-NN* (`nn`) deep learning library. - ---- - -# Examples -### MNIST Softmax Classifier - -* This example trains a softmax classifier, which is essentially a multi-class logistic regression model, on the MNIST data. The model will be trained on the *training* images, validated on the *validation* images, and tested for final performance metrics on the *test* images. -* Notebook: `Example - MNIST Softmax Classifier.ipynb`. -* DML Functions: `mnist_softmax.dml` -* Training script: `mnist_softmax-train.dml` -* Prediction script: `mnist_softmax-predict.dml` - -### MNIST "LeNet" Neural Net - -* This example trains a neural network on the MNIST data using a ["LeNet" architecture](http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf). The model will be trained on the *training* images, validated on the *validation* images, and tested for final performance metrics on the *test* images. -* Notebook: `Example - MNIST LeNet.ipynb`. -* DML Functions: `mnist_lenet.dml` -* Training script: `mnist_lenet-train.dml` -* Prediction script: `mnist_lenet-predict.dml` - ---- - -# Setup -## Code -* To run the examples, please first download and unzip the project via GitHub using the "Clone or download" button on the [homepage of the project](https://github.com/dusenberrymw/systemml-nn), *or* via the following commands: - - ``` - git clone https://github.com/dusenberrymw/systemml-nn.git - ``` - -* Then, move into the `systemml-nn` folder via: - ``` - cd systemml-nn - ``` - -## Data -* These examples use the classic [MNIST](http://yann.lecun.com/exdb/mnist/) dataset, which contains labeled 28x28 pixel images of handwritten digits in the range of 0-9. There are 60,000 training images, and 10,000 testing images. Of the 60,000 training images, 5,000 will be used as validation images. -* **Download**: - * **Notebooks**: The data will be automatically downloaded as a step in either of the example notebooks. - * **Training scripts**: Please run `get_mnist_data.sh` to download the data separately. - -## Execution -* These examples contain scripts written in SystemML's R-like language (`*.dml`), as well as PySpark Jupyter notebooks (`*.ipynb`). The scripts contain the math for the algorithms, enclosed in functions, and the notebooks serve as full, end-to-end examples of reading in data, training models using the functions within the scripts, and evaluating final performance. 
-* **Notebooks**: To run the notebook examples, please install the SystemML Python package with `pip install systemml`, and then startup Jupyter in the following manner from this directory (or for more information, please see [this great blog post](http://spark.tc/0-to-life-changing-application-with-apache-systemml/)): - - ``` - PYSPARK_DRIVER_PYTHON=jupyter PYSPARK_DRIVER_PYTHON_OPTS="notebook" pyspark --master local[*] --driver-memory 3G --driver-class-path SystemML.jar --jars SystemML.jar - ``` - - Note that all printed output, such as training statistics, from the SystemML scripts will be sent to the terminal in which Jupyter was started (for now...). - -* **Scripts**: To run the scripts from the command line using `spark-submit`, please see the comments located at the top of the `-train` and `-predict` scripts. http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/examples/get_mnist_data.sh ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/examples/get_mnist_data.sh b/scripts/staging/SystemML-NN/nn/examples/get_mnist_data.sh deleted file mode 100755 index deb0c40..0000000 --- a/scripts/staging/SystemML-NN/nn/examples/get_mnist_data.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -DIR="$(cd "$(dirname "$0")" && pwd)" -mkdir -p $DIR/data/mnist/ -cd $DIR/data/mnist/ -curl -O https://pjreddie.com/media/files/mnist_train.csv -curl -O https://pjreddie.com/media/files/mnist_test.csv - http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/examples/mnist_lenet-predict.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/examples/mnist_lenet-predict.dml b/scripts/staging/SystemML-NN/nn/examples/mnist_lenet-predict.dml deleted file mode 100644 index 85a5307..0000000 --- a/scripts/staging/SystemML-NN/nn/examples/mnist_lenet-predict.dml +++ /dev/null @@ -1,91 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -# MNIST LeNet - Predict -# -# This script computes the class probability predictions of a -# trained convolutional net using the "LeNet" architecture on -# images of handwritten digits. -# -# Inputs: -# - X: File containing training images. -# The format is "pixel_1, pixel_2, ..., pixel_n". -# - C: Number of color chanels in the images. -# - Hin: Input image height. -# - Win: Input image width. -# - model_dir: Directory containing the trained weights and biases -# of the model. -# - out_dir: Directory to store class probability predictions for -# each image. -# - fmt: [DEFAULT: "csv"] File format of `X` and output predictions. -# Options include: "csv", "mm", "text", and "binary". -# -# Outputs: -# - probs: File containing class probability predictions for each -# image. -# -# Data: -# The X file should contain images of handwritten digits, -# where each example is a 28x28 pixel image of grayscale values in -# the range [0,255] stretched out as 784 pixels. -# -# Sample Invocation (running from outside the `nn` folder): -# 1. Download images. -# -# For example, save images to `nn/examples/data/mnist/images.csv`. -# -# 2. Execute using Spark -# ``` -# spark-submit --master local[*] --driver-memory 5G -# --conf spark.driver.maxResultSize=0 --conf spark.rpc.message.maxSize=128 -# $SYSTEMML_HOME/target/SystemML.jar -f nn/examples/mnist_lenet-predict.dml -# -nvargs X=nn/examples/data/mnist/images.csv C=1 Hin=28 Win=28 -# model_dir=nn/examples/model/mnist_lenet out_dir=nn/examples/data/mnist -# ``` -# -source("nn/examples/mnist_lenet.dml") as mnist_lenet - -# Read training data -fmt = ifdef($fmt, "csv") -X = read($X, format=fmt) -C = $C -Hin = $Hin -Win = $Win - -# Scale images to [-1,1] -X = (X / 255.0) * 2 - 1 - -# Read model coefficients -W1 = read($model_dir+"/W1") -b1 = read($model_dir+"/b1") -W2 = read($model_dir+"/W2") -b2 = read($model_dir+"/b2") -W3 = read($model_dir+"/W3") -b3 = read($model_dir+"/b3") -W4 = read($model_dir+"/W4") -b4 = read($model_dir+"/b4") - -# Predict classes -probs = mnist_lenet::predict(X, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4) - -# Output results -write(probs, $out_dir+"/probs."+fmt, format=fmt) - http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/examples/mnist_lenet-train.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/examples/mnist_lenet-train.dml b/scripts/staging/SystemML-NN/nn/examples/mnist_lenet-train.dml deleted file mode 100644 index 0fc733e..0000000 --- a/scripts/staging/SystemML-NN/nn/examples/mnist_lenet-train.dml +++ /dev/null @@ -1,123 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -# MNIST LeNet - Train -# -# This script trains a convolutional net using the "LeNet" architecture -# on images of handwritten digits. -# -# Inputs: -# - train: File containing labeled MNIST training images. -# The format is "label, pixel_1, pixel_2, ..., pixel_n". -# - test: File containing labeled MNIST test images. -# The format is "label, pixel_1, pixel_2, ..., pixel_n". -# - C: Number of color channels in the images. -# - Hin: Input image height. -# - Win: Input image width. -# - epochs: [DEFAULT: 10] Total number of full training loops over -# the full data set. -# - out_dir: [DEFAULT: "."] Directory to store weights and bias -# matrices of trained model, as well as final test accuracy. -# - fmt: [DEFAULT: "csv"] File format of `train` and `test` data. -# Options include: "csv", "mm", "text", and "binary". -# -# Outputs: -# - W1, W2, W3, W4: Files containing the trained weights of the model. -# - b1, b2, b3, b4: Files containing the trained biases of the model. -# - accuracy: File containing the final accuracy on the test data. -# -# Data: -# The MNIST dataset contains labeled images of handwritten digits, -# where each example is a 28x28 pixel image of grayscale values in -# the range [0,255] stretched out as 784 pixels, and each label is -# one of 10 possible digits in [0,9]. -# -# Sample Invocation (running from outside the `nn` folder): -# 1. Download data (60,000 training examples, and 10,000 test examples) -# ``` -# nn/examples/get_mnist_data.sh -# ``` -# -# 2.
Execute using Spark -# ``` -# spark-submit --master local[*] --driver-memory 10G -# --conf spark.driver.maxResultSize=0 --conf spark.rpc.message.maxSize=128 -# $SYSTEMML_HOME/target/SystemML.jar -f nn/examples/mnist_lenet-train.dml -# -nvargs train=nn/examples/data/mnist/mnist_train.csv test=nn/examples/data/mnist/mnist_test.csv -# C=1 Hin=28 Win=28 epochs=10 out_dir=nn/examples/model/mnist_lenet -# ``` -# -source("nn/examples/mnist_lenet.dml") as mnist_lenet - -# Read training data & settings -fmt = ifdef($fmt, "csv") -train = read($train, format=fmt) -test = read($test, format=fmt) -C = $C -Hin = $Hin -Win = $Win -epochs = ifdef($epochs, 10) -out_dir = ifdef($out_dir, ".") - -# Extract images and labels -images = train[,2:ncol(train)] -labels = train[,1] -X_test = test[,2:ncol(test)] -y_test = test[,1] - -# Scale images to [-1,1], and one-hot encode the labels -n = nrow(train) -n_test = nrow(test) -images = (images / 255.0) * 2 - 1 -labels = table(seq(1, n), labels+1, n, 10) -X_test = (X_test / 255.0) * 2 - 1 -y_test = table(seq(1, n_test), y_test+1, n_test, 10) - -# Split into training (55,000 examples) and validation (5,000 examples) -X = images[5001:nrow(images),] -X_val = images[1:5000,] -y = labels[5001:nrow(images),] -y_val = labels[1:5000,] - -# Train -[W1, b1, W2, b2, W3, b3, W4, b4] = mnist_lenet::train(X, y, X_val, y_val, C, Hin, Win, epochs) - -# Write model out -write(W1, out_dir+"/W1") -write(b1, out_dir+"/b1") -write(W2, out_dir+"/W2") -write(b2, out_dir+"/b2") -write(W3, out_dir+"/W3") -write(b3, out_dir+"/b3") -write(W4, out_dir+"/W4") -write(b4, out_dir+"/b4") - -# Eval on test set -probs = mnist_lenet::predict(X_test, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4) -[loss, accuracy] = mnist_lenet::eval(probs, y_test) - -# Output results -print("Test Accuracy: " + accuracy) -write(accuracy, out_dir+"/accuracy") - -print("") -print("") - http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/examples/mnist_lenet.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/examples/mnist_lenet.dml b/scripts/staging/SystemML-NN/nn/examples/mnist_lenet.dml deleted file mode 100644 index e5755c4..0000000 --- a/scripts/staging/SystemML-NN/nn/examples/mnist_lenet.dml +++ /dev/null @@ -1,331 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
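The train script above one-hot encodes labels with the `table(seq(1, n), labels+1, n, 10)` idiom. A minimal standalone sketch of how that contingency-table call behaves (illustrative only, not part of the commit):

```
# table(seq(1,n), labels+1, n, 10) builds an (n x 10) matrix with a 1 at
# (i, labels[i]+1): each row is the one-hot encoding of one digit label.
labels = matrix("2 0 9", rows=3, cols=1)   # three example digits
n = nrow(labels)
Y = table(seq(1, n), labels + 1, n, 10)    # shape (3, 10)
print(toString(Y))  # row 1 -> 1 in col 3; row 2 -> col 1; row 3 -> col 10
```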
-# -#------------------------------------------------------------- - -/* - * MNIST LeNet Example - */ -# Imports -source("nn/layers/affine.dml") as affine -source("nn/layers/conv2d_builtin.dml") as conv2d -source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss -source("nn/layers/dropout.dml") as dropout -source("nn/layers/l2_reg.dml") as l2_reg -source("nn/layers/max_pool2d_builtin.dml") as max_pool2d -source("nn/layers/relu.dml") as relu -source("nn/layers/softmax.dml") as softmax -source("nn/optim/sgd_nesterov.dml") as sgd_nesterov - -train = function(matrix[double] X, matrix[double] y, - matrix[double] X_val, matrix[double] y_val, - int C, int Hin, int Win, int epochs) - return (matrix[double] W1, matrix[double] b1, - matrix[double] W2, matrix[double] b2, - matrix[double] W3, matrix[double] b3, - matrix[double] W4, matrix[double] b4) { - /* - * Trains a convolutional net using the "LeNet" architecture. - * - * The input matrix, X, has N examples, each represented as a 3D - * volume unrolled into a single vector. The targets, y, have K - * classes, and are one-hot encoded. - * - * Inputs: - * - X: Input data matrix, of shape (N, C*Hin*Win). - * - y: Target matrix, of shape (N, K). - * - X_val: Input validation data matrix, of shape (N, C*Hin*Win). - * - y_val: Target validation matrix, of shape (N, K). - * - C: Number of input channels (dimensionality of input depth). - * - Hin: Input height. - * - Win: Input width. - * - epochs: Total number of full training loops over the full data set. - * - * Outputs: - * - W1: 1st layer weights (parameters) matrix, of shape (F1, C*Hf*Wf). - * - b1: 1st layer biases vector, of shape (F1, 1). - * - W2: 2nd layer weights (parameters) matrix, of shape (F2, F1*Hf*Wf). - * - b2: 2nd layer biases vector, of shape (F2, 1). - * - W3: 3rd layer weights (parameters) matrix, of shape (F2*(Hin/4)*(Win/4), N3). - * - b3: 3rd layer biases vector, of shape (1, N3). - * - W4: 4th layer weights (parameters) matrix, of shape (N3, K). - * - b4: 4th layer biases vector, of shape (1, K). 
- */ - N = nrow(X) - K = ncol(y) - - # Create network: - # conv1 -> relu1 -> pool1 -> conv2 -> relu2 -> pool2 -> affine3 -> relu3 -> affine4 -> softmax - Hf = 5 # filter height - Wf = 5 # filter width - stride = 1 - pad = 2 # For same dimensions, (Hf - stride) / 2 - - F1 = 32 # num conv filters in conv1 - F2 = 64 # num conv filters in conv2 - N3 = 512 # num nodes in affine3 - # Note: affine4 has K nodes, which is equal to the number of target dimensions (num classes) - - [W1, b1] = conv2d::init(F1, C, Hf, Wf) # inputs: (N, C*Hin*Win) - [W2, b2] = conv2d::init(F2, F1, Hf, Wf) # inputs: (N, F1*(Hin/2)*(Win/2)) - [W3, b3] = affine::init(F2*(Hin/2/2)*(Win/2/2), N3) # inputs: (N, F2*(Hin/2/2)*(Win/2/2)) - [W4, b4] = affine::init(N3, K) # inputs: (N, N3) - W4 = W4 / sqrt(2) # different initialization, since being fed into softmax, instead of relu - - # Initialize SGD w/ Nesterov momentum optimizer - lr = 0.01 # learning rate - mu = 0.9 # momentum - decay = 0.95 # learning rate decay constant - vW1 = sgd_nesterov::init(W1); vb1 = sgd_nesterov::init(b1) - vW2 = sgd_nesterov::init(W2); vb2 = sgd_nesterov::init(b2) - vW3 = sgd_nesterov::init(W3); vb3 = sgd_nesterov::init(b3) - vW4 = sgd_nesterov::init(W4); vb4 = sgd_nesterov::init(b4) - - # Regularization - lambda = 5e-04 - - # Optimize - print("Starting optimization") - batch_size = 64 - iters = ceil(N / batch_size) - for (e in 1:epochs) { - for(i in 1:iters) { - # Get next batch - beg = ((i-1) * batch_size) %% N + 1 - end = min(N, beg + batch_size - 1) - X_batch = X[beg:end,] - y_batch = y[beg:end,] - - # Compute forward pass - ## layer 1: conv1 -> relu1 -> pool1 - [outc1, Houtc1, Woutc1] = conv2d::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - outr1 = relu::forward(outc1) - [outp1, Houtp1, Woutp1] = max_pool2d::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2, - strideh=2, stridew=2, padh=0, padw=0) - ## layer 2: conv2 -> relu2 -> pool2 - [outc2, Houtc2, Woutc2] = conv2d::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf, - stride, stride, pad, pad) - outr2 = relu::forward(outc2) - [outp2, Houtp2, Woutp2] = max_pool2d::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2, - strideh=2, stridew=2, padh=0, padw=0) - ## layer 3: affine3 -> relu3 -> dropout - outa3 = affine::forward(outp2, W3, b3) - outr3 = relu::forward(outa3) - [outd3, maskd3] = dropout::forward(outr3, 0.5, -1) - ## layer 4: affine4 -> softmax - outa4 = affine::forward(outd3, W4, b4) - probs = softmax::forward(outa4) - - # Compute loss & accuracy for training & validation data every 100 iterations.
- if (i %% 100 == 0) { - # Compute training loss & accuracy - loss_data = cross_entropy_loss::forward(probs, y_batch) - loss_reg_W1 = l2_reg::forward(W1, lambda) - loss_reg_W2 = l2_reg::forward(W2, lambda) - loss_reg_W3 = l2_reg::forward(W3, lambda) - loss_reg_W4 = l2_reg::forward(W4, lambda) - loss = loss_data + loss_reg_W1 + loss_reg_W2 + loss_reg_W3 + loss_reg_W4 - accuracy = mean(rowIndexMax(probs) == rowIndexMax(y_batch)) - - # Compute validation loss & accuracy - probs_val = predict(X_val, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4) - loss_val = cross_entropy_loss::forward(probs_val, y_val) - accuracy_val = mean(rowIndexMax(probs_val) == rowIndexMax(y_val)) - - # Output results - print("Epoch: " + e + ", Iter: " + i + ", Train Loss: " + loss + ", Train Accuracy: " - + accuracy + ", Val Loss: " + loss_val + ", Val Accuracy: " + accuracy_val) - } - - # Compute data backward pass - ## loss: - dprobs = cross_entropy_loss::backward(probs, y_batch) - ## layer 4: affine4 -> softmax - douta4 = softmax::backward(dprobs, outa4) - [doutd3, dW4, db4] = affine::backward(douta4, outd3, W4, b4) - ## layer 3: affine3 -> relu3 -> dropout - doutr3 = dropout::backward(doutd3, outr3, 0.5, maskd3) - douta3 = relu::backward(doutr3, outa3) - [doutp2, dW3, db3] = affine::backward(douta3, outp2, W3, b3) - ## layer 2: conv2 -> relu2 -> pool2 - doutr2 = max_pool2d::backward(doutp2, Houtp2, Woutp2, outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2, - strideh=2, stridew=2, padh=0, padw=0) - doutc2 = relu::backward(doutr2, outc2) - [doutp1, dW2, db2] = conv2d::backward(doutc2, Houtc2, Woutc2, outp1, W2, b2, F1, - Houtp1, Woutp1, Hf, Wf, stride, stride, pad, pad) - ## layer 1: conv1 -> relu1 -> pool1 - doutr1 = max_pool2d::backward(doutp1, Houtp1, Woutp1, outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2, - strideh=2, stridew=2, padh=0, padw=0) - doutc1 = relu::backward(doutr1, outc1) - [dX_batch, dW1, db1] = conv2d::backward(doutc1, Houtc1, Woutc1, X_batch, W1, b1, C, Hin, Win, - Hf, Wf, stride, stride, pad, pad) - - # Compute regularization backward pass - dW1_reg = l2_reg::backward(W1, lambda) - dW2_reg = l2_reg::backward(W2, lambda) - dW3_reg = l2_reg::backward(W3, lambda) - dW4_reg = l2_reg::backward(W4, lambda) - dW1 = dW1 + dW1_reg - dW2 = dW2 + dW2_reg - dW3 = dW3 + dW3_reg - dW4 = dW4 + dW4_reg - - # Optimize with SGD w/ Nesterov momentum - [W1, vW1] = sgd_nesterov::update(W1, dW1, lr, mu, vW1) - [b1, vb1] = sgd_nesterov::update(b1, db1, lr, mu, vb1) - [W2, vW2] = sgd_nesterov::update(W2, dW2, lr, mu, vW2) - [b2, vb2] = sgd_nesterov::update(b2, db2, lr, mu, vb2) - [W3, vW3] = sgd_nesterov::update(W3, dW3, lr, mu, vW3) - [b3, vb3] = sgd_nesterov::update(b3, db3, lr, mu, vb3) - [W4, vW4] = sgd_nesterov::update(W4, dW4, lr, mu, vW4) - [b4, vb4] = sgd_nesterov::update(b4, db4, lr, mu, vb4) - } - # Anneal momentum towards 0.999 - #mu = mu + (0.999 - mu)/(1+epochs-e) - # Decay learning rate - lr = lr * decay - } -} - -predict = function(matrix[double] X, int C, int Hin, int Win, - matrix[double] W1, matrix[double] b1, - matrix[double] W2, matrix[double] b2, - matrix[double] W3, matrix[double] b3, - matrix[double] W4, matrix[double] b4) - return (matrix[double] probs) { - /* - * Computes the class probability predictions of a convolutional - * net using the "LeNet" architecture. - * - * The input matrix, X, has N examples, each represented as a 3D - * volume unrolled into a single vector. - * - * Inputs: - * - X: Input data matrix, of shape (N, C*Hin*Win). - * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height. - * - Win: Input width. - * - W1: 1st layer weights (parameters) matrix, of shape (F1, C*Hf*Wf). - * - b1: 1st layer biases vector, of shape (F1, 1). - * - W2: 2nd layer weights (parameters) matrix, of shape (F2, F1*Hf*Wf). - * - b2: 2nd layer biases vector, of shape (F2, 1). - * - W3: 3rd layer weights (parameters) matrix, of shape (F2*(Hin/4)*(Win/4), N3). - * - b3: 3rd layer biases vector, of shape (1, N3). - * - W4: 4th layer weights (parameters) matrix, of shape (N3, K). - * - b4: 4th layer biases vector, of shape (1, K). - * - * Outputs: - * - probs: Class probabilities, of shape (N, K). - */ - N = nrow(X) - - # Network: - # conv1 -> relu1 -> pool1 -> conv2 -> relu2 -> pool2 -> affine3 -> relu3 -> affine4 -> softmax - Hf = 5 # filter height - Wf = 5 # filter width - stride = 1 - pad = 2 # For same dimensions, (Hf - stride) / 2 - - F1 = nrow(W1) # num conv filters in conv1 - F2 = nrow(W2) # num conv filters in conv2 - N3 = ncol(W3) # num nodes in affine3 - K = ncol(W4) # num nodes in affine4, equal to number of target dimensions (num classes) - - # Compute predictions over mini-batches - probs = matrix(0, rows=N, cols=K) - batch_size = 64 - iters = ceil(N / batch_size) - for(i in 1:iters) { - # Get next batch - beg = ((i-1) * batch_size) %% N + 1 - end = min(N, beg + batch_size - 1) - X_batch = X[beg:end,] - - # Compute forward pass - ## layer 1: conv1 -> relu1 -> pool1 - [outc1, Houtc1, Woutc1] = conv2d::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) - outr1 = relu::forward(outc1) - [outp1, Houtp1, Woutp1] = max_pool2d::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2, - strideh=2, stridew=2, padh=0, padw=0) - ## layer 2: conv2 -> relu2 -> pool2 - [outc2, Houtc2, Woutc2] = conv2d::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf, - stride, stride, pad, pad) - outr2 = relu::forward(outc2) - [outp2, Houtp2, Woutp2] = max_pool2d::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2, - strideh=2, stridew=2, padh=0, padw=0) - ## layer 3: affine3 -> relu3 - outa3 = affine::forward(outp2, W3, b3) - outr3 = relu::forward(outa3) - ## layer 4: affine4 -> softmax - outa4 = affine::forward(outr3, W4, b4) - probs_batch = softmax::forward(outa4) - - # Store predictions - probs[beg:end,] = probs_batch - } -} - -eval = function(matrix[double] probs, matrix[double] y) - return (double loss, double accuracy) { - /* - * Evaluates a convolutional net using the "LeNet" architecture. - * - * The probs matrix contains the class probability predictions - * of K classes over N examples. The targets, y, have K classes, - * and are one-hot encoded. - * - * Inputs: - * - probs: Class probabilities, of shape (N, K). - * - y: Target matrix, of shape (N, K). - * - * Outputs: - * - loss: Scalar loss, of shape (1). - * - accuracy: Scalar accuracy, of shape (1). - */ - # Compute loss & accuracy - loss = cross_entropy_loss::forward(probs, y) - correct_pred = rowIndexMax(probs) == rowIndexMax(y) - accuracy = mean(correct_pred) -} - -generate_dummy_data = function() - return (matrix[double] X, matrix[double] y, int C, int Hin, int Win) { - /* - * Generate a dummy dataset similar to the MNIST dataset. - * - * Outputs: - * - X: Input data matrix, of shape (N, D). - * - y: Target matrix, of shape (N, K). - * - C: Number of input channels (dimensionality of input depth). - * - Hin: Input height. - * - Win: Input width.
- */ - # Generate dummy input data - N = 1024 # num examples - C = 1 # num input channels - Hin = 28 # input height - Win = 28 # input width - K = 10 # num target classes - X = rand(rows=N, cols=C*Hin*Win, pdf="normal") - classes = round(rand(rows=N, cols=1, min=1, max=K, pdf="uniform")) - y = table(seq(1, N), classes) # one-hot encoding -} - http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-predict.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-predict.dml b/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-predict.dml deleted file mode 100644 index 4c8c434..0000000 --- a/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-predict.dml +++ /dev/null @@ -1,77 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -# MNIST Softmax - Predict -# -# This script computes the class probability predictions of a -# trained softmax classifier on images of handwritten digits. -# -# Inputs: -# - X: File containing training images. -# The format is "pixel_1, pixel_2, ..., pixel_n". -# - model_dir: Directory containing the trained weights and biases -# of the model. -# - out_dir: Directory to store class probability predictions for -# each image. -# - fmt: [DEFAULT: "csv"] File format of `X` and output predictions. -# Options include: "csv", "mm", "text", and "binary". -# -# Outputs: -# - probs: File containing class probability predictions for each -# image. -# -# Data: -# The X file should contain images of handwritten digits, -# where each example is a 28x28 pixel image of grayscale values in -# the range [0,255] stretched out as 784 pixels. -# -# Sample Invocation: -# 1. Download images. -# -# For example, save images to `nn/examples/data/mnist/images.csv`. -# -# 2. 
Execute using Spark -# ``` -# spark-submit --master local[*] --driver-memory 5G -# --conf spark.driver.maxResultSize=0 --conf spark.rpc.message.maxSize=128 -# $SYSTEMML_HOME/target/SystemML.jar -f nn/examples/mnist_softmax-predict.dml -# -nvargs X=nn/examples/data/mnist/images.csv -# model_dir=nn/examples/model/mnist_softmax out_dir=nn/examples/data/mnist -# ``` -# -source("nn/examples/mnist_softmax.dml") as mnist_softmax - -# Read training data -fmt = ifdef($fmt, "csv") -X = read($X, format=fmt) - -# Scale images to [0,1] -X = X / 255.0 - -# Read model coefficients -W = read($model_dir+"/W") -b = read($model_dir+"/b") - -# Predict classes -probs = mnist_softmax::predict(X, W, b) - -# Output results -write(probs, $out_dir+"/probs."+fmt, format=fmt) - http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-train.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-train.dml b/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-train.dml deleted file mode 100644 index 09970f0..0000000 --- a/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-train.dml +++ /dev/null @@ -1,110 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -# MNIST Softmax - Train -# -# This script trains a softmax classifier on images of handwritten -# digits. -# -# Inputs: -# - train: File containing labeled MNIST training images. -# The format is "label, pixel_1, pixel_2, ..., pixel_n". -# - test: File containing labeled MNIST test images. -# The format is "label, pixel_1, pixel_2, ..., pixel_n". -# - epochs: [DEFAULT: 1] Total number of full training loops over -# the full data set. -# - out_dir: Directory to store weights and bias matrices of -# trained model, as well as final test accuracy. -# - fmt: [DEFAULT: "csv"] File format of `train` and `test` data. -# Options include: "csv", "mm", "text", and "binary". -# -# Outputs: -# - W: File containing the trained weights of the model. -# - b: File containing the trained biases of the model. -# - accuracy: File containing the final accuracy on the test data. -# -# Data: -# The MNIST dataset contains labeled images of handwritten digits, -# where each example is a 28x28 pixel image of grayscale values in -# the range [0,255] stretched out as 784 pixels, and each label is -# one of 10 possible digits in [0,9]. -# -# Sample Invocation (running from outside the `nn` folder): -# 1. Download data (60,000 training examples, and 10,000 test examples) -# ``` -# nn/examples/get_mnist_data.sh -# ``` -# -# 2.
Execute using Spark -# ``` -# spark-submit --master local[*] --driver-memory 10G -# --conf spark.driver.maxResultSize=0 --conf spark.rpc.message.maxSize=128 -# $SYSTEMML_HOME/target/SystemML.jar -f nn/examples/mnist_softmax-train.dml -# -nvargs train=nn/examples/data/mnist/mnist_train.csv test=nn/examples/data/mnist/mnist_test.csv -# epochs=1 out_dir=nn/examples/model/mnist_softmax -# ``` -# -source("nn/examples/mnist_softmax.dml") as mnist_softmax - -# Read training data -fmt = ifdef($fmt, "csv") -train = read($train, format=fmt) -test = read($test, format=fmt) -epochs = ifdef($epochs, 1) -out_dir = ifdef($out_dir, ".") - -# Extract images and labels -images = train[,2:ncol(train)] -labels = train[,1] -X_test = test[,2:ncol(test)] -y_test = test[,1] - -# Scale images to [0,1], and one-hot encode the labels -n = nrow(train) -n_test = nrow(test) -classes = 10 -images = images / 255.0 -labels = table(seq(1, n), labels+1, n, classes) -X_test = X_test / 255.0 -y_test = table(seq(1, n_test), y_test+1, n_test, classes) - -# Split into training (55,000 examples) and validation (5,000 examples) -X = images[5001:nrow(images),] -X_val = images[1:5000,] -y = labels[5001:nrow(images),] -y_val = labels[1:5000,] - -# Train -[W, b] = mnist_softmax::train(X, y, X_val, y_val, epochs) - -# Write model out -write(W, out_dir+"/W") -write(b, out_dir+"/b") - -# Eval on test set -probs = mnist_softmax::predict(X_test, W, b) -[loss, accuracy] = mnist_softmax::eval(probs, y_test) - -# Output results -print("Test Accuracy: " + accuracy) -write(accuracy, out_dir+"/accuracy") - -print("") -print("") -
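The softmax example above trains a single affine layer followed by a row-wise softmax over the 10 digit classes. A self-contained sketch of that forward computation (illustrative only; the numerically stable max-subtraction mirrors the approach in `nn/layers/softmax.dml`):

```
# Forward pass of a softmax classifier on fake data: probs = softmax(X %*% W + b).
X = rand(rows=4, cols=784, min=0, max=1)         # 4 fake "images"
W = rand(rows=784, cols=10, pdf="normal") * 0.01
b = matrix(0, rows=1, cols=10)
scores = X %*% W + b                             # shape (4, 10)
scores = scores - rowMaxs(scores)                # stability: subtract row max
unnorm = exp(scores)
probs = unnorm / rowSums(unnorm)                 # each row sums to 1
print(toString(rowSums(probs)))                  # sanity check: all ~1.0
```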
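The LeNet trainer above delegates all parameter updates to `sgd_nesterov::update`. As a closing sketch, this is the update rule that layer implements, reconstructed here from the standard Nesterov-momentum formulation; see `nn/optim/sgd_nesterov.dml` for the authoritative version:

```
# SGD with Nesterov momentum: velocity update plus a lookahead correction.
update = function(matrix[double] X, matrix[double] dX, double lr, double mu,
                  matrix[double] v)
    return (matrix[double] X, matrix[double] v) {
  v_prev = v
  v = mu*v - lr*dX                  # update velocity
  X = X - mu*v_prev + (1+mu)*v      # update position with lookahead term
}

# Usage on dummy parameters, with v initialized to zeros as sgd_nesterov::init does:
W = matrix(0.5, rows=2, cols=2)
dW = matrix(0.1, rows=2, cols=2)
v = matrix(0, rows=2, cols=2)
[W, v] = update(W, dW, 0.01, 0.9, v)
```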
