[SYSTEMML-1453] Update Conv & Max Pooling layer names to include "2D"
This updates `conv*.dml` and `max_pool*.dml` to `conv2d*.dml` and
`max_pool2d*.dml` to allow for 1D and 3D variants in the future. A minimal,
hypothetical migration sketch for callers is appended at the end of this
message.

Closes #447.

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/07039caa
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/07039caa
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/07039caa

Branch: refs/heads/master
Commit: 07039caa9629dd3a26aa66c9ec860cf7f7917724
Parents: 5c59e03
Author: Mike Dusenberry <[email protected]>
Authored: Fri Mar 31 18:39:11 2017 -0700
Committer: Mike Dusenberry <[email protected]>
Committed: Fri Mar 31 18:39:11 2017 -0700

----------------------------------------------------------------------
 projects/breast_cancer/convnet.dml              | 101 +++++----
 .../SystemML-NN/examples/mnist_lenet.dml        |  56 ++---
 scripts/staging/SystemML-NN/nn/layers/conv.dml  | 194 -----------------
 .../staging/SystemML-NN/nn/layers/conv2d.dml    | 194 +++++++++++++++++
 .../SystemML-NN/nn/layers/conv2d_builtin.dml    | 160 ++++++++++++++
 .../SystemML-NN/nn/layers/conv_builtin.dml      | 155 -------------
 .../staging/SystemML-NN/nn/layers/max_pool.dml  | 159 --------------
 .../SystemML-NN/nn/layers/max_pool2d.dml        | 159 ++++++++++++++
 .../nn/layers/max_pool2d_builtin.dml            | 103 +++++++++
 .../SystemML-NN/nn/layers/max_pool_builtin.dml  | 103 ---------
 .../SystemML-NN/nn/test/conv2d_simple.dml       | 215 +++++++++++++++++++
 .../staging/SystemML-NN/nn/test/conv_simple.dml | 215 -------------------
 .../staging/SystemML-NN/nn/test/grad_check.dml  | 170 +++++++--------
 .../SystemML-NN/nn/test/max_pool2d_simple.dml   | 172 +++++++++++++++
 .../SystemML-NN/nn/test/max_pool_simple.dml     | 172 ---------------
 .../staging/SystemML-NN/nn/test/run_tests.dml   |  16 +-
 scripts/staging/SystemML-NN/nn/test/test.dml    | 115 +++++-----
 17 files changed, 1248 insertions(+), 1211 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/projects/breast_cancer/convnet.dml
----------------------------------------------------------------------
diff --git a/projects/breast_cancer/convnet.dml b/projects/breast_cancer/convnet.dml
index 5f115a2..85c7dd8 100644
--- a/projects/breast_cancer/convnet.dml
+++ b/projects/breast_cancer/convnet.dml
@@ -7,9 +7,9 @@ # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License.
You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -24,11 +24,11 @@ */ # Imports source("nn/layers/affine.dml") as affine -source("nn/layers/conv_builtin.dml") as conv +source("nn/layers/conv2d_builtin.dml") as conv2d source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss source("nn/layers/dropout.dml") as dropout source("nn/layers/l2_reg.dml") as l2_reg -source("nn/layers/max_pool_builtin.dml") as max_pool +source("nn/layers/max_pool2d_builtin.dml") as max_pool2d source("nn/layers/relu.dml") as relu source("nn/layers/softmax.dml") as softmax #source("nn/optim/adam.dml") as adam @@ -96,9 +96,9 @@ train = function(matrix[double] X, matrix[double] Y, F3 = 32 # num conv filters in conv3 N1 = 512 # num nodes in affine1 # Note: affine2 has K nodes, which is equal to the number of target dimensions (num classes) - [Wc1, bc1] = conv::init(F1, C, Hf, Wf) # inputs: (N, C*Hin*Win) - [Wc2, bc2] = conv::init(F2, F1, Hf, Wf) # inputs: (N, F1*(Hin/2)*(Win/2)) - [Wc3, bc3] = conv::init(F3, F2, Hf, Wf) # inputs: (N, F2*(Hin/2^2)*(Win/2^2)) + [Wc1, bc1] = conv2d::init(F1, C, Hf, Wf) # inputs: (N, C*Hin*Win) + [Wc2, bc2] = conv2d::init(F2, F1, Hf, Wf) # inputs: (N, F1*(Hin/2)*(Win/2)) + [Wc3, bc3] = conv2d::init(F3, F2, Hf, Wf) # inputs: (N, F2*(Hin/2^2)*(Win/2^2)) [Wa1, ba1] = affine::init(F3*(Hin/2^3)*(Win/2^3), N1) # inputs: (N, F3*(Hin/2^3)*(Win/2^3)) [Wa2, ba2] = affine::init(N1, K) # inputs: (N, N1) Wa2 = Wa2 / sqrt(2) # different initialization, since being fed into softmax, instead of relu @@ -145,17 +145,23 @@ train = function(matrix[double] X, matrix[double] Y, # Compute forward pass ## conv layer 1: conv1 -> relu1 -> pool1 - [outc1, Houtc1, Woutc1] = conv::forward(X_batch, Wc1, bc1, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [outc1, Houtc1, Woutc1] = conv2d::forward(X_batch, Wc1, bc1, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) outc1r = relu::forward(outc1) - [outc1p, Houtc1p, Woutc1p] = max_pool::forward(outc1r, F1, Houtc1, Woutc1, Hf=2, Wf=2, strideh=2, stridew=2) + [outc1p, Houtc1p, Woutc1p] = max_pool2d::forward(outc1r, F1, Houtc1, Woutc1, Hf=2, Wf=2, + strideh=2, stridew=2) ## conv layer 2: conv2 -> relu2 -> pool2 - [outc2, Houtc2, Woutc2] = conv::forward(outc1p, Wc2, bc2, F1, Houtc1p, Woutc1p, Hf, Wf, stride, stride, pad, pad) + [outc2, Houtc2, Woutc2] = conv2d::forward(outc1p, Wc2, bc2, F1, Houtc1p, Woutc1p, Hf, Wf, + stride, stride, pad, pad) outc2r = relu::forward(outc2) - [outc2p, Houtc2p, Woutc2p] = max_pool::forward(outc2r, F2, Houtc2, Woutc2, Hf=2, Wf=2, strideh=2, stridew=2) + [outc2p, Houtc2p, Woutc2p] = max_pool2d::forward(outc2r, F2, Houtc2, Woutc2, Hf=2, Wf=2, + strideh=2, stridew=2) ## conv layer 3: conv3 -> relu3 -> pool3 - [outc3, Houtc3, Woutc3] = conv::forward(outc2p, Wc3, bc3, F2, Houtc2p, Woutc2p, Hf, Wf, stride, stride, pad, pad) + [outc3, Houtc3, Woutc3] = conv2d::forward(outc2p, Wc3, bc3, F2, Houtc2p, Woutc2p, Hf, Wf, + stride, stride, pad, pad) outc3r = relu::forward(outc3) - [outc3p, Houtc3p, Woutc3p] = max_pool::forward(outc3r, F3, Houtc3, Woutc3, Hf=2, Wf=2, strideh=2, stridew=2) + [outc3p, Houtc3p, Woutc3p] = max_pool2d::forward(outc3r, F3, Houtc3, Woutc3, Hf=2, Wf=2, + strideh=2, stridew=2) ## affine layer 1: affine1 -> relu1 -> dropout1 outa1 = affine::forward(outc3p, Wa1, ba1) outa1r = relu::forward(outa1) @@ 
-176,17 +182,23 @@ train = function(matrix[double] X, matrix[double] Y, douta1 = relu::backward(douta1r, outa1) [doutc3p, dWa1, dba1] = affine::backward(douta1, outc3p, Wa1, ba1) ## conv layer 3: conv3 -> relu3 -> pool3 - doutc3r = max_pool::backward(doutc3p, Houtc3p, Woutc3p, outc3r, F3, Houtc3, Woutc3, Hf=2, Wf=2, strideh=2, stridew=2) + doutc3r = max_pool2d::backward(doutc3p, Houtc3p, Woutc3p, outc3r, F3, Houtc3, Woutc3, + Hf=2, Wf=2, strideh=2, stridew=2) doutc3 = relu::backward(doutc3r, outc3) - [doutc2p, dWc3, dbc3] = conv::backward(doutc3, Houtc3, Woutc3, outc2p, Wc3, bc2, F2, Houtc2p, Woutc2p, Hf, Wf, stride, stride, pad, pad) + [doutc2p, dWc3, dbc3] = conv2d::backward(doutc3, Houtc3, Woutc3, outc2p, Wc3, bc2, F2, + Houtc2p, Woutc2p, Hf, Wf, stride, stride, pad, pad) ## conv layer 2: conv2 -> relu2 -> pool2 - doutc2r = max_pool::backward(doutc2p, Houtc2p, Woutc2p, outc2r, F2, Houtc2, Woutc2, Hf=2, Wf=2, strideh=2, stridew=2) + doutc2r = max_pool2d::backward(doutc2p, Houtc2p, Woutc2p, outc2r, F2, Houtc2, Woutc2, + Hf=2, Wf=2, strideh=2, stridew=2) doutc2 = relu::backward(doutc2r, outc2) - [doutc1p, dWc2, dbc2] = conv::backward(doutc2, Houtc2, Woutc2, outc1p, Wc2, bc2, F1, Houtc1p, Woutc1p, Hf, Wf, stride, stride, pad, pad) + [doutc1p, dWc2, dbc2] = conv2d::backward(doutc2, Houtc2, Woutc2, outc1p, Wc2, bc2, F1, + Houtc1p, Woutc1p, Hf, Wf, stride, stride, pad, pad) ## conv layer 1: conv1 -> relu1 -> pool1 - doutc1r = max_pool::backward(doutc1p, Houtc1p, Woutc1p, outc1r, F1, Houtc1, Woutc1, Hf=2, Wf=2, strideh=2, stridew=2) + doutc1r = max_pool2d::backward(doutc1p, Houtc1p, Woutc1p, outc1r, F1, Houtc1, Woutc1, + Hf=2, Wf=2, strideh=2, stridew=2) doutc1 = relu::backward(doutc1r, outc1) - [dX_batch, dWc1, dbc1] = conv::backward(doutc1, Houtc1, Woutc1, X_batch, Wc1, bc1, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [dX_batch, dWc1, dbc1] = conv2d::backward(doutc1, Houtc1, Woutc1, X_batch, Wc1, bc1, C, + Hin, Win, Hf, Wf, stride, stride, pad, pad) # Compute regularization backward pass dWc1_reg = l2_reg::backward(Wc1, lambda) @@ -222,7 +234,7 @@ train = function(matrix[double] X, matrix[double] Y, #[ba1, mba1, vba1] = adam::update(ba1, dba1, lr, beta1, beta2, eps, t, mba1, vba1) #[Wa2, mWa2, vWa2] = adam::update(Wa2, dWa2, lr, beta1, beta2, eps, t, mWa2, vWa2) #[ba2, mba2, vba2] = adam::update(ba2, dba2, lr, beta1, beta2, eps, t, mba2, vba2) - + # Compute loss & accuracy for training & validation data every `log_interval` iterations. if (i %% log_interval == 0) { # Compute training loss & accuracy @@ -348,7 +360,8 @@ predict = function(matrix[double] X, int C, int Hin, int Win, N = nrow(X) # Network: - # conv1 -> relu1 -> pool1 -> conv2 -> relu2 -> pool2 -> conv3 -> relu3 -> pool3 -> affine1 -> relu1 -> affine2 -> softmax + # conv1 -> relu1 -> pool1 -> conv2 -> relu2 -> pool2 -> conv3 -> relu3 -> pool3 + # -> affine1 -> relu1 -> affine2 -> softmax Hf = 3 # filter height Wf = 3 # filter width stride = 1 @@ -365,17 +378,23 @@ predict = function(matrix[double] X, int C, int Hin, int Win, # so that it can be efficiently used for parallel predictions. 
## Compute forward pass ### conv layer 1: conv1 -> relu1 -> pool1 - #[outc1, Houtc1, Woutc1] = conv::forward(X, Wc1, bc1, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + #[outc1, Houtc1, Woutc1] = conv2d::forward(X, Wc1, bc1, C, Hin, Win, Hf, Wf, stride, stride, + # pad, pad) #outc1r = relu::forward(outc1) - #[outc1p, Houtc1p, Woutc1p] = max_pool::forward(outc1r, F1, Houtc1, Woutc1, Hf=2, Wf=2, strideh=2, stridew=2) + #[outc1p, Houtc1p, Woutc1p] = max_pool2d::forward(outc1r, F1, Houtc1, Woutc1, Hf=2, Wf=2, + # strideh=2, stridew=2) ### conv layer 2: conv2 -> relu2 -> pool2 - #[outc2, Houtc2, Woutc2] = conv::forward(outc1p, Wc2, bc2, F1, Houtc1p, Woutc1p, Hf, Wf, stride, stride, pad, pad) + #[outc2, Houtc2, Woutc2] = conv2d::forward(outc1p, Wc2, bc2, F1, Houtc1p, Woutc1p, Hf, Wf, + # stride, stride, pad, pad) #outc2r = relu::forward(outc2) - #[outc2p, Houtc2p, Woutc2p] = max_pool::forward(outc2r, F2, Houtc2, Woutc2, Hf=2, Wf=2, strideh=2, stridew=2) + #[outc2p, Houtc2p, Woutc2p] = max_pool2d::forward(outc2r, F2, Houtc2, Woutc2, Hf=2, Wf=2, + # strideh=2, stridew=2) ### conv layer 3: conv3 -> relu3 -> pool3 - #[outc3, Houtc3, Woutc3] = conv::forward(outc2p, Wc3, bc3, F2, Houtc2p, Woutc2p, Hf, Wf, stride, stride, pad, pad) + #[outc3, Houtc3, Woutc3] = conv2d::forward(outc2p, Wc3, bc3, F2, Houtc2p, Woutc2p, Hf, Wf, + # stride, stride, pad, pad) #outc3r = relu::forward(outc3) - #[outc3p, Houtc3p, Woutc3p] = max_pool::forward(outc3r, F3, Houtc3, Woutc3, Hf=2, Wf=2, strideh=2, stridew=2) + #[outc3p, Houtc3p, Woutc3p] = max_pool2d::forward(outc3r, F3, Houtc3, Woutc3, Hf=2, Wf=2, + # strideh=2, stridew=2) ### affine layer 1: affine1 -> relu1 -> dropout #outa1 = affine::forward(outc3p, Wa1, ba1) #outa1r = relu::forward(outa1) @@ -398,17 +417,23 @@ predict = function(matrix[double] X, int C, int Hin, int Win, # Compute forward pass ## conv layer 1: conv1 -> relu1 -> pool1 - [outc1, Houtc1, Woutc1] = conv::forward(X_batch, Wc1, bc1, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [outc1, Houtc1, Woutc1] = conv2d::forward(X_batch, Wc1, bc1, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) outc1r = relu::forward(outc1) - [outc1p, Houtc1p, Woutc1p] = max_pool::forward(outc1r, F1, Houtc1, Woutc1, Hf=2, Wf=2, strideh=2, stridew=2) + [outc1p, Houtc1p, Woutc1p] = max_pool2d::forward(outc1r, F1, Houtc1, Woutc1, Hf=2, Wf=2, + strideh=2, stridew=2) ## conv layer 2: conv2 -> relu2 -> pool2 - [outc2, Houtc2, Woutc2] = conv::forward(outc1p, Wc2, bc2, F1, Houtc1p, Woutc1p, Hf, Wf, stride, stride, pad, pad) + [outc2, Houtc2, Woutc2] = conv2d::forward(outc1p, Wc2, bc2, F1, Houtc1p, Woutc1p, Hf, Wf, + stride, stride, pad, pad) outc2r = relu::forward(outc2) - [outc2p, Houtc2p, Woutc2p] = max_pool::forward(outc2r, F2, Houtc2, Woutc2, Hf=2, Wf=2, strideh=2, stridew=2) + [outc2p, Houtc2p, Woutc2p] = max_pool2d::forward(outc2r, F2, Houtc2, Woutc2, Hf=2, Wf=2, + strideh=2, stridew=2) ## conv layer 3: conv3 -> relu3 -> pool3 - [outc3, Houtc3, Woutc3] = conv::forward(outc2p, Wc3, bc3, F2, Houtc2p, Woutc2p, Hf, Wf, stride, stride, pad, pad) + [outc3, Houtc3, Woutc3] = conv2d::forward(outc2p, Wc3, bc3, F2, Houtc2p, Woutc2p, Hf, Wf, + stride, stride, pad, pad) outc3r = relu::forward(outc3) - [outc3p, Houtc3p, Woutc3p] = max_pool::forward(outc3r, F3, Houtc3, Woutc3, Hf=2, Wf=2, strideh=2, stridew=2) + [outc3p, Houtc3p, Woutc3p] = max_pool2d::forward(outc3r, F3, Houtc3, Woutc3, Hf=2, Wf=2, + strideh=2, stridew=2) ## affine layer 1: affine1 -> relu1 -> dropout outa1 = affine::forward(outc3p, Wa1, ba1) outa1r = 
relu::forward(outa1) @@ -433,7 +458,7 @@ eval = function(matrix[double] probs, matrix[double] Y) * * Inputs: * - probs: Class probabilities, of shape (N, K). - * - Y: Target matrix, of shape (N, + * - Y: Target matrix, of shape (N, * * Outputs: * - loss: Scalar loss, of shape (1). @@ -448,7 +473,7 @@ eval = function(matrix[double] probs, matrix[double] Y) generate_dummy_data = function() return (matrix[double] X, matrix[double] Y, int C, int Hin, int Win) { /* - * Generate a dummy dataset similar to the MNIST dataset. + * Generate a dummy dataset similar to the breast cancer dataset. * * Outputs: * - X: Input data matrix, of shape (N, D). @@ -459,9 +484,9 @@ generate_dummy_data = function() */ # Generate dummy input data N = 1024 # num examples - C = 1 # num input channels - Hin = 64 # input height - Win = 64 # input width + C = 3 # num input channels + Hin = 256 # input height + Win = 256 # input width K = 3 # num target classes X = rand(rows=N, cols=C*Hin*Win, pdf="normal") classes = round(rand(rows=N, cols=1, min=1, max=K, pdf="uniform")) http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/examples/mnist_lenet.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/examples/mnist_lenet.dml b/scripts/staging/SystemML-NN/examples/mnist_lenet.dml index f991487..e2895b8 100644 --- a/scripts/staging/SystemML-NN/examples/mnist_lenet.dml +++ b/scripts/staging/SystemML-NN/examples/mnist_lenet.dml @@ -24,11 +24,11 @@ */ # Imports source("nn/layers/affine.dml") as affine -source("nn/layers/conv_builtin.dml") as conv +source("nn/layers/conv2d_builtin.dml") as conv2d source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss source("nn/layers/dropout.dml") as dropout source("nn/layers/l2_reg.dml") as l2_reg -source("nn/layers/max_pool_builtin.dml") as max_pool +source("nn/layers/max_pool2d_builtin.dml") as max_pool2d source("nn/layers/relu.dml") as relu source("nn/layers/softmax.dml") as softmax source("nn/optim/sgd_nesterov.dml") as sgd_nesterov @@ -81,8 +81,8 @@ train = function(matrix[double] X, matrix[double] y, N3 = 512 # num nodes in affine3 # Note: affine4 has K nodes, which is equal to the number of target dimensions (num classes) - [W1, b1] = conv::init(F1, C, Hf, Wf) # inputs: (N, C*Hin*Win) - [W2, b2] = conv::init(F2, F1, Hf, Wf) # inputs: (N, F1*(Hin/2)*(Win/2)) + [W1, b1] = conv2d::init(F1, C, Hf, Wf) # inputs: (N, C*Hin*Win) + [W2, b2] = conv2d::init(F2, F1, Hf, Wf) # inputs: (N, F1*(Hin/2)*(Win/2)) [W3, b3] = affine::init(F2*(Hin/2/2)*(Win/2/2), N3) # inputs: (N, F2*(Hin/2/2)*(Win/2/2)) [W4, b4] = affine::init(N3, K) # inputs: (N, N3) W4 = W4 / sqrt(2) # different initialization, since being fed into softmax, instead of relu @@ -114,17 +114,17 @@ train = function(matrix[double] X, matrix[double] y, # Compute forward pass ## layer 1: conv1 -> relu1 -> pool1 - [outc1, Houtc1, Woutc1] = conv::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + [outc1, Houtc1, Woutc1] = conv2d::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) outr1 = relu::forward(outc1) - [outp1, Houtp1, Woutp1] = max_pool::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2, - strideh=2, stridew=2, pad=0, pad=0) + [outp1, Houtp1, Woutp1] = max_pool2d::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2, + strideh=2, stridew=2, pad=0, pad=0) ## layer 2: conv2 -> relu2 -> pool2 - [outc2, Houtc2, Woutc2] = conv::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf, - stride, 
stride, pad, pad) + [outc2, Houtc2, Woutc2] = conv2d::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf, + stride, stride, pad, pad) outr2 = relu::forward(outc2) - [outp2, Houtp2, Woutp2] = max_pool::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2, - strideh=2, stridew=2, pad=0, pad=0) + [outp2, Houtp2, Woutp2] = max_pool2d::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2, + strideh=2, stridew=2, pad=0, pad=0) ## layer 3: affine3 -> relu3 -> dropout outa3 = affine::forward(outp2, W3, b3) outr3 = relu::forward(outa3) @@ -165,17 +165,17 @@ train = function(matrix[double] X, matrix[double] y, douta3 = relu::backward(doutr3, outa3) [doutp2, dW3, db3] = affine::backward(douta3, outp2, W3, b3) ## layer 2: conv2 -> relu2 -> pool2 - doutr2 = max_pool::backward(doutp2, Houtp2, Woutp2, outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2, - strideh=2, stridew=2, pad=0, pad=0) + doutr2 = max_pool2d::backward(doutp2, Houtp2, Woutp2, outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2, + strideh=2, stridew=2, pad=0, pad=0) doutc2 = relu::backward(doutr2, outc2) - [doutp1, dW2, db2] = conv::backward(doutc2, Houtc2, Woutc2, outp1, W2, b2, F1, - Houtp1, Woutp1, Hf, Wf, stride, stride, pad, pad) + [doutp1, dW2, db2] = conv2d::backward(doutc2, Houtc2, Woutc2, outp1, W2, b2, F1, + Houtp1, Woutp1, Hf, Wf, stride, stride, pad, pad) ## layer 1: conv1 -> relu1 -> pool1 - doutr1 = max_pool::backward(doutp1, Houtp1, Woutp1, outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2, - strideh=2, stridew=2, pad=0, pad=0) + doutr1 = max_pool2d::backward(doutp1, Houtp1, Woutp1, outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2, + strideh=2, stridew=2, pad=0, pad=0) doutc1 = relu::backward(doutr1, outc1) - [dX_batch, dW1, db1] = conv::backward(doutc1, Houtc1, Woutc1, X_batch, W1, b1, C, Hin, Win, - Hf, Wf, stride, stride, pad, pad) + [dX_batch, dW1, db1] = conv2d::backward(doutc1, Houtc1, Woutc1, X_batch, W1, b1, C, Hin, Win, + Hf, Wf, stride, stride, pad, pad) # Compute regularization backward pass dW1_reg = l2_reg::backward(W1, lambda) @@ -260,17 +260,17 @@ predict = function(matrix[double] X, int C, int Hin, int Win, # Compute forward pass ## layer 1: conv1 -> relu1 -> pool1 - [outc1, Houtc1, Woutc1] = conv::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + [outc1, Houtc1, Woutc1] = conv2d::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) outr1 = relu::forward(outc1) - [outp1, Houtp1, Woutp1] = max_pool::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2, - strideh=2, stridew=2, pad=0, pad=0) + [outp1, Houtp1, Woutp1] = max_pool2d::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2, + strideh=2, stridew=2, pad=0, pad=0) ## layer 2: conv2 -> relu2 -> pool2 - [outc2, Houtc2, Woutc2] = conv::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf, - stride, stride, pad, pad) + [outc2, Houtc2, Woutc2] = conv2d::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf, + stride, stride, pad, pad) outr2 = relu::forward(outc2) - [outp2, Houtp2, Woutp2] = max_pool::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2, - strideh=2, stridew=2, pad=0, pad=0) + [outp2, Houtp2, Woutp2] = max_pool2d::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2, + strideh=2, stridew=2, pad=0, pad=0) ## layer 3: affine3 -> relu3 outa3 = affine::forward(outp2, W3, b3) outr3 = relu::forward(outa3) http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/layers/conv.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/layers/conv.dml b/scripts/staging/SystemML-NN/nn/layers/conv.dml 
deleted file mode 100644 index 435b3cf..0000000 --- a/scripts/staging/SystemML-NN/nn/layers/conv.dml +++ /dev/null @@ -1,194 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -/* - * 2D Convolutional layer. - */ -source("nn/util.dml") as util - -forward = function(matrix[double] X, matrix[double] W, matrix[double] b, - int C, int Hin, int Win, int Hf, int Wf, - int strideh, int stridew, int padh, int padw) - return (matrix[double] out, int Hout, int Wout) { - /* - * Computes the forward pass for a 2D spatial convolutional layer with - * F filters. The input data has N examples, each represented as a 3D - * volume unrolled into a single vector. - * - * This implementation uses `im2col` internally for each image to - * extract local image regions (patches) into columns, and then - * performs a matrix multiplication with the filters to compute the - * output maps. - * - * Inputs: - * - X: Inputs, of shape (N, C*Hin*Win). - * - W: Weights, of shape (F, C*Hf*Wf). - * - b: Biases, of shape (F, 1). - * - C: Number of input channels (dimensionality of input depth). - * - Hin: Input height. - * - Win: Input width. - * - Hf: Filter height. - * - Wf: Filter width. - * - strideh: Stride over height. - * - stridew: Stride over width. - * - padh: Padding for top and bottom sides. - * For same output height as input, set `padh = (Hf - 1) / 2`, - * assuming `strideh = 1`. - * More generally, `padh = (Hin*(strideh-1) + Hf - strideh) / 2` - * preserves the spatial dimensions of the input. - * - padw: Padding for left and right sides. - * For same output width as input, set `padw = (Wf - 1) / 2`, - * assuming `stridew = 1`. - * More generally, `padw = (Win*(stridew-1) + Wf - stridew) / 2` - * preserves the spatial dimensions of the input. - * - * Outputs: - * - out: Outputs, of shape (N, F*Hout*Wout). - * - Hout: Output height. - * - Wout: Output width. 
- */ - N = nrow(X) - F = nrow(W) - Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1) - Wout = as.integer((Win + 2*padw - Wf)/stridew + 1) - - # Create output volume - out = matrix(0, rows=N, cols=F*Hout*Wout) - - # Convolution - im2col implementation - parfor (n in 1:N) { # all examples - Xn = matrix(X[n,], rows=C, cols=Hin*Win) # reshape - - # Pad image - Xn_padded = util::pad_image(Xn, Hin, Win, padh, padw, 0) # shape (C, (Hin+2*padh)*(Win+2*padw)) - - # Extract local image patches into columns with im2col, of shape (C*Hf*Wf, Hout*Wout) - Xn_padded_cols = util::im2col(Xn_padded, Hin+2*padh, Win+2*padw, Hf, Wf, strideh, stridew) - - # Convolve patches with filters - outn = W %*% Xn_padded_cols + b # shape (F, Hout*Wout) - out[n,] = matrix(outn, rows=1, cols=F*Hout*Wout) # reshape - } -} - -backward = function(matrix[double] dout, int Hout, int Wout, - matrix[double] X, matrix[double] W, matrix[double] b, - int C, int Hin, int Win, int Hf, int Wf, - int strideh, int stridew, int padh, int padw) - return (matrix[double] dX, matrix[double] dW, matrix[double] db) { - /* - * Computes the backward pass for a 2D spatial convolutional layer - * with F filters. - * - * This implementation uses `im2col` and `col2im` internally. - * - * Inputs: - * - dout: Gradient wrt `out` from upstream, of - * shape (N, F*Hout*Wout). - * - Hout: Output height. - * - Wout: Output width. - * - X: Inputs, of shape (N, C*Hin*Win). - * - W: Weights, of shape (F, C*Hf*Wf). - * - b: Biases, of shape (F, 1). - * - C: Number of input channels (dimensionality of input depth). - * - Hin: Input height. - * - Win: Input width. - * - Hf: Filter height. - * - Wf: Filter width. - * - strideh: Stride over height. - * - stridew: Stride over width. - * - padh: Padding for top and bottom sides. - * - padw: Padding for left and right sides. - * - * Outputs: - * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win). - * - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf). - * - db: Gradient wrt `b`, of shape (F, 1). - */ - N = nrow(X) - F = nrow(W) - - # Create gradient volumes - # Note: Create convenience gradient volumes for dW and db that will - # allow for one gradient to be stored per example, allowing for - # parallel computation at the expense of memory. We will reduce at - # the end. - dX = matrix(0, rows=N, cols=C*Hin*Win) - dWN = matrix(0, rows=N, cols=F*C*Hf*Wf) # dW = matrix(0, rows=F, cols=C*Hf*Wf) - dbN = matrix(0, rows=N, cols=F) # db = matrix(0, rows=F, cols=1) - - # Partial derivatives for convolution - im2col implementation - parfor (n in 1:N) { # all examples - doutn = matrix(dout[n,], rows=F, cols=Hout*Wout) - - # Compute dW - Xn = matrix(X[n,], rows=C, cols=Hin*Win) # reshape - Xn_padded = util::pad_image(Xn, Hin, Win, padh, padw, 0) # shape (C, (Hin+2*padh)*(Win+2*padw)) - Xn_padded_cols = util::im2col(Xn_padded, Hin+2*padh, Win+2*padw, Hf, Wf, strideh, stridew) - # dW = dW + doutn %*% t(Xn_padded_cols) - dWN[n,] = matrix(doutn %*% t(Xn_padded_cols), rows=1, cols=F*C*Hf*Wf) - - # Compute db - # db = db + rowSums(doutn) - dbN[n,] = matrix(rowSums(doutn), rows=1, cols=F) - - # Compute dX - dXn_padded_cols = t(W) %*% doutn # shape (C*Hf*Wf, Hout*Wout) - dXn_padded = util::col2im(dXn_padded_cols, C, Hin+2*padh, Win+2*padw, Hf, Wf, - strideh, stridew, "add") - dXn = util::unpad_image(dXn_padded, Hin, Win, padh, padw) - dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win) # reshape - } - - # Reduce convenience gradient volumes with one gradient per example - # into single gradients for W and b. 
- dW = matrix(colSums(dWN), rows=F, cols=C*Hf*Wf) - db = matrix(colSums(dbN), rows=F, cols=1) -} - -init = function(int F, int C, int Hf, int Wf) - return (matrix[double] W, matrix[double] b) { - /* - * Initialize the parameters of this layer. - * - * Note: This is just a convenience function, and parameters - * may be initialized manually if needed. - * - * We use the heuristic by He et al., which limits the magnification - * of inputs/gradients during forward/backward passes by scaling - * unit-Gaussian weights by a factor of sqrt(2/n), under the - * assumption of relu neurons. - * - http://arxiv.org/abs/1502.01852 - * - * Inputs: - * - F: Number of filters. - * - C: Number of input channels (dimensionality of depth). - * - Hf: Filter height. - * - Wf: Filter width. - * - * Outputs: - * - W: Weights, of shape (F, C*Hf*Wf). - * - b: Biases, of shape (F, 1). - */ - W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf)) - b = matrix(0, rows=F, cols=1) -} - http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/layers/conv2d.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/layers/conv2d.dml b/scripts/staging/SystemML-NN/nn/layers/conv2d.dml new file mode 100644 index 0000000..435b3cf --- /dev/null +++ b/scripts/staging/SystemML-NN/nn/layers/conv2d.dml @@ -0,0 +1,194 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +/* + * 2D Convolutional layer. + */ +source("nn/util.dml") as util + +forward = function(matrix[double] X, matrix[double] W, matrix[double] b, + int C, int Hin, int Win, int Hf, int Wf, + int strideh, int stridew, int padh, int padw) + return (matrix[double] out, int Hout, int Wout) { + /* + * Computes the forward pass for a 2D spatial convolutional layer with + * F filters. The input data has N examples, each represented as a 3D + * volume unrolled into a single vector. + * + * This implementation uses `im2col` internally for each image to + * extract local image regions (patches) into columns, and then + * performs a matrix multiplication with the filters to compute the + * output maps. + * + * Inputs: + * - X: Inputs, of shape (N, C*Hin*Win). + * - W: Weights, of shape (F, C*Hf*Wf). + * - b: Biases, of shape (F, 1). + * - C: Number of input channels (dimensionality of input depth). + * - Hin: Input height. + * - Win: Input width. + * - Hf: Filter height. + * - Wf: Filter width. + * - strideh: Stride over height. + * - stridew: Stride over width. + * - padh: Padding for top and bottom sides. + * For same output height as input, set `padh = (Hf - 1) / 2`, + * assuming `strideh = 1`. 
+ * More generally, `padh = (Hin*(strideh-1) + Hf - strideh) / 2` + * preserves the spatial dimensions of the input. + * - padw: Padding for left and right sides. + * For same output width as input, set `padw = (Wf - 1) / 2`, + * assuming `stridew = 1`. + * More generally, `padw = (Win*(stridew-1) + Wf - stridew) / 2` + * preserves the spatial dimensions of the input. + * + * Outputs: + * - out: Outputs, of shape (N, F*Hout*Wout). + * - Hout: Output height. + * - Wout: Output width. + */ + N = nrow(X) + F = nrow(W) + Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1) + Wout = as.integer((Win + 2*padw - Wf)/stridew + 1) + + # Create output volume + out = matrix(0, rows=N, cols=F*Hout*Wout) + + # Convolution - im2col implementation + parfor (n in 1:N) { # all examples + Xn = matrix(X[n,], rows=C, cols=Hin*Win) # reshape + + # Pad image + Xn_padded = util::pad_image(Xn, Hin, Win, padh, padw, 0) # shape (C, (Hin+2*padh)*(Win+2*padw)) + + # Extract local image patches into columns with im2col, of shape (C*Hf*Wf, Hout*Wout) + Xn_padded_cols = util::im2col(Xn_padded, Hin+2*padh, Win+2*padw, Hf, Wf, strideh, stridew) + + # Convolve patches with filters + outn = W %*% Xn_padded_cols + b # shape (F, Hout*Wout) + out[n,] = matrix(outn, rows=1, cols=F*Hout*Wout) # reshape + } +} + +backward = function(matrix[double] dout, int Hout, int Wout, + matrix[double] X, matrix[double] W, matrix[double] b, + int C, int Hin, int Win, int Hf, int Wf, + int strideh, int stridew, int padh, int padw) + return (matrix[double] dX, matrix[double] dW, matrix[double] db) { + /* + * Computes the backward pass for a 2D spatial convolutional layer + * with F filters. + * + * This implementation uses `im2col` and `col2im` internally. + * + * Inputs: + * - dout: Gradient wrt `out` from upstream, of + * shape (N, F*Hout*Wout). + * - Hout: Output height. + * - Wout: Output width. + * - X: Inputs, of shape (N, C*Hin*Win). + * - W: Weights, of shape (F, C*Hf*Wf). + * - b: Biases, of shape (F, 1). + * - C: Number of input channels (dimensionality of input depth). + * - Hin: Input height. + * - Win: Input width. + * - Hf: Filter height. + * - Wf: Filter width. + * - strideh: Stride over height. + * - stridew: Stride over width. + * - padh: Padding for top and bottom sides. + * - padw: Padding for left and right sides. + * + * Outputs: + * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win). + * - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf). + * - db: Gradient wrt `b`, of shape (F, 1). + */ + N = nrow(X) + F = nrow(W) + + # Create gradient volumes + # Note: Create convenience gradient volumes for dW and db that will + # allow for one gradient to be stored per example, allowing for + # parallel computation at the expense of memory. We will reduce at + # the end. 
+ dX = matrix(0, rows=N, cols=C*Hin*Win) + dWN = matrix(0, rows=N, cols=F*C*Hf*Wf) # dW = matrix(0, rows=F, cols=C*Hf*Wf) + dbN = matrix(0, rows=N, cols=F) # db = matrix(0, rows=F, cols=1) + + # Partial derivatives for convolution - im2col implementation + parfor (n in 1:N) { # all examples + doutn = matrix(dout[n,], rows=F, cols=Hout*Wout) + + # Compute dW + Xn = matrix(X[n,], rows=C, cols=Hin*Win) # reshape + Xn_padded = util::pad_image(Xn, Hin, Win, padh, padw, 0) # shape (C, (Hin+2*padh)*(Win+2*padw)) + Xn_padded_cols = util::im2col(Xn_padded, Hin+2*padh, Win+2*padw, Hf, Wf, strideh, stridew) + # dW = dW + doutn %*% t(Xn_padded_cols) + dWN[n,] = matrix(doutn %*% t(Xn_padded_cols), rows=1, cols=F*C*Hf*Wf) + + # Compute db + # db = db + rowSums(doutn) + dbN[n,] = matrix(rowSums(doutn), rows=1, cols=F) + + # Compute dX + dXn_padded_cols = t(W) %*% doutn # shape (C*Hf*Wf, Hout*Wout) + dXn_padded = util::col2im(dXn_padded_cols, C, Hin+2*padh, Win+2*padw, Hf, Wf, + strideh, stridew, "add") + dXn = util::unpad_image(dXn_padded, Hin, Win, padh, padw) + dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win) # reshape + } + + # Reduce convenience gradient volumes with one gradient per example + # into single gradients for W and b. + dW = matrix(colSums(dWN), rows=F, cols=C*Hf*Wf) + db = matrix(colSums(dbN), rows=F, cols=1) +} + +init = function(int F, int C, int Hf, int Wf) + return (matrix[double] W, matrix[double] b) { + /* + * Initialize the parameters of this layer. + * + * Note: This is just a convenience function, and parameters + * may be initialized manually if needed. + * + * We use the heuristic by He et al., which limits the magnification + * of inputs/gradients during forward/backward passes by scaling + * unit-Gaussian weights by a factor of sqrt(2/n), under the + * assumption of relu neurons. + * - http://arxiv.org/abs/1502.01852 + * + * Inputs: + * - F: Number of filters. + * - C: Number of input channels (dimensionality of depth). + * - Hf: Filter height. + * - Wf: Filter width. + * + * Outputs: + * - W: Weights, of shape (F, C*Hf*Wf). + * - b: Biases, of shape (F, 1). + */ + W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf)) + b = matrix(0, rows=F, cols=1) +} + http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/layers/conv2d_builtin.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/layers/conv2d_builtin.dml b/scripts/staging/SystemML-NN/nn/layers/conv2d_builtin.dml new file mode 100644 index 0000000..29021cf --- /dev/null +++ b/scripts/staging/SystemML-NN/nn/layers/conv2d_builtin.dml @@ -0,0 +1,160 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + +/* + * 2D Convolutional layer. + * + * This implementation uses a built-in operator for higher performance. + */ + +forward = function(matrix[double] X, matrix[double] W, matrix[double] b, + int C, int Hin, int Win, int Hf, int Wf, + int strideh, int stridew, int padh, int padw) + return (matrix[double] out, int Hout, int Wout) { + /* + * Computes the forward pass for a 2D spatial convolutional layer with + * F filters. The input data has N examples, each represented as a 3D + * volume unrolled into a single vector. + * + * This implementation uses a built-in operator for higher + * performance. + * + * Inputs: + * - X: Inputs, of shape (N, C*Hin*Win). + * - W: Weights, of shape (F, C*Hf*Wf). + * - b: Biases, of shape (F, 1). + * - C: Number of input channels (dimensionality of depth). + * - Hin: Input height. + * - Win: Input width. + * - Hf: Filter height. + * - Wf: Filter width. + * - strideh: Stride over height. + * - stridew: Stride over width. + * - padh: Padding for top and bottom sides. + * For same output height as input, set `padh = (Hf - 1) / 2`, + * assuming `strideh = 1`. + * More generally, `padh = (Hin*(strideh-1) + Hf - strideh) / 2` + * preserves the spatial dimensions of the input. + * - padw: Padding for left and right sides. + * For same output width as input, set `padw = (Wf - 1) / 2`, + * assuming `stridew = 1`. + * More generally, `padw = (Win*(stridew-1) + Wf - stridew) / 2` + * preserves the spatial dimensions of the input. + * + * Outputs: + * - out: Outputs, of shape (N, F*Hout*Wout). + * - Hout: Output height. + * - Wout: Output width. + */ + N = nrow(X) + F = nrow(W) + Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1) + Wout = as.integer((Win + 2*padw - Wf)/stridew + 1) + + # Convolution - built-in implementation + out = conv2d(X, W, input_shape=[N,C,Hin,Win], filter_shape=[F,C,Hf,Wf], + stride=[strideh,stridew], padding=[padh,padw]) + + # Add bias term to each output filter + out = bias_add(out, b) +} + +backward = function(matrix[double] dout, int Hout, int Wout, + matrix[double] X, matrix[double] W, matrix[double] b, + int C, int Hin, int Win, int Hf, int Wf, + int strideh, int stridew, int padh, int padw) + return (matrix[double] dX, matrix[double] dW, matrix[double] db) { + /* + * Computes the backward pass for a 2D spatial convolutional layer + * with F filters. + * + * Inputs: + * - dout: Gradient wrt `out` from upstream, of + * shape (N, F*Hout*Wout). + * - Hout: Output height. + * - Wout: Output width. + * - X: Inputs, of shape (N, C*Hin*Win). + * - W: Weights, of shape (F, C*Hf*Wf). + * - b: Biases, of shape (F, 1). + * - C: Number of input channels (dimensionality of depth). + * - Hin: Input height. + * - Win: Input width. + * - Hf: Filter height. + * - Wf: Filter width. + * - strideh: Stride over height. + * - stridew: Stride over width. + * - padh: Padding for top and bottom sides. + * For same output height as input, set `padh = (Hf - 1) / 2`, + * assuming `strideh = 1`. + * More generally, `padh = (Hin*(strideh-1) + Hf - strideh) / 2` + * preserves the spatial dimensions of the input. + * - padw: Padding for left and right sides. + * For same output width as input, set `padw = (Wf - 1) / 2`, + * assuming `stridew = 1`. + * More generally, `padw = (Win*(stridew-1) + Wf - stridew) / 2` + * preserves the spatial dimensions of the input. + * + * Outputs: + * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win). + * - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf). 
+ * - db: Gradient wrt `b`, of shape (F, 1). + */ + N = nrow(X) + F = nrow(W) + + # Partial derivatives for convolution - built-in implementation + dW = conv2d_backward_filter(X, dout, stride=[strideh,stridew], padding=[padh,padw], + input_shape=[N,C,Hin,Win], filter_shape=[F,C,Hf,Wf]) + dX = conv2d_backward_data(W, dout, stride=[strideh, stridew], padding=[padh,padw], + input_shape=[N,C,Hin,Win], filter_shape=[F,C,Hf,Wf]) + + # Partial derivatives for bias vector + db = rowSums(matrix(colSums(dout), rows=F, cols=Hout*Wout)) +} + +init = function(int F, int C, int Hf, int Wf) + return (matrix[double] W, matrix[double] b) { + /* + * Initialize the parameters of this layer. + * + * Note: This is just a convenience function, and parameters + * may be initialized manually if needed. + * + * We use the heuristic by He et al., which limits the magnification + * of inputs/gradients during forward/backward passes by scaling + * unit-Gaussian weights by a factor of sqrt(2/n), under the + * assumption of relu neurons. + * - http://arxiv.org/abs/1502.01852 + * + * Inputs: + * - F: Number of filters. + * - C: Number of input channels (dimensionality of depth). + * - Hf: Filter height. + * - Wf: Filter width. + * + * Outputs: + * - W: Weights, of shape (F, C*Hf*Wf). + * - b: Biases, of shape (F, 1). + */ + W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf)) + b = matrix(0, rows=F, cols=1) +} + http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml b/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml deleted file mode 100644 index c2b809e..0000000 --- a/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml +++ /dev/null @@ -1,155 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -/* - * 2D Convolutional layer. - */ - -forward = function(matrix[double] X, matrix[double] W, matrix[double] b, - int C, int Hin, int Win, int Hf, int Wf, - int strideh, int stridew, int padh, int padw) - return (matrix[double] out, int Hout, int Wout) { - /* - * Computes the forward pass for a 2D spatial convolutional layer with - * F filters. The input data has N examples, each represented as a 3D - * volume unrolled into a single vector. - * - * Inputs: - * - X: Inputs, of shape (N, C*Hin*Win). - * - W: Weights, of shape (F, C*Hf*Wf). - * - b: Biases, of shape (F, 1). - * - C: Number of input channels (dimensionality of depth). - * - Hin: Input height. - * - Win: Input width. - * - Hf: Filter height. - * - Wf: Filter width. 
- * - strideh: Stride over height. - * - stridew: Stride over width. - * - padh: Padding for top and bottom sides. - * For same output height as input, set `padh = (Hf - 1) / 2`, - * assuming `strideh = 1`. - * More generally, `padh = (Hin*(strideh-1) + Hf - strideh) / 2` - * preserves the spatial dimensions of the input. - * - padw: Padding for left and right sides. - * For same output width as input, set `padw = (Wf - 1) / 2`, - * assuming `stridew = 1`. - * More generally, `padw = (Win*(stridew-1) + Wf - stridew) / 2` - * preserves the spatial dimensions of the input. - * - * Outputs: - * - out: Outputs, of shape (N, F*Hout*Wout). - * - Hout: Output height. - * - Wout: Output width. - */ - N = nrow(X) - F = nrow(W) - Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1) - Wout = as.integer((Win + 2*padw - Wf)/stridew + 1) - - # Convolution - built-in implementation - out = conv2d(X, W, input_shape=[N,C,Hin,Win], filter_shape=[F,C,Hf,Wf], - stride=[strideh,stridew], padding=[padh,padw]) - - # Add bias term to each output filter - out = bias_add(out, b) -} - -backward = function(matrix[double] dout, int Hout, int Wout, - matrix[double] X, matrix[double] W, matrix[double] b, - int C, int Hin, int Win, int Hf, int Wf, - int strideh, int stridew, int padh, int padw) - return (matrix[double] dX, matrix[double] dW, matrix[double] db) { - /* - * Computes the backward pass for a 2D spatial convolutional layer - * with F filters. - * - * Inputs: - * - dout: Gradient wrt `out` from upstream, of - * shape (N, F*Hout*Wout). - * - Hout: Output height. - * - Wout: Output width. - * - X: Inputs, of shape (N, C*Hin*Win). - * - W: Weights, of shape (F, C*Hf*Wf). - * - b: Biases, of shape (F, 1). - * - C: Number of input channels (dimensionality of depth). - * - Hin: Input height. - * - Win: Input width. - * - Hf: Filter height. - * - Wf: Filter width. - * - strideh: Stride over height. - * - stridew: Stride over width. - * - padh: Padding for top and bottom sides. - * For same output height as input, set `padh = (Hf - 1) / 2`, - * assuming `strideh = 1`. - * More generally, `padh = (Hin*(strideh-1) + Hf - strideh) / 2` - * preserves the spatial dimensions of the input. - * - padw: Padding for left and right sides. - * For same output width as input, set `padw = (Wf - 1) / 2`, - * assuming `stridew = 1`. - * More generally, `padw = (Win*(stridew-1) + Wf - stridew) / 2` - * preserves the spatial dimensions of the input. - * - * Outputs: - * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win). - * - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf). - * - db: Gradient wrt `b`, of shape (F, 1). - */ - N = nrow(X) - F = nrow(W) - - # Partial derivatives for convolution - built-in implementation - dW = conv2d_backward_filter(X, dout, stride=[strideh,stridew], padding=[padh,padw], - input_shape=[N,C,Hin,Win], filter_shape=[F,C,Hf,Wf]) - dX = conv2d_backward_data(W, dout, stride=[strideh, stridew], padding=[padh,padw], - input_shape=[N,C,Hin,Win], filter_shape=[F,C,Hf,Wf]) - - # Partial derivatives for bias vector - db = rowSums(matrix(colSums(dout), rows=F, cols=Hout*Wout)) -} - -init = function(int F, int C, int Hf, int Wf) - return (matrix[double] W, matrix[double] b) { - /* - * Initialize the parameters of this layer. - * - * Note: This is just a convenience function, and parameters - * may be initialized manually if needed. 
- * - * We use the heuristic by He et al., which limits the magnification - * of inputs/gradients during forward/backward passes by scaling - * unit-Gaussian weights by a factor of sqrt(2/n), under the - * assumption of relu neurons. - * - http://arxiv.org/abs/1502.01852 - * - * Inputs: - * - F: Number of filters. - * - C: Number of input channels (dimensionality of depth). - * - Hf: Filter height. - * - Wf: Filter width. - * - * Outputs: - * - W: Weights, of shape (F, C*Hf*Wf). - * - b: Biases, of shape (F, 1). - */ - W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf)) - b = matrix(0, rows=F, cols=1) -} - http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/layers/max_pool.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/layers/max_pool.dml b/scripts/staging/SystemML-NN/nn/layers/max_pool.dml deleted file mode 100644 index a12877f..0000000 --- a/scripts/staging/SystemML-NN/nn/layers/max_pool.dml +++ /dev/null @@ -1,159 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -/* - * Max pooling layer. - */ -source("nn/util.dml") as util - -forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf, - int strideh, int stridew, int padh, int padw) - return (matrix[double] out, int Hout, int Wout) { - /* - * Computes the forward pass for a 2D spatial max pooling layer. - * The input data has N examples, each represented as a 3D volume - * unrolled into a single vector. - * - * This implementation uses `im2col` internally for each image to - * extract local image regions (patches) of each channel slice into - * columns, and then performs max pooling over the patches to compute - * the output maps. - * - * Inputs: - * - X: Inputs, of shape (N, C*Hin*Win). - * - C: Number of input channels (dimensionality of input depth). - * - Hin: Input height. - * - Win: Input width. - * - Hf: Filter height. - * - Wf: Filter width. - * - strideh: Stride over height. - * - stridew: Stride over width. - * - padh: Padding for top and bottom sides. - * A typical value is 0. - * - padw: Padding for left and right sides. - * A typical value is 0. - * - * Outputs: - * - out: Outputs, of shape (N, C*Hout*Wout). - * - Hout: Output height. - * - Wout: Output width. 
- */ - N = nrow(X) - Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1) - Wout = as.integer((Win + 2*padw - Wf)/stridew + 1) - pad_value = -1/0 # in max pooling we pad with -infinity - - # Create output volume - out = matrix(0, rows=N, cols=C*Hout*Wout) - - # Max pooling - im2col implementation - parfor (n in 1:N) { # all examples - img = matrix(X[n,], rows=C, cols=Hin*Win) # reshape - - if (padh > 0 | padw > 0) { - # Pad image to shape (C, (Hin+2*padh)*(Win+2*padw)) - img = util::pad_image(img, Hin, Win, padh, padw, pad_value) - } - - img_maxes = matrix(0, rows=C, cols=Hout*Wout) # zeros - parfor (c in 1:C) { # all channels - # Extract local image slice patches into columns with im2col, of shape (Hf*Wf, Hout*Wout) - img_slice_cols = util::im2col(img[c,], Hin+2*padh, Win+2*padw, Hf, Wf, strideh, stridew) - - # Max pooling on patches - img_maxes[c,] = colMaxs(img_slice_cols) - } - - out[n,] = matrix(img_maxes, rows=1, cols=C*Hout*Wout) - } -} - -backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X, - int C, int Hin, int Win, int Hf, int Wf, - int strideh, int stridew, int padh, int padw) - return (matrix[double] dX) { - /* - * Computes the backward pass for a 2D spatial max pooling layer. - * The input data has N examples, each represented as a 3D volume - * unrolled into a single vector. - * - * Inputs: - * - dout: Gradient wrt `out` from upstream, of - * shape (N, C*Hout*Wout). - * - Hout: Output height. - * - Wout: Output width. - * - X: Input data matrix, of shape (N, C*Hin*Win). - * - C: Number of input channels (dimensionality of input depth). - * - Hin: Input height. - * - Win: Input width. - * - Hf: Filter height. - * - Wf: Filter width. - * - strideh: Stride over height. - * - stridew: Stride over width. - * - padh: Padding for top and bottom sides. - * A typical value is 0. - * - padw: Padding for left and right sides. - * A typical value is 0. - * - * Outputs: - * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win). 
- */ - N = nrow(X) - pad_value = -1/0 # in max pooling we pad with -infinity - - # Create gradient volume - dX = matrix(0, rows=N, cols=C*Hin*Win) - - # Gradient of max pooling - parfor (n in 1:N, check=0) { # all examples - img = matrix(X[n,], rows=C, cols=Hin*Win) - if (padh > 0 | padw > 0) { - # Pad image to shape (C, (Hin+2*padh)*(Win+2*padw)) - img = util::pad_image(img, Hin, Win, padh, padw, pad_value) - } - - dimg = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) - parfor (c in 1:C, check=0) { # all channels - img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw) - dimg_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw) - for (hout in 1:Hout, check=0) { # all output rows - hin = (hout-1)*strideh + 1 - for (wout in 1:Wout) { # all output columns - win = (wout-1)*stridew + 1 - img_slice_patch = img_slice[hin:hin+Hf-1, win:win+Wf-1] - max_val_ind = img_slice_patch == max(img_slice_patch) # max value indicator matrix - # gradient passes through only for the max value(s) in this patch - dimg_slice_patch = max_val_ind * dout[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout] - dimg_slice[hin:hin+Hf-1, win:win+Wf-1] = dimg_slice[hin:hin+Hf-1, win:win+Wf-1] - + dimg_slice_patch - } - } - dimg[c,] = matrix(dimg_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) - } - - if (padh > 0 | padw > 0) { - # Unpad image gradient - dimg = util::unpad_image(dimg, Hin, Win, padh, padw) # shape (C, (Hin+2*padh)*(Win+2*padw)) - } - dX[n,] = matrix(dimg, rows=1, cols=C*Hin*Win) - } -} - http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/layers/max_pool2d.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/layers/max_pool2d.dml b/scripts/staging/SystemML-NN/nn/layers/max_pool2d.dml new file mode 100644 index 0000000..229b7b9 --- /dev/null +++ b/scripts/staging/SystemML-NN/nn/layers/max_pool2d.dml @@ -0,0 +1,159 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +/* + * Max Pooling layer. + */ +source("nn/util.dml") as util + +forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf, + int strideh, int stridew, int padh, int padw) + return (matrix[double] out, int Hout, int Wout) { + /* + * Computes the forward pass for a 2D spatial max pooling layer. + * The input data has N examples, each represented as a 3D volume + * unrolled into a single vector. + * + * This implementation uses `im2col` internally for each image to + * extract local image regions (patches) of each channel slice into + * columns, and then performs max pooling over the patches to compute + * the output maps. 
+ * + * Inputs: + * - X: Inputs, of shape (N, C*Hin*Win). + * - C: Number of input channels (dimensionality of input depth). + * - Hin: Input height. + * - Win: Input width. + * - Hf: Filter height. + * - Wf: Filter width. + * - strideh: Stride over height. + * - stridew: Stride over width. + * - padh: Padding for top and bottom sides. + * A typical value is 0. + * - padw: Padding for left and right sides. + * A typical value is 0. + * + * Outputs: + * - out: Outputs, of shape (N, C*Hout*Wout). + * - Hout: Output height. + * - Wout: Output width. + */ + N = nrow(X) + Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1) + Wout = as.integer((Win + 2*padw - Wf)/stridew + 1) + pad_value = -1/0 # in max pooling we pad with -infinity + + # Create output volume + out = matrix(0, rows=N, cols=C*Hout*Wout) + + # Max pooling - im2col implementation + parfor (n in 1:N) { # all examples + img = matrix(X[n,], rows=C, cols=Hin*Win) # reshape + + if (padh > 0 | padw > 0) { + # Pad image to shape (C, (Hin+2*padh)*(Win+2*padw)) + img = util::pad_image(img, Hin, Win, padh, padw, pad_value) + } + + img_maxes = matrix(0, rows=C, cols=Hout*Wout) # zeros + parfor (c in 1:C) { # all channels + # Extract local image slice patches into columns with im2col, of shape (Hf*Wf, Hout*Wout) + img_slice_cols = util::im2col(img[c,], Hin+2*padh, Win+2*padw, Hf, Wf, strideh, stridew) + + # Max pooling on patches + img_maxes[c,] = colMaxs(img_slice_cols) + } + + out[n,] = matrix(img_maxes, rows=1, cols=C*Hout*Wout) + } +} + +backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X, + int C, int Hin, int Win, int Hf, int Wf, + int strideh, int stridew, int padh, int padw) + return (matrix[double] dX) { + /* + * Computes the backward pass for a 2D spatial max pooling layer. + * The input data has N examples, each represented as a 3D volume + * unrolled into a single vector. + * + * Inputs: + * - dout: Gradient wrt `out` from upstream, of + * shape (N, C*Hout*Wout). + * - Hout: Output height. + * - Wout: Output width. + * - X: Input data matrix, of shape (N, C*Hin*Win). + * - C: Number of input channels (dimensionality of input depth). + * - Hin: Input height. + * - Win: Input width. + * - Hf: Filter height. + * - Wf: Filter width. + * - strideh: Stride over height. + * - stridew: Stride over width. + * - padh: Padding for top and bottom sides. + * A typical value is 0. + * - padw: Padding for left and right sides. + * A typical value is 0. + * + * Outputs: + * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win). 
+   */
+  N = nrow(X)
+  pad_value = -1/0  # in max pooling we pad with -infinity
+
+  # Create gradient volume
+  dX = matrix(0, rows=N, cols=C*Hin*Win)
+
+  # Gradient of max pooling
+  parfor (n in 1:N, check=0) {  # all examples
+    img = matrix(X[n,], rows=C, cols=Hin*Win)
+    if (padh > 0 | padw > 0) {
+      # Pad image to shape (C, (Hin+2*padh)*(Win+2*padw))
+      img = util::pad_image(img, Hin, Win, padh, padw, pad_value)
+    }
+
+    dimg = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw))
+    parfor (c in 1:C, check=0) {  # all channels
+      img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw)
+      dimg_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw)
+      for (hout in 1:Hout, check=0) {  # all output rows
+        hin = (hout-1)*strideh + 1
+        for (wout in 1:Wout) {  # all output columns
+          win = (wout-1)*stridew + 1
+          img_slice_patch = img_slice[hin:hin+Hf-1, win:win+Wf-1]
+          max_val_ind = img_slice_patch == max(img_slice_patch)  # max value indicator matrix
+          # gradient passes through only for the max value(s) in this patch
+          dimg_slice_patch = max_val_ind * dout[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout]
+          dimg_slice[hin:hin+Hf-1, win:win+Wf-1] = dimg_slice[hin:hin+Hf-1, win:win+Wf-1]
+                                                   + dimg_slice_patch
+        }
+      }
+      dimg[c,] = matrix(dimg_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw))
+    }
+
+    if (padh > 0 | padw > 0) {
+      # Unpad image gradient
+      dimg = util::unpad_image(dimg, Hin, Win, padh, padw)  # shape (C, Hin*Win)
+    }
+    dX[n,] = matrix(dimg, rows=1, cols=C*Hin*Win)
+  }
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/layers/max_pool2d_builtin.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/max_pool2d_builtin.dml b/scripts/staging/SystemML-NN/nn/layers/max_pool2d_builtin.dml
new file mode 100644
index 0000000..be4e195
--- /dev/null
+++ b/scripts/staging/SystemML-NN/nn/layers/max_pool2d_builtin.dml
@@ -0,0 +1,103 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * 2D Max Pooling layer.
+ *
+ * This implementation uses a built-in operator for higher performance.
+ */
+
+forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
+                   int strideh, int stridew, int padh, int padw)
+    return (matrix[double] out, int Hout, int Wout) {
+  /*
+   * Computes the forward pass for a 2D spatial max pooling layer.
+   * The input data has N examples, each represented as a 3D volume
+   * unrolled into a single vector.
+   *
+   * This implementation uses a built-in operator for higher
+   * performance.
+   *
+   * Inputs:
+   *  - X: Inputs, of shape (N, C*Hin*Win).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   *  - Hf: Filter height.
+   *  - Wf: Filter width.
+   *  - strideh: Stride over height.
+   *  - stridew: Stride over width.
+   *  - padh: Padding for top and bottom sides.
+   *      A typical value is 0.
+   *  - padw: Padding for left and right sides.
+   *      A typical value is 0.
+   *
+   * Outputs:
+   *  - out: Outputs, of shape (N, C*Hout*Wout).
+   *  - Hout: Output height.
+   *  - Wout: Output width.
+   */
+  N = nrow(X)
+  Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+  Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
+
+  # Max pooling - built-in implementation
+  out = max_pool(X, input_shape=[N,C,Hin,Win], pool_size=[Hf,Wf],
+                 stride=[strideh,stridew], padding=[padh,padw])
+}
+
+backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
+                    int C, int Hin, int Win, int Hf, int Wf,
+                    int strideh, int stridew, int padh, int padw)
+    return (matrix[double] dX) {
+  /*
+   * Computes the backward pass for a 2D spatial max pooling layer.
+   * The input data has N examples, each represented as a 3D volume
+   * unrolled into a single vector.
+   *
+   * Inputs:
+   *  - dout: Gradient wrt `out` from upstream, of
+   *      shape (N, C*Hout*Wout).
+   *  - Hout: Output height.
+   *  - Wout: Output width.
+   *  - X: Inputs, of shape (N, C*Hin*Win).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   *  - Hf: Filter height.
+   *  - Wf: Filter width.
+   *  - strideh: Stride over height.
+   *  - stridew: Stride over width.
+   *  - padh: Padding for top and bottom sides.
+   *      A typical value is 0.
+   *  - padw: Padding for left and right sides.
+   *      A typical value is 0.
+   *
+   * Outputs:
+   *  - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
+   */
+  N = nrow(X)
+
+  # Gradient of max pooling
+  dX = max_pool_backward(X, dout, input_shape=[N,C,Hin,Win], pool_size=[Hf,Wf],
+                         stride=[strideh,stridew], padding=[padh,padw])
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/layers/max_pool_builtin.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/max_pool_builtin.dml b/scripts/staging/SystemML-NN/nn/layers/max_pool_builtin.dml
deleted file mode 100644
index f1cb863..0000000
--- a/scripts/staging/SystemML-NN/nn/layers/max_pool_builtin.dml
+++ /dev/null
@@ -1,103 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * Max pooling layer.
- */
-
-forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
-                   int strideh, int stridew, int padh, int padw)
-    return (matrix[double] out, int Hout, int Wout) {
-  /*
-   * Computes the forward pass for a 2D spatial max pooling layer.
-   * The input data has N examples, each represented as a 3D volume
-   * unrolled into a single vector.
-   *
-   * This implementation uses `im2col` internally for each image to
-   * extract local image regions (patches) of each channel slice into
-   * columns, and then performs max pooling over the patches to compute
-   * the output maps.
-   *
-   * Inputs:
-   *  - X: Inputs, of shape (N, C*Hin*Win).
-   *  - C: Number of input channels (dimensionality of input depth).
-   *  - Hin: Input height.
-   *  - Win: Input width.
-   *  - Hf: Filter height.
-   *  - Wf: Filter width.
-   *  - strideh: Stride over height.
-   *  - stridew: Stride over width.
-   *  - padh: Padding for top and bottom sides.
-   *      A typical value is 0.
-   *  - padw: Padding for left and right sides.
-   *      A typical value is 0.
-   *
-   * Outputs:
-   *  - out: Outputs, of shape (N, C*Hout*Wout).
-   *  - Hout: Output height.
-   *  - Wout: Output width.
-   */
-  N = nrow(X)
-  Hout = as.integer((Hin-Hf)/strideh + 1)
-  Wout = as.integer((Win-Wf)/stridew + 1)
-
-  # Max pooling - built-in implementation
-  out = max_pool(X, input_shape=[N,C,Hin,Win], pool_size=[Hf,Wf],
-                 stride=[strideh,stridew], padding=[padh,padw])
-}
-
-backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
-                    int C, int Hin, int Win, int Hf, int Wf,
-                    int strideh, int stridew, int padh, int padw)
-    return (matrix[double] dX) {
-  /*
-   * Computes the backward pass for a 2D spatial max pooling layer.
-   * The input data has N examples, each represented as a 3D volume
-   * unrolled into a single vector.
-   *
-   * Inputs:
-   *  - dout: Gradient wrt `out` from upstream, of
-   *      shape (N, C*Hout*Wout).
-   *  - Hout: Output height.
-   *  - Wout: Output width.
-   *  - X: Inputs, of shape (N, C*Hin*Win).
-   *  - C: Number of input channels (dimensionality of input depth).
-   *  - Hin: Input height.
-   *  - Win: Input width.
-   *  - Hf: Filter height.
-   *  - Wf: Filter width.
-   *  - strideh: Stride over height.
-   *  - stridew: Stride over width.
-   *  - padh: Padding for top and bottom sides.
-   *      A typical value is 0.
-   *  - padw: Padding for left and right sides.
-   *      A typical value is 0.
-   *
-   * Outputs:
-   *  - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
-   */
-  N = nrow(X)
-
-  # Gradient of max pooling
-  dX = max_pool_backward(X, dout, input_shape=[N,C,Hin,Win], pool_size=[Hf,Wf],
-                         stride=[strideh,stridew], padding=[padh,padw])
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/test/conv2d_simple.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/conv2d_simple.dml b/scripts/staging/SystemML-NN/nn/test/conv2d_simple.dml
new file mode 100644
index 0000000..efd99c3
--- /dev/null
+++ b/scripts/staging/SystemML-NN/nn/test/conv2d_simple.dml
@@ -0,0 +1,215 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * 2D Convolutional layer.
+ *
+ * This implementation is intended to be a simple, reference version.
+ */
+
+forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
+                   int C, int Hin, int Win, int Hf, int Wf,
+                   int strideh, int stridew, int padh, int padw)
+    return (matrix[double] out, int Hout, int Wout) {
+  /*
+   * Computes the forward pass for a 2D spatial convolutional layer with
+   * F filters.  The input data has N examples, each represented as a 3D
+   * volume unrolled into a single vector.
+   *
+   * This implementation is intended to be a simple, reference version.
+   *
+   * Inputs:
+   *  - X: Inputs, of shape (N, C*Hin*Win).
+   *  - W: Weights, of shape (F, C*Hf*Wf).
+   *  - b: Biases, of shape (F, 1).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   *  - Hf: Filter height.
+   *  - Wf: Filter width.
+   *  - strideh: Stride over height.
+   *  - stridew: Stride over width.
+   *  - padh: Padding for top and bottom sides.
+   *  - padw: Padding for left and right sides.
+   *
+   * Outputs:
+   *  - out: Outputs, of shape (N, F*Hout*Wout).
+   *  - Hout: Output height.
+   *  - Wout: Output width.
+   */
+  N = nrow(X)
+  F = nrow(W)
+  Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+  Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
+
+  # Create output volume
+  out = matrix(0, rows=N, cols=F*Hout*Wout)
+
+  # Convolution - Simple reference implementation
+  parfor (n in 1:N) {  # all examples
+    Xn = matrix(X[n,], rows=C, cols=Hin*Win)
+    # Pad image
+    Xn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw))  # zeros
+    parfor (c in 1:C) {
+      Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win)  # depth slice C reshaped
+      Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
+      Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
+      Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw))  # reshape
+    }
+    # Convolve image with filters
+    parfor (f in 1:F, check=0) {  # all filters
+      parfor (hout in 1:Hout, check=0) {  # all output rows
+        h0 = (hout-1)*strideh + 1
+        parfor (wout in 1:Wout, check=0) {  # all output columns
+          w0 = (wout-1)*stridew + 1
+          # Create a patch of the input example corresponding spatially to the filter sizes
+          Xn_padded_patch = matrix(0, rows=C, cols=Hf*Wf)  # zeros
+          parfor (c in 1:C, check=0) {
+            Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)  # reshape
+            Xn_padded_patch[c,] = matrix(Xn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf], rows=1,
+                                         cols=Hf*Wf)  # reshape
+          }
+          out[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout] =
+              W[f,] %*% matrix(Xn_padded_patch, rows=C*Hf*Wf, cols=1) + b[f,]
+        }
+      }
+    }
+  }
+}
+
+backward = function(matrix[double] dout, int Hout, int Wout,
+                    matrix[double] X, matrix[double] W, matrix[double] b,
+                    int C, int Hin, int Win, int Hf, int Wf,
+                    int strideh, int stridew, int padh, int padw)
+    return (matrix[double] dX, matrix[double] dW, matrix[double] db) {
+  /*
+   * Computes the backward pass for a 2D spatial convolutional layer
+   * with F filters.
+   *
+   * This implementation is intended to be a simple, reference version.
+   *
+   * Inputs:
+   *  - dout: Gradient wrt `out` from upstream, of
+   *      shape (N, F*Hout*Wout).
+   *  - Hout: Output height.
+   *  - Wout: Output width.
+   *  - X: Inputs, of shape (N, C*Hin*Win).
+   *  - W: Weights, of shape (F, C*Hf*Wf).
+   *  - b: Biases, of shape (F, 1).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   *  - Hf: Filter height.
+   *  - Wf: Filter width.
+   *  - strideh: Stride over height.
+   *  - stridew: Stride over width.
+   *  - padh: Padding for top and bottom sides.
+   *  - padw: Padding for left and right sides.
+   *
+   * Outputs:
+   *  - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
+   *  - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf).
+   *  - db: Gradient wrt `b`, of shape (F, 1).
+   */
+  N = nrow(X)
+  F = nrow(W)
+  Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+  Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
+
+  # Create gradient volumes
+  dX = matrix(0, rows=N, cols=C*Hin*Win)
+  dW = matrix(0, rows=F, cols=C*Hf*Wf)
+  db = matrix(0, rows=F, cols=1)
+
+  # Partial derivatives for convolution - Simple reference implementation
+  for (n in 1:N) {  # all examples
+    Xn = matrix(X[n,], rows=C, cols=Hin*Win)
+    # Pad image
+    Xn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw))  # zeros
+    parfor (c in 1:C) {
+      Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win)  # depth slice C reshaped
+      Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
+      Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
+      Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw))  # reshape
+    }
+    dXn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw))
+    for (f in 1:F) {  # all filters
+      for (hout in 1:Hout) {  # all output rows
+        h0 = (hout-1) * strideh + 1
+        for (wout in 1:Wout) {  # all output columns
+          w0 = (wout-1) * stridew + 1
+          # Create a patch of the input example corresponding spatially to the filter sizes
+          Xn_padded_patch = matrix(0, rows=C, cols=Hf*Wf)  # zeros
+          dXn_padded_patch = matrix(W[f,] * dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout],
+                                    rows=C, cols=Hf*Wf)  # reshape
+          for (c in 1:C) {
+            Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)  # reshape
+            Xn_padded_patch[c,] = matrix(Xn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf],
+                                         rows=1, cols=Hf*Wf)  # reshape
+            dXn_padded_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw)
+            dXn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf] = matrix(dXn_padded_patch[c,],
+                                                              rows=Hf, cols=Wf)  # reshape
+            dXn_padded[c,] = dXn_padded[c,] + matrix(dXn_padded_slice,
+                                                     rows=1, cols=(Hin+2*padh)*(Win+2*padw))
+          }
+          dW[f,] = dW[f,]
+                   + matrix(Xn_padded_patch, rows=1, cols=C*Hf*Wf)
+                     * dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout]
+          db[f,] = db[f,] + dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout]
+        }
+      }
+    }
+    # Unpad derivs on input
+    dXn = matrix(0, rows=C, cols=Hin*Win)
+    parfor (c in 1:C, check=0) {
+      dXn_padded_slice = matrix(dXn_padded[c,], rows=(Hin+2*padh), cols=(Win+2*padw))
+      dXn_slice = dXn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win]
+      dXn[c,] = matrix(dXn_slice, rows=1, cols=Hin*Win)
+    }
+    dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win)
+  }
+}
+
+init = function(int F, int C, int Hf, int Wf)
+    return (matrix[double] W, matrix[double] b) {
+  /*
+   * Initialize the parameters of this layer.
+   *
+   * We use the heuristic by He et al., which limits the magnification
+   * of inputs/gradients during forward/backward passes by scaling
+   * unit-Gaussian weights by a factor of sqrt(2/n), under the
+   * assumption of relu neurons.
+   *  - http://arxiv.org/abs/1502.01852
+   *
+   * Inputs:
+   *  - F: Number of filters.
+   *  - C: Number of input channels (dimensionality of depth).
+   *  - Hf: Filter height.
+   *  - Wf: Filter width.
+   *
+   * Outputs:
+   *  - W: Weights, of shape (F, C*Hf*Wf).
+   *  - b: Biases, of shape (F, 1).
+   */
+  W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf))
+  b = matrix(0, rows=F, cols=1)
+}
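
For reference, the renamed layers are called as follows. This is a minimal sketch, not part of the commit, and it assumes the script is run from the scripts/staging/SystemML-NN directory so that the "nn/util.dml" import inside max_pool2d.dml resolves. With a single 1-channel 4x4 image and 2x2 pooling at stride 2 with no padding, the docstring formula gives Hout = (4 + 2*0 - 2)/2 + 1 = 2.

  source("nn/layers/max_pool2d.dml") as max_pool2d

  # One 1-channel 4x4 image, unrolled row-wise into shape (N, C*Hin*Win) = (1, 16)
  X = matrix("1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16", rows=1, cols=16)

  # Forward: 2x2 max pooling, stride 2, no padding
  [out, Hout, Wout] = max_pool2d::forward(X, 1, 4, 4, 2, 2, 2, 2, 0, 0)
  print(toString(out))  # one row: 6 8 14 16, with Hout = Wout = 2

  # Backward: the upstream gradient is routed only to the max of each 2x2 patch
  dout = matrix(1, rows=1, cols=Hout*Wout)
  dX = max_pool2d::backward(dout, Hout, Wout, X, 1, 4, 4, 2, 2, 2, 2, 0, 0)
  print(toString(dX))   # ones at the positions of 6, 8, 14, and 16; zeros elsewhere

max_pool2d_builtin::forward takes the same arguments, so the two implementations are interchangeable in callers such as convnet.dml.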
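Likewise, a sketch of the simple reference convolution layer and the He initialization described in its init docstring; the sizes below are illustrative choices, not values from the commit.

  source("nn/test/conv2d_simple.dml") as conv2d_simple

  N = 2    # examples
  C = 3    # input channels
  Hin = 8  # input height
  Win = 8  # input width
  F = 4    # filters
  Hf = 3   # filter height
  Wf = 3   # filter width
  X = rand(rows=N, cols=C*Hin*Win)

  # He initialization: unit-Gaussian weights scaled by sqrt(2/(C*Hf*Wf))
  [W, b] = conv2d_simple::init(F, C, Hf, Wf)

  # Stride 1 with 1-pixel padding preserves the spatial size:
  # Hout = (8 + 2*1 - 3)/1 + 1 = 8, and likewise Wout = 8
  [out, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, 1, 1, 1, 1)
  print("out: " + nrow(out) + " x " + ncol(out))  # 2 x 256, i.e. (N, F*Hout*Wout)

The reference version is exercised by the tests in nn/test/grad_check.dml to validate conv2d.dml and conv2d_builtin.dml; it favors clarity over speed, so the builtin layer is the one to use in models.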
