http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/test/conv_simple.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/test/conv_simple.dml b/scripts/staging/SystemML-NN/nn/test/conv_simple.dml deleted file mode 100644 index efd99c3..0000000 --- a/scripts/staging/SystemML-NN/nn/test/conv_simple.dml +++ /dev/null @@ -1,215 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -/* - * 2D Convolutional layer. - * - * This implementation is intended to be a simple, reference version. - */ - -forward = function(matrix[double] X, matrix[double] W, matrix[double] b, - int C, int Hin, int Win, int Hf, int Wf, - int strideh, int stridew, int padh, int padw) - return (matrix[double] out, int Hout, int Wout) { - /* - * Computes the forward pass for a 2D spatial convolutional layer with - * F filters. The input data has N examples, each represented as a 3D - * volume unrolled into a single vector. - * - * This implementation is intended to be a simple, reference version. - * - * Inputs: - * - X: Inputs, of shape (N, C*Hin*Win). - * - W: Weights, of shape (F, C*Hf*Wf). - * - b: Biases, of shape (F, 1). - * - C: Number of input channels (dimensionality of input depth). - * - Hin: Input height. - * - Win: Input width. - * - Hf: Filter height. - * - Wf: Filter width. - * - strideh: Stride over height. - * - stridew: Stride over width. - * - padh: Padding for top and bottom sides. - * - padw: Padding for left and right sides. - * - * Outputs: - * - out: Outputs, of shape (N, F*Hout*Wout). - * - Hout: Output height. - * - Wout: Output width. 
- */ - N = nrow(X) - F = nrow(W) - Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1) - Wout = as.integer((Win + 2*padw - Wf)/stridew + 1) - - # Create output volume - out = matrix(0, rows=N, cols=F*Hout*Wout) - - # Convolution - Simple reference implementation - parfor (n in 1:N) { # all examples - Xn = matrix(X[n,], rows=C, cols=Hin*Win) - # Pad image - Xn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros - parfor (c in 1:C) { - Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped - Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) - Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice - Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape - } - # Convolve image with filters - parfor (f in 1:F, check=0) { # all filters - parfor (hout in 1:Hout, check=0) { # all output rows - h0 = (hout-1)*strideh + 1 - parfor (wout in 1:Wout, check=0) { # all output columns - w0 = (wout-1)*stridew + 1 - # Create a patch of the input example corresponding spatially to the filter sizes - Xn_padded_patch = matrix(0, rows=C, cols=Hf*Wf) # zeros - parfor (c in 1:C, check=0) { - Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) # reshape - Xn_padded_patch[c,] = matrix(Xn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf], rows=1, - cols=Hf*Wf) # reshape - } - out[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout] = - W[f,] %*% matrix(Xn_padded_patch, rows=C*Hf*Wf, cols=1) + b[f,] - } - } - } - } -} - -backward = function(matrix[double] dout, int Hout, int Wout, - matrix[double] X, matrix[double] W, matrix[double] b, - int C, int Hin, int Win, int Hf, int Wf, - int strideh, int stridew, int padh, int padw) - return (matrix[double] dX, matrix[double] dW, matrix[double] db) { - /* - * Computes the backward pass for a 2D spatial convolutional layer - * with F filters. - * - * This implementation is intended to be a simple, reference version. - * - * Inputs: - * - dout: Gradient wrt `out` from upstream, of - * shape (N, F*Hout*Wout). - * - Hout: Output height. - * - Wout: Output width. - * - X: Inputs, of shape (N, C*Hin*Win). - * - W: Weights, of shape (F, C*Hf*Wf). - * - b: Biases, of shape (F, 1). - * - C: Number of input channels (dimensionality of input depth). - * - Hin: Input height. - * - Win: Input width. - * - Hf: Filter height. - * - Wf: Filter width. - * - strideh: Stride over height. - * - stridew: Stride over width. - * - padh: Padding for top and bottom sides. - * - padw: Padding for left and right sides. - * - * Outputs: - * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win). - * - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf). - * - db: Gradient wrt `b`, of shape (F, 1). 
- */ - N = nrow(X) - F = nrow(W) - Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1) - Wout = as.integer((Win + 2*padw - Wf)/stridew + 1) - - # Create gradient volumes - dX = matrix(0, rows=N, cols=C*Hin*Win) - dW = matrix(0, rows=F, cols=C*Hf*Wf) - db = matrix(0, rows=F, cols=1) - - # Partial derivatives for convolution - Simple reference implementation - for (n in 1:N) { # all examples - Xn = matrix(X[n,], rows=C, cols=Hin*Win) - # Pad image - Xn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros - parfor (c in 1:C) { - Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped - Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) - Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice - Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape - } - dXn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) - for (f in 1:F) { # all filters - for (hout in 1:Hout) { # all output rows - h0 = (hout-1) * strideh + 1 - for (wout in 1:Wout) { # all output columns - w0 = (wout-1) * stridew + 1 - # Create a patch of the input example corresponding spatially to the filter sizes - Xn_padded_patch = matrix(0, rows=C, cols=Hf*Wf) # zeros - dXn_padded_patch = matrix(W[f,] * dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout], - rows=C, cols=Hf*Wf) # reshape - for (c in 1:C) { - Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) # reshape - Xn_padded_patch[c,] = matrix(Xn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf], - rows=1, cols=Hf*Wf) # reshape - dXn_padded_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw) - dXn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf] = matrix(dXn_padded_patch[c,], - rows=Hf, cols=Wf) # reshape - dXn_padded[c,] = dXn_padded[c,] + matrix(dXn_padded_slice, - rows=1, cols=(Hin+2*padh)*(Win+2*padw)) - } - dW[f,] = dW[f,] - + matrix(Xn_padded_patch, rows=1, cols=C*Hf*Wf) - * dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout] - db[f,] = db[f,] + dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout] - } - } - } - # Unpad derivs on input - dXn = matrix(0, rows=C, cols=Hin*Win) - parfor (c in 1:C, check=0) { - dXn_padded_slice = matrix(dXn_padded[c,], rows=(Hin+2*padh), cols=(Win+2*padw)) - dXn_slice = dXn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] - dXn[c,] = matrix(dXn_slice, rows=1, cols=Hin*Win) - } - dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win) - } -} - -init = function(int F, int C, int Hf, int Wf) - return (matrix[double] W, matrix[double] b) { - /* - * Initialize the parameters of this layer. - * - * We use the heuristic by He et al., which limits the magnification - * of inputs/gradients during forward/backward passes by scaling - * unit-Gaussian weights by a factor of sqrt(2/n), under the - * assumption of relu neurons. - * - http://arxiv.org/abs/1502.01852 - * - * Inputs: - * - F: Number of filters. - * - C: Number of input channels (dimensionality of depth). - * - Hf: Filter height. - * - Wf: Filter width. - * - * Outputs: - * - W: Weights, of shape (F, C*Hf*Wf). - * - b: Biases, of shape (F, 1). - */ - W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf)) - b = matrix(0, rows=F, cols=1) -} -
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/test/grad_check.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/test/grad_check.dml b/scripts/staging/SystemML-NN/nn/test/grad_check.dml index ba9a317..27f4420 100644 --- a/scripts/staging/SystemML-NN/nn/test/grad_check.dml +++ b/scripts/staging/SystemML-NN/nn/test/grad_check.dml @@ -24,8 +24,8 @@ */ source("nn/layers/affine.dml") as affine source("nn/layers/batch_norm.dml") as batch_norm -source("nn/layers/conv.dml") as conv -source("nn/layers/conv_builtin.dml") as conv_builtin +source("nn/layers/conv2d.dml") as conv2d +source("nn/layers/conv2d_builtin.dml") as conv2d_builtin source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss source("nn/layers/dropout.dml") as dropout source("nn/layers/l1_loss.dml") as l1_loss @@ -34,16 +34,16 @@ source("nn/layers/l2_loss.dml") as l2_loss source("nn/layers/l2_reg.dml") as l2_reg source("nn/layers/log_loss.dml") as log_loss source("nn/layers/lstm.dml") as lstm -source("nn/layers/max_pool.dml") as max_pool -source("nn/layers/max_pool_builtin.dml") as max_pool_builtin +source("nn/layers/max_pool2d.dml") as max_pool2d +source("nn/layers/max_pool2d_builtin.dml") as max_pool2d_builtin source("nn/layers/relu.dml") as relu source("nn/layers/rnn.dml") as rnn source("nn/layers/sigmoid.dml") as sigmoid source("nn/layers/softmax.dml") as softmax source("nn/layers/spatial_batch_norm.dml") as spatial_batch_norm source("nn/layers/tanh.dml") as tanh -source("nn/test/conv_simple.dml") as conv_simple -source("nn/test/max_pool_simple.dml") as max_pool_simple +source("nn/test/conv2d_simple.dml") as conv2d_simple +source("nn/test/max_pool2d_simple.dml") as max_pool2d_simple source("nn/test/util.dml") as test_util affine = function() { @@ -229,11 +229,11 @@ batch_norm = function() { } } -conv = function() { +conv2d = function() { /* - * Gradient check for the convolutional layer using `im2col`. + * Gradient check for the 2D convolutional layer using `im2col`. 
*/ - print("Grad checking the `im2col` convolutional layer with L2 loss.") + print("Grad checking the `im2col` 2D convolutional layer with L2 loss.") # Generate data N = 2 # num examples @@ -249,13 +249,13 @@ conv = function() { y = rand(rows=N, cols=F*Hin*Win) # Create layers - [W, b] = conv::init(F, C, Hf, Wf) + [W, b] = conv2d::init(F, C, Hf, Wf) # Compute analytical gradients of loss wrt parameters - [out, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [out, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) dout = l2_loss::backward(out, y) - [dX, dW, db] = conv::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + [dX, dW, db] = conv2d::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) # Grad check h = 1e-5 @@ -265,10 +265,10 @@ conv = function() { # Compute numerical derivative old = as.scalar(X[i,j]) X[i,j] = old - h - [outmh, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [outmh, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) lossmh = l2_loss::forward(outmh, y) X[i,j] = old + h - [outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [outph, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) lossph = l2_loss::forward(outph, y) X[i,j] = old # reset dX_num = (lossph-lossmh) / (2*h) # numerical derivative @@ -284,10 +284,10 @@ conv = function() { # Compute numerical derivative old = as.scalar(W[i,j]) W[i,j] = old - h - [outmh, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [outmh, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) lossmh = l2_loss::forward(outmh, y) W[i,j] = old + h - [outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [outph, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) lossph = l2_loss::forward(outph, y) W[i,j] = old # reset dW_num = (lossph-lossmh) / (2*h) # numerical derivative @@ -303,10 +303,10 @@ conv = function() { # Compute numerical derivative old = as.scalar(b[i,j]) b[i,j] = old - h - [outmh, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [outmh, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) lossmh = l2_loss::forward(outmh, y) b[i,j] = old + h - [outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [outph, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) lossph = l2_loss::forward(outph, y) b[i,j] = old # reset db_num = (lossph-lossmh) / (2*h) # numerical derivative @@ -317,12 +317,12 @@ conv = function() { } } -conv_builtin = function() { +conv2d_builtin = function() { /* - * Gradient check for the convolutional layer using built-in + * Gradient check for the 2D convolutional layer using built-in * functions. 
*/ - print("Grad checking the built-in convolutional layer with L2 loss.") + print("Grad checking the built-in 2D convolutional layer with L2 loss.") # Generate data N = 2 # num examples @@ -338,13 +338,14 @@ conv_builtin = function() { y = rand(rows=N, cols=F*Hin*Win) # Create layers - [W, b] = conv_builtin::init(F, C, Hf, Wf) + [W, b] = conv2d_builtin::init(F, C, Hf, Wf) # Compute analytical gradients of loss wrt parameters - [out, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [out, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) dout = l2_loss::backward(out, y) - [dX, dW, db] = conv_builtin::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf, - stride, stride, pad, pad) + [dX, dW, db] = conv2d_builtin::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) # Grad check h = 1e-5 @@ -354,12 +355,12 @@ conv_builtin = function() { # Compute numerical derivative old = as.scalar(X[i,j]) X[i,j] = old - h - [outmh, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + [outmh, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) lossmh = l2_loss::forward(outmh, y) X[i,j] = old + h - [outph, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + [outph, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) lossph = l2_loss::forward(outph, y) X[i,j] = old # reset dX_num = (lossph-lossmh) / (2*h) # numerical derivative @@ -375,12 +376,12 @@ conv_builtin = function() { # Compute numerical derivative old = as.scalar(W[i,j]) W[i,j] = old - h - [outmh, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + [outmh, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) lossmh = l2_loss::forward(outmh, y) W[i,j] = old + h - [outph, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + [outph, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) lossph = l2_loss::forward(outph, y) W[i,j] = old # reset dW_num = (lossph-lossmh) / (2*h) # numerical derivative @@ -396,12 +397,12 @@ conv_builtin = function() { # Compute numerical derivative old = as.scalar(b[i,j]) b[i,j] = old - h - [outmh, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + [outmh, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) lossmh = l2_loss::forward(outmh, y) b[i,j] = old + h - [outph, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + [outph, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) lossph = l2_loss::forward(outph, y) b[i,j] = old # reset db_num = (lossph-lossmh) / (2*h) # numerical derivative @@ -412,11 +413,11 @@ conv_builtin = function() { } } -conv_simple = function() { +conv2d_simple = function() { /* - * Gradient check for the simple reference convolutional layer. + * Gradient check for the simple reference 2D convolutional layer. 
*/ - print("Grad checking the simple reference convolutional layer with L2 loss.") + print("Grad checking the simple reference 2D convolutional layer with L2 loss.") # Generate data N = 2 # num examples @@ -432,13 +433,13 @@ conv_simple = function() { y = rand(rows=N, cols=F*Hin*Win) # Create layers - [W, b] = conv_simple::init(F, C, Hf, Wf) + [W, b] = conv2d_simple::init(F, C, Hf, Wf) # Compute analytical gradients of loss wrt parameters - [out, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [out, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) dout = l2_loss::backward(out, y) - [dX, dW, db] = conv_simple::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf, - stride, stride, pad, pad) + [dX, dW, db] = conv2d_simple::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) # Grad check h = 1e-5 @@ -448,12 +449,12 @@ conv_simple = function() { # Compute numerical derivative old = as.scalar(X[i,j]) X[i,j] = old - h - [outmh, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + [outmh, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) lossmh = l2_loss::forward(outmh, y) X[i,j] = old + h - [outph, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + [outph, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) lossph = l2_loss::forward(outph, y) X[i,j] = old # reset dX_num = (lossph-lossmh) / (2*h) # numerical derivative @@ -469,12 +470,12 @@ conv_simple = function() { # Compute numerical derivative old = as.scalar(W[i,j]) W[i,j] = old - h - [outmh, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + [outmh, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) lossmh = l2_loss::forward(outmh, y) W[i,j] = old + h - [outph, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + [outph, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) lossph = l2_loss::forward(outph, y) W[i,j] = old # reset dW_num = (lossph-lossmh) / (2*h) # numerical derivative @@ -490,12 +491,12 @@ conv_simple = function() { # Compute numerical derivative old = as.scalar(b[i,j]) b[i,j] = old - h - [outmh, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + [outmh, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) lossmh = l2_loss::forward(outmh, y) b[i,j] = old + h - [outph, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + [outph, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) lossph = l2_loss::forward(outph, y) b[i,j] = old # reset db_num = (lossph-lossmh) / (2*h) # numerical derivative @@ -898,11 +899,11 @@ lstm = function() { } } -max_pool = function() { +max_pool2d = function() { /* - * Gradient check for the max pooling layer. + * Gradient check for the 2D max pooling layer. 
*/ - print("Grad checking the max pooling layer with L2 loss.") + print("Grad checking the 2D max pooling layer with L2 loss.") # Generate data N = 2 # num examples @@ -921,9 +922,9 @@ max_pool = function() { y = rand(rows=N, cols=C*Hout*Wout) # Compute analytical gradients of loss wrt parameters - [out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) dout = l2_loss::backward(out, y) - dX = max_pool::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + dX = max_pool2d::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) # Grad check h = 1e-5 @@ -932,10 +933,10 @@ max_pool = function() { # Compute numerical derivative old = as.scalar(X[i,j]) X[i,j] = old - h - [outmh, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [outmh, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) lossmh = l2_loss::forward(outmh, y) X[i,j] = old + h - [outph, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [outph, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) lossph = l2_loss::forward(outph, y) X[i,j] = old # reset dX_num = (lossph-lossmh) / (2*h) # numerical derivative @@ -947,11 +948,11 @@ max_pool = function() { } } -max_pool_builtin = function() { +max_pool2d_builtin = function() { /* - * Gradient check for the max pooling layer. + * Gradient check for the 2D max pooling layer. */ - print("Grad checking the built-in max pooling layer with L2 loss.") + print("Grad checking the built-in 2D max pooling layer with L2 loss.") # Generate data N = 2 # num examples @@ -970,10 +971,11 @@ max_pool_builtin = function() { y = rand(rows=N, cols=C*Hout*Wout) # Compute analytical gradients of loss wrt parameters - [out, Hout, Wout] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [out, Hout, Wout] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) dout = l2_loss::backward(out, y) - dX = max_pool_builtin::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + dX = max_pool2d_builtin::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) # Grad check h = 1e-5 @@ -982,12 +984,12 @@ max_pool_builtin = function() { # Compute numerical derivative old = as.scalar(X[i,j]) X[i,j] = old - h - [outmh, Hout, Wout] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + [outmh, Hout, Wout] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) lossmh = l2_loss::forward(outmh, y) X[i,j] = old + h - [outph, Hout, Wout] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + [outph, Hout, Wout] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) lossph = l2_loss::forward(outph, y) X[i,j] = old # reset dX_num = (lossph-lossmh) / (2*h) # numerical derivative @@ -999,11 +1001,11 @@ max_pool_builtin = function() { } } -max_pool_simple = function() { +max_pool2d_simple = function() { /* - * Gradient check for the simple reference max pooling layer. + * Gradient check for the simple reference 2D max pooling layer. 
*/ - print("Grad checking the simple reference max pooling layer with L2 loss.") + print("Grad checking the simple reference 2D max pooling layer with L2 loss.") # Generate data N = 2 # num examples @@ -1022,10 +1024,10 @@ max_pool_simple = function() { y = rand(rows=N, cols=C*Hout*Wout) # Compute analytical gradients of loss wrt parameters - [out, Hout, Wout] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [out, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) dout = l2_loss::backward(out, y) - dX = max_pool_simple::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + dX = max_pool2d_simple::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) # Grad check h = 1e-5 @@ -1034,12 +1036,12 @@ max_pool_simple = function() { # Compute numerical derivative old = as.scalar(X[i,j]) X[i,j] = old - h - [outmh, Hout, Wout] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + [outmh, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) lossmh = l2_loss::forward(outmh, y) X[i,j] = old + h - [outph, Hout, Wout] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride, - pad, pad) + [outph, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride, + pad, pad) lossph = l2_loss::forward(outph, y) X[i,j] = old # reset dX_num = (lossph-lossmh) / (2*h) # numerical derivative http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml b/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml new file mode 100644 index 0000000..47dab3a --- /dev/null +++ b/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml @@ -0,0 +1,172 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +/* + * Max Pooling layer. + * + * This implementation is intended to be a simple, reference version. + */ + +forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf, + int strideh, int stridew, int padh, int padw) + return (matrix[double] out, int Hout, int Wout) { + /* + * Computes the forward pass for a 2D spatial max pooling layer. + * The input data has N examples, each represented as a 3D volume + * unrolled into a single vector. + * + * This implementation is intended to be a simple, reference version. + * + * Inputs: + * - X: Inputs, of shape (N, C*Hin*Win). 
+ * - C: Number of input channels (dimensionality of input depth). + * - Hin: Input height. + * - Win: Input width. + * - Hf: Filter height. + * - Wf: Filter width. + * - strideh: Stride over height. + * - stridew: Stride over width. + * - padh: Padding for top and bottom sides. + * A typical value is 0. + * - padw: Padding for left and right sides. + * A typical value is 0. + * + * Outputs: + * - out: Outputs, of shape (N, C*Hout*Wout). + * - Hout: Output height. + * - Wout: Output width. + */ + N = nrow(X) + Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1) + Wout = as.integer((Win + 2*padw - Wf)/stridew + 1) + + # Create output volume + out = matrix(0, rows=N, cols=C*Hout*Wout) + + # Max pooling + parfor (n in 1:N, check=0) { # all examples + Xn = matrix(X[n,], rows=C, cols=Hin*Win) + + # Pad image + pad_value = -1/0 + Xn_padded = matrix(pad_value, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros + parfor (c in 1:C) { + Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped + Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) + Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice + Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape + } + img = Xn_padded # shape (C, (Hin+2*padh)*(Win+2*padw)) + + parfor (c in 1:C, check=0) { # all channels + img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw) + parfor (hout in 1:Hout, check=0) { # all output rows + hin = (hout-1) * strideh + 1 + parfor (wout in 1:Wout, check=0) { # all output columns + win = (wout-1) * stridew + 1 + out[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout] = max(img_slice[hin:hin+Hf-1, + win:win+Wf-1]) + } + } + } + } +} + +backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X, + int C, int Hin, int Win, int Hf, int Wf, + int strideh, int stridew, int padh, int padw) + return (matrix[double] dX) { + /* + * Computes the backward pass for a 2D spatial max pooling layer. + * The input data has N examples, each represented as a 3D volume + * unrolled into a single vector. + * + * Inputs: + * - dout: Gradient wrt `out` from upstream, of + * shape (N, C*Hout*Wout). + * - Hout: Output height. + * - Wout: Output width. + * - X: Inputs, of shape (N, C*Hin*Win). + * - C: Number of input channels (dimensionality of input depth). + * - Hin: Input height. + * - Win: Input width. + * - Hf: Filter height. + * - Wf: Filter width. + * - strideh: Stride over height. + * - stridew: Stride over width. + * - padh: Padding for top and bottom sides. + * A typical value is 0. + * - padw: Padding for left and right sides. + * A typical value is 0. + * + * Outputs: + * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win). 
+ */ + N = nrow(X) + + # Create gradient volume + dX = matrix(0, rows=N, cols=C*Hin*Win) + + # Gradient of max pooling + for (n in 1:N) { # all examples + Xn = matrix(X[n,], rows=C, cols=Hin*Win) + + # Pad image + pad_value = -1/0 + Xn_padded = matrix(pad_value, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros + parfor (c in 1:C) { + Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped + Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) + Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice + Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape + } + img = Xn_padded + + dimg = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) + for (c in 1:C) { # all channels + img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw) + dimg_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw) + for (hout in 1:Hout, check=0) { # all output rows + hin = (hout-1) * strideh + 1 + for (wout in 1:Wout) { # all output columns + win = (wout-1) * stridew + 1 + img_slice_patch = img_slice[hin:hin+Hf-1, win:win+Wf-1] + max_val_ind = img_slice_patch == max(img_slice_patch) # max value indicator matrix + # gradient passes through only for the max value(s) in this patch + dimg_slice_patch = max_val_ind * dout[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout] + dimg_slice[hin:hin+Hf-1, win:win+Wf-1] = dimg_slice[hin:hin+Hf-1, win:win+Wf-1] + + dimg_slice_patch + } + } + dimg[c,] = matrix(dimg_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) + } + + # Unpad derivs on input + dXn = matrix(0, rows=C, cols=Hin*Win) + parfor (c in 1:C, check=0) { + dXn_padded_slice = matrix(dimg[c,], rows=(Hin+2*padh), cols=(Win+2*padw)) + dXn_slice = dXn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] + dXn[c,] = matrix(dXn_slice, rows=1, cols=Hin*Win) + } + dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win) + } +} + http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml b/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml deleted file mode 100644 index 786b0a1..0000000 --- a/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml +++ /dev/null @@ -1,172 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -/* - * Max pooling layer. - * - * This implementation is intended to be a simple, reference version. 
- */ - -forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf, - int strideh, int stridew, int padh, int padw) - return (matrix[double] out, int Hout, int Wout) { - /* - * Computes the forward pass for a 2D spatial max pooling layer. - * The input data has N examples, each represented as a 3D volume - * unrolled into a single vector. - * - * This implementation is intended to be a simple, reference version. - * - * Inputs: - * - X: Inputs, of shape (N, C*Hin*Win). - * - C: Number of input channels (dimensionality of input depth). - * - Hin: Input height. - * - Win: Input width. - * - Hf: Filter height. - * - Wf: Filter width. - * - strideh: Stride over height. - * - stridew: Stride over width. - * - padh: Padding for top and bottom sides. - * A typical value is 0. - * - padw: Padding for left and right sides. - * A typical value is 0. - * - * Outputs: - * - out: Outputs, of shape (N, C*Hout*Wout). - * - Hout: Output height. - * - Wout: Output width. - */ - N = nrow(X) - Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1) - Wout = as.integer((Win + 2*padw - Wf)/stridew + 1) - - # Create output volume - out = matrix(0, rows=N, cols=C*Hout*Wout) - - # Max pooling - parfor (n in 1:N, check=0) { # all examples - Xn = matrix(X[n,], rows=C, cols=Hin*Win) - - # Pad image - pad_value = -1/0 - Xn_padded = matrix(pad_value, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros - parfor (c in 1:C) { - Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped - Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) - Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice - Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape - } - img = Xn_padded # shape (C, (Hin+2*padh)*(Win+2*padw)) - - parfor (c in 1:C, check=0) { # all channels - img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw) - parfor (hout in 1:Hout, check=0) { # all output rows - hin = (hout-1) * strideh + 1 - parfor (wout in 1:Wout, check=0) { # all output columns - win = (wout-1) * stridew + 1 - out[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout] = max(img_slice[hin:hin+Hf-1, - win:win+Wf-1]) - } - } - } - } -} - -backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X, - int C, int Hin, int Win, int Hf, int Wf, - int strideh, int stridew, int padh, int padw) - return (matrix[double] dX) { - /* - * Computes the backward pass for a 2D spatial max pooling layer. - * The input data has N examples, each represented as a 3D volume - * unrolled into a single vector. - * - * Inputs: - * - dout: Gradient wrt `out` from upstream, of - * shape (N, C*Hout*Wout). - * - Hout: Output height. - * - Wout: Output width. - * - X: Inputs, of shape (N, C*Hin*Win). - * - C: Number of input channels (dimensionality of input depth). - * - Hin: Input height. - * - Win: Input width. - * - Hf: Filter height. - * - Wf: Filter width. - * - strideh: Stride over height. - * - stridew: Stride over width. - * - padh: Padding for top and bottom sides. - * A typical value is 0. - * - padw: Padding for left and right sides. - * A typical value is 0. - * - * Outputs: - * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win). 
- */ - N = nrow(X) - - # Create gradient volume - dX = matrix(0, rows=N, cols=C*Hin*Win) - - # Gradient of max pooling - for (n in 1:N) { # all examples - Xn = matrix(X[n,], rows=C, cols=Hin*Win) - - # Pad image - pad_value = -1/0 - Xn_padded = matrix(pad_value, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros - parfor (c in 1:C) { - Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped - Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) - Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice - Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape - } - img = Xn_padded - - dimg = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) - for (c in 1:C) { # all channels - img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw) - dimg_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw) - for (hout in 1:Hout, check=0) { # all output rows - hin = (hout-1) * strideh + 1 - for (wout in 1:Wout) { # all output columns - win = (wout-1) * stridew + 1 - img_slice_patch = img_slice[hin:hin+Hf-1, win:win+Wf-1] - max_val_ind = img_slice_patch == max(img_slice_patch) # max value indicator matrix - # gradient passes through only for the max value(s) in this patch - dimg_slice_patch = max_val_ind * dout[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout] - dimg_slice[hin:hin+Hf-1, win:win+Wf-1] = dimg_slice[hin:hin+Hf-1, win:win+Wf-1] - + dimg_slice_patch - } - } - dimg[c,] = matrix(dimg_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) - } - - # Unpad derivs on input - dXn = matrix(0, rows=C, cols=Hin*Win) - parfor (c in 1:C, check=0) { - dXn_padded_slice = matrix(dimg[c,], rows=(Hin+2*padh), cols=(Win+2*padw)) - dXn_slice = dXn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] - dXn[c,] = matrix(dXn_slice, rows=1, cols=Hin*Win) - } - dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win) - } -} - http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/test/run_tests.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/test/run_tests.dml b/scripts/staging/SystemML-NN/nn/test/run_tests.dml index 86bb77b..644662c 100644 --- a/scripts/staging/SystemML-NN/nn/test/run_tests.dml +++ b/scripts/staging/SystemML-NN/nn/test/run_tests.dml @@ -38,16 +38,16 @@ tmp = grad_check::log_loss() # Other layers tmp = grad_check::affine() tmp = grad_check::batch_norm() -tmp = grad_check::conv_simple() -tmp = grad_check::conv() -tmp = grad_check::conv_builtin() +tmp = grad_check::conv2d_simple() +tmp = grad_check::conv2d() +tmp = grad_check::conv2d_builtin() tmp = grad_check::dropout() tmp = grad_check::l1_reg() tmp = grad_check::l2_reg() tmp = grad_check::lstm() -tmp = grad_check::max_pool_simple() -tmp = grad_check::max_pool() -tmp = grad_check::max_pool_builtin() +tmp = grad_check::max_pool2d_simple() +tmp = grad_check::max_pool2d() +tmp = grad_check::max_pool2d_builtin() tmp = grad_check::relu() tmp = grad_check::rnn() tmp = grad_check::sigmoid() @@ -72,9 +72,9 @@ print("---") tmp = test::batch_norm() tmp = test::im2col() tmp = test::padding() -tmp = test::conv() +tmp = test::conv2d() tmp = test::cross_entropy_loss() -tmp = test::max_pool() +tmp = test::max_pool2d() tmp = test::spatial_batch_norm() tmp = test::tanh() http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/test/test.dml ---------------------------------------------------------------------- diff --git 
a/scripts/staging/SystemML-NN/nn/test/test.dml b/scripts/staging/SystemML-NN/nn/test/test.dml index 8fb0d04..64fc519 100644 --- a/scripts/staging/SystemML-NN/nn/test/test.dml +++ b/scripts/staging/SystemML-NN/nn/test/test.dml @@ -23,23 +23,23 @@ * Various tests, not including gradient checks. */ source("nn/layers/batch_norm.dml") as batch_norm -source("nn/layers/conv.dml") as conv -source("nn/layers/conv_builtin.dml") as conv_builtin +source("nn/layers/conv2d.dml") as conv2d +source("nn/layers/conv2d_builtin.dml") as conv2d_builtin source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss -source("nn/layers/max_pool.dml") as max_pool -source("nn/layers/max_pool_builtin.dml") as max_pool_builtin +source("nn/layers/max_pool2d.dml") as max_pool2d +source("nn/layers/max_pool2d_builtin.dml") as max_pool2d_builtin source("nn/layers/spatial_batch_norm.dml") as spatial_batch_norm source("nn/layers/tanh.dml") as tanh -source("nn/test/conv_simple.dml") as conv_simple -source("nn/test/max_pool_simple.dml") as max_pool_simple +source("nn/test/conv2d_simple.dml") as conv2d_simple +source("nn/test/max_pool2d_simple.dml") as max_pool2d_simple source("nn/test/util.dml") as test_util source("nn/util.dml") as util batch_norm = function() { /* - * Test for the `batch_norm` function. + * Test for the batch normalization function. */ - print("Testing the batch_norm function.") + print("Testing the batch normalization function.") # Generate data N = 4 # Number of examples @@ -68,11 +68,11 @@ batch_norm = function() { } } -conv = function() { +conv2d = function() { /* - * Test for the `conv` functions. + * Test for the 2D convolution functions. */ - print("Testing the conv functions.") + print("Testing the 2D convolution functions.") # Generate data N = 2 # num examples @@ -87,14 +87,14 @@ conv = function() { X = rand(rows=N, cols=C*Hin*Win, pdf="normal") # Create layer - [W, b] = conv::init(F, C, Hf, Wf) + [W, b] = conv2d::init(F, C, Hf, Wf) # Forward - [out, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) - [out_simple, Hout_simple, Wout_simple] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, - stride, stride, pad, pad) - [out_builtin, Hout_builtin, Wout_builtin] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, - stride, stride, pad, pad) + [out, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [out_simple, Hout_simple, Wout_simple] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) + [out_builtin, Hout_builtin, Wout_builtin] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) # Equivalency check out = matrix(out, rows=1, cols=N*F*Hout*Wout) @@ -110,7 +110,7 @@ conv = function() { cross_entropy_loss = function() { /* - * Test for the `cross-entropy` loss function. + * Test for the cross-entropy loss function. * * Here we make sure that the cross-entropy loss function does * not propagate `infinity` values in the case that a prediction is @@ -206,11 +206,11 @@ padding = function() { } } -max_pool = function() { +max_pool2d = function() { /* - * Test for the `max_pool` functions. + * Test for the 2D max pooling functions. 
*/ - print("Testing the max pool functions.") + print("Testing the 2D max pooling functions.") # Generate data N = 2 # num examples @@ -227,12 +227,14 @@ max_pool = function() { print(" - Testing w/ padh="+padh+" & padw="+padw+".") #if (1==1) {} # force correct printing #print(" - Testing forward") - [out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, padh, padw) - [out_simple, Hout_simple, Wout_simple] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf, - stride, stride, padh, padw) - [out_builtin, Hout_builtin, Wout_builtin] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf, - stride, stride, - padh, padw) + [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, padh, padw) + [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, + stride, stride, + padh, padw) + [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, + Hf, Wf, + stride, stride, + padh, padw) # Equivalency check out = matrix(out, rows=1, cols=N*C*Hout*Wout) @@ -247,11 +249,12 @@ max_pool = function() { #print(" - Testing backward") dout = rand(rows=N, cols=C*Hout*Wout, pdf="normal") - dX = max_pool::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride, padh, padw) - dX_simple = max_pool_simple::backward(dout, Hout_simple, Wout_simple, X, C, Hin, Win, Hf, Wf, - stride, stride, padh, padw) - dX_builtin = max_pool_builtin::backward(dout, Hout_builtin, Wout_builtin, X, C, Hin, Win, + dX = max_pool2d::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride, + padh, padw) + dX_simple = max_pool2d_simple::backward(dout, Hout_simple, Wout_simple, X, C, Hin, Win, Hf, Wf, stride, stride, padh, padw) + dX_builtin = max_pool2d_builtin::backward(dout, Hout_builtin, Wout_builtin, X, C, Hin, Win, + Hf, Wf, stride, stride, padh, padw) # Equivalency check dX = matrix(dX, rows=1, cols=N*C*Hin*Win) @@ -288,11 +291,11 @@ max_pool = function() { pad = 0 # forward - [out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) - [out_simple, Hout_simple, Wout_simple] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf, - stride, stride, pad, pad) - [out_builtin, Hout_builtin, Wout_builtin] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf, - stride, stride, pad, pad) + [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) + [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) # equivalency check # -- channel 1 @@ -326,11 +329,11 @@ max_pool = function() { pad = 1 # forward - [out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) - [out_simple, Hout_simple, Wout_simple] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf, - stride, stride, pad, pad) - [out_builtin, Hout_builtin, Wout_builtin] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf, - stride, stride, pad, pad) + [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) + [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) # equivalency check # -- channel 1 @@ -363,11 +366,11 @@ max_pool = function() { pad = 0 # forward - [out, Hout, Wout] = 
max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) - [out_simple, Hout_simple, Wout_simple] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf, - stride, stride, pad, pad) - [out_builtin, Hout_builtin, Wout_builtin] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf, - stride, stride, pad, pad) + [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) + [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) # equivalency check # -- channel 1 @@ -402,11 +405,11 @@ max_pool = function() { pad = 1 # forward - [out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) - [out_simple, Hout_simple, Wout_simple] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf, - stride, stride, pad, pad) - [out_builtin, Hout_builtin, Wout_builtin] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf, - stride, stride, pad, pad) + [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) + [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, + stride, stride, pad, pad) # equivalency check # -- channel 1 @@ -417,7 +420,8 @@ max_pool = function() { # 0 0 0 # 0 -6 0 # 0 0 0 - target = matrix("-1 -2 -4 -5 -6 -8 -13 -14 -16 -1 -5 -13 -2 -6 -14 -4 -8 -16", rows=1, cols=C*Hout*Wout) + target = matrix("-1 -2 -4 -5 -6 -8 -13 -14 -16 -1 -5 -13 -2 -6 -14 -4 -8 -16", + rows=1, cols=C*Hout*Wout) target = rbind(target, target) # n=2 tmp = test_util::check_all_equal(out, target) tmp = test_util::check_all_equal(out_simple, target) @@ -426,9 +430,9 @@ max_pool = function() { spatial_batch_norm = function() { /* - * Test for the `spatial_batch_norm` function. + * Test for the spatial batch normalization function. */ - print("Testing the spatial_batch_norm function.") + print("Testing the spatial batch normalization function.") # Generate data N = 2 # Number of examples @@ -532,7 +536,8 @@ tanh = function() { # Equivalency check for (i in 1:nrow(out)) { for (j in 1:ncol(out)) { - rel_error = test_util::check_rel_error(as.scalar(out[i,j]), as.scalar(out_ref[i,j]), 1e-10, 1e-12) + rel_error = test_util::check_rel_error(as.scalar(out[i,j]), as.scalar(out_ref[i,j]), + 1e-10, 1e-12) } } }
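All of the grad_check.dml functions renamed above share one centered-difference recipe: perturb a single entry of an input by ±h, re-run the forward pass and the loss, and compare (lossph - lossmh) / (2*h) against the analytical gradient. Below is a condensed sketch of that recipe against the new max_pool2d_simple layer, using only signatures that appear in this diff; the toy sizes and the single-entry check are illustrative, not part of the commit:

source("nn/layers/l2_loss.dml") as l2_loss
source("nn/test/max_pool2d_simple.dml") as max_pool2d_simple

# Toy problem: 2 examples, 2 channels of 4x4, pooled with a 2x2 window at stride 2
N = 2
C = 2
Hin = 4
Win = 4
Hf = 2
Wf = 2
stride = 2
pad = 0
Hout = as.integer((Hin + 2*pad - Hf)/stride + 1)
Wout = as.integer((Win + 2*pad - Wf)/stride + 1)
X = rand(rows=N, cols=C*Hin*Win, pdf="normal")
y = rand(rows=N, cols=C*Hout*Wout)

# Analytical gradient of the loss wrt X
[out, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
dout = l2_loss::backward(out, y)
dX = max_pool2d_simple::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf,
                                 stride, stride, pad, pad)

# Numerical gradient of one entry via centered differences
h = 1e-5
i = 1
j = 1
old = as.scalar(X[i,j])
X[i,j] = old - h
[outmh, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossmh = l2_loss::forward(outmh, y)
X[i,j] = old + h
[outph, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossph = l2_loss::forward(outph, y)
X[i,j] = old  # reset
dX_num = (lossph - lossmh) / (2*h)  # numerical derivative
print("analytical dX[1,1] = " + as.scalar(dX[i,j]) + ", numerical = " + dX_num)

In the actual grad_check functions this comparison runs over every entry of X (and of W and b for the convolution layers), with a helper such as test_util::check_rel_error flagging any relative error above threshold.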
