Repository: incubator-systemml Updated Branches: refs/heads/master 9dc42b2fc -> 781d24d86
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/781d24d8/scripts/staging/SystemML-NN/nn/test/conv_simple.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/test/conv_simple.dml b/scripts/staging/SystemML-NN/nn/test/conv_simple.dml new file mode 100644 index 0000000..f065668 --- /dev/null +++ b/scripts/staging/SystemML-NN/nn/test/conv_simple.dml @@ -0,0 +1,211 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +/* + * 2D Convolutional layer. + * + * This implementation is intended to be a simple, reference version. + */ +forward = function(matrix[double] X, matrix[double] W, matrix[double] b, + int C, int Hin, int Win, int Hf, int Wf, + int strideh, int stridew, int padh, int padw) + return (matrix[double] out, int Hout, int Wout) { + /* + * Computes the forward pass for a 2D spatial convolutional layer with + * F filters. The input data has N examples, each represented as a 3D + * volume unrolled into a single vector. + * + * This implementation is intended to be a simple, reference version. + * + * Inputs: + * - X: Input data matrix, of shape (N, C*Hin*Win). + * - W: Weights (parameters) matrix, of shape (F, C*Hf*Wf). + * - b: Biases vector, of shape (F, 1). + * - C: Number of input channels (dimensionality of input depth). + * - Hin: Input height. + * - Win: Input width. + * - Hf: Filter height. + * - Wf: Filter width. + * - strideh: Stride over height. + * - stridew: Stride over width. + * - padh: Padding for top and bottom sides. + * - padw: Padding for left and right sides. + * + * Outputs: + * - out: Outputs, of shape (N, F*Hout*Wout). + * - Hout: Output height. + * - Wout: Output width. 
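+ *
+ * For example, with Hin = Win = 5, Hf = Wf = 3, strideh = stridew = 1,
+ * and padh = padw = 1 (the setting used in the gradient checks),
+ * Hout = (5 + 2*1 - 3) / 1 + 1 = 5, i.e. the output retains the
+ * input's spatial dimensions ("same" padding).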
+ */ + N = nrow(X) + F = nrow(W) + Hout = as.integer((Hin + 2 * padh - Hf) / strideh + 1) + Wout = as.integer((Win + 2 * padw - Wf) / stridew + 1) + + # Create output volume + out = matrix(0, rows=N, cols=F*Hout*Wout) + + # Convolution - Simple reference implementation + parfor (n in 1:N) { # all examples + Xn = matrix(X[n,], rows=C, cols=Hin*Win) + # Pad image + Xn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros + parfor (c in 1:C) { + Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped + Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) + Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice + Xn_padded[c, ] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape + } + # Convolve image with filters + parfor (f in 1:F, check=0) { # all filters + parfor (hout in 1:Hout, check=0) { # all output rows + h0 = (hout-1) * strideh + 1 + parfor (wout in 1:Wout, check=0) { # all output columns + w0 = (wout-1) * stridew + 1 + # Create a patch of the input example corresponding spatially to the filter sizes + Xn_padded_patch = matrix(0, rows=C, cols=Hf*Wf) # zeros + parfor (c in 1:C, check=0) { + Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) # reshape + Xn_padded_patch[c,] = + matrix(Xn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf], rows=1, cols=Hf*Wf) # reshape + } + out[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout] = + W[f,] %*% matrix(Xn_padded_patch, rows=C*Hf*Wf, cols=1) + b[f,] + } + } + } + } +} + +backward = function(matrix[double] dout, int Hout, int Wout, + matrix[double] X, matrix[double] W, matrix[double] b, + int C, int Hin, int Win, int Hf, int Wf, + int strideh, int stridew, int padh, int padw) + return (matrix[double] dX, matrix[double] dW, matrix[double] db) { + /* + * Computes the backward pass for a 2D spatial convolutional layer + * with F filters. + * + * This implementation is intended to be a simple, reference version. + * + * Inputs: + * - dout: Derivatives from upstream, of shape (N, F*Hout*Wout). + * - Hout: Output height. + * - Wout: Output width. + * - X: Previous input data matrix, of shape (N, C*Hin*Win). + * - W: Weights (parameters) matrix, of shape (F, C*Hf*Wf). + * - b: Biases vector, of shape (F, 1). + * - C: Number of input channels (dimensionality of input depth). + * - Hin: Input height. + * - Win: Input width. + * - Hf: Filter height. + * - Wf: Filter width. + * - strideh: Stride over height. + * - stridew: Stride over width. + * - padh: Padding for top and bottom sides. + * - padw: Padding for left and right sides. + * + * Outputs: + * - dX: Gradient wrt X, of shape (N, C*Hin*Win). + * - dW: Gradient wrt W, of shape (F, C*Hf*Wf). + * - db: Gradient wrt b, of shape (F, 1). 
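+ *
+ * In this reference implementation, db[f] simply accumulates dout over
+ * all examples and output positions, dW[f] accumulates each input
+ * patch scaled by the corresponding upstream derivative, and dX is
+ * obtained by scattering W[f] * dout back onto the padded input and
+ * then removing the padding.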
+ */ + N = nrow(X) + F = nrow(W) + Hout = as.integer((Hin + 2 * padh - Hf) / strideh + 1) + Wout = as.integer((Win + 2 * padw - Wf) / stridew + 1) + + # Create gradient volumes + dX = matrix(0, rows=N, cols=C*Hin*Win) + dW = matrix(0, rows=F, cols=C*Hf*Wf) + db = matrix(0, rows=F, cols=1) + + # Partial derivatives for convolution - Simple reference implementation + for (n in 1:N) { # all examples + Xn = matrix(X[n,], rows=C, cols=Hin*Win) + # Pad image + Xn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros + parfor (c in 1:C) { + Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped + Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) + Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice + Xn_padded[c, ] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape + } + dXn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) + for (f in 1:F) { # all filters + for (hout in 1:Hout) { # all output rows + h0 = (hout-1) * strideh + 1 + for (wout in 1:Wout) { # all output columns + w0 = (wout-1) * stridew + 1 + # Create a patch of the input example corresponding spatially to the filter sizes + Xn_padded_patch = matrix(0, rows=C, cols=Hf*Wf) # zeros + dXn_padded_patch = matrix(W[f,] * dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout], + rows=C, cols=Hf*Wf) # reshape + for (c in 1:C) { + Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) # reshape + Xn_padded_patch[c,] = + matrix(Xn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf], rows=1, cols=Hf*Wf) # reshape + dXn_padded_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw) + dXn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf] = + matrix(dXn_padded_patch[c,], rows=Hf, cols=Wf) # reshape + dXn_padded[c,] = dXn_padded[c,] + + matrix(dXn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) + } + dW[f,] = dW[f,] + matrix(Xn_padded_patch, rows=1, cols=C*Hf*Wf) * + dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout] + db[f,] = db[f,] + dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout] + } + } + } + # Unpad derivs on input + dXn = matrix(0, rows=C, cols=Hin*Win) + parfor (c in 1:C, check=0) { + dXn_padded_slice = matrix(dXn_padded[c,], rows=(Hin+2*padh), cols=(Win+2*padw)) + dXn_slice = dXn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] + dXn[c, ] = matrix(dXn_slice, rows=1, cols=Hin*Win) + } + dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win) + } +} + +init = function(int F, int C, int Hf, int Wf) + return (matrix[double] W, matrix[double] b) { + /* + * Initialize the parameters of this layer. + * + * We use the heuristic by He et al. [http://arxiv.org/abs/1502.01852], + * which limits the magnification of inputs/gradients during + * forward/backward passes by scaling unit-Gaussian weights by a + * factor of sqrt(2/n), under the assumption of relu neurons. + * + * Inputs: + * - F: Number of filters. + * - C: Number of input channels (dimensionality of depth). + * - Hf: Filter height. + * - Wf: Filter width. + * + * Outputs: + * - W: Weights (parameters) matrix, of shape (F, C*Hf*Wf). + * - b: Biases vector, of shape (F, 1). 
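+ *
+ * For example, with C = 2 and Hf = Wf = 3 (as in the gradient checks),
+ * each filter has C*Hf*Wf = 18 incoming weights, so the unit-Gaussian
+ * weights are scaled by sqrt(2/18) = 1/3.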
+ */ + W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf)) + b = matrix(0, rows=F, cols=1) +} + http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/781d24d8/scripts/staging/SystemML-NN/nn/test/grad_check.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/test/grad_check.dml b/scripts/staging/SystemML-NN/nn/test/grad_check.dml new file mode 100644 index 0000000..af985a3 --- /dev/null +++ b/scripts/staging/SystemML-NN/nn/test/grad_check.dml @@ -0,0 +1,1139 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +/* + * Gradient checks for various architectures. + */ +source("nn/layers/affine.dml") as affine +source("nn/layers/conv.dml") as conv +source("nn/layers/conv_builtin.dml") as conv_builtin +source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss +source("nn/layers/dropout.dml") as dropout +source("nn/layers/l1_loss.dml") as l1_loss +source("nn/layers/l1_reg.dml") as l1_reg +source("nn/layers/l2_loss.dml") as l2_loss +source("nn/layers/l2_reg.dml") as l2_reg +source("nn/layers/log_loss.dml") as log_loss +source("nn/layers/max_pool.dml") as max_pool +source("nn/layers/max_pool_builtin.dml") as max_pool_builtin +source("nn/layers/relu.dml") as relu +source("nn/layers/sigmoid.dml") as sigmoid +source("nn/layers/softmax.dml") as softmax +source("nn/layers/tanh.dml") as tanh +source("nn/test/conv_simple.dml") as conv_simple +source("nn/test/max_pool_simple.dml") as max_pool_simple +source("nn/util.dml") as util + +check_rel_error = function(double dw_a, double dw_n, double lossph, double lossmh) + return (double rel_error) { + /* + * Check and report any issues with the relative error measure between + * the analytical and numerical partial derivatives. + * + * - Issues an "ERROR" statement for relative errors > 1e-2, + * indicating that the gradient is likely incorrect. + * - Issues a "WARNING" statement for relative errors < 1e-2 + * but > 1e-4, indicating that the gradient may be incorrect. + * + * Inputs: + * - dw_a: Analytical partial derivative wrt w. + * - dw_n: Numerical partial derivative wrt w. + * - lossph: Loss evaluated with w set to w+h. + * - lossmh: Loss evaluated with w set to w-h. + * + * Outputs: + * - rel_error: Relative error measure between the two derivatives.
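+ *
+ * The relative error itself is computed by util::compute_rel_error as
+ * abs(dw_a - dw_n) / max(1e-8, abs(dw_a) + abs(dw_n)), which lies in
+ * [0, 1] and is robust to near-zero gradients.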
+ */ + # Compute relative error + rel_error = util::compute_rel_error(dw_a, dw_n) + + # Evaluate relative error + if (rel_error > 1e-2) { + print("ERROR: Relative error " + rel_error + " > 1e-2 with " + dw_a + + " analytical vs " + dw_n + " numerical, with lossph " + lossph + + " and lossmh " + lossmh) + } + else if (rel_error > 1e-4 & rel_error <= 1e-2) { + print("WARNING: Relative error " + rel_error + " <= 1e-2 & > 1e-4 with " + dw_a + + " analytical vs " + dw_n + " numerical, with lossph " + lossph + + " and lossmh " + lossmh) + } +} + +affine = function() { + /* + * Gradient check for the affine layer. + */ + print("Grad checking the affine layer with L2 loss.") + + # Generate data + N = 3 # num examples + D = 100 # num features + M = 10 # num neurons + X = rand(rows=N, cols=D) + y = rand(rows=N, cols=M) + [W, b] = affine::init(D, M) + + # Compute analytical gradients of loss wrt parameters + out = affine::forward(X, W, b) + dout = l2_loss::backward(out, y) + [dX, dW, db] = affine::backward(dout, X, W, b) + + # Grad check + h = 1e-5 + print(" - Grad checking X.") + for (i in 1:nrow(X)) { + for (j in 1:ncol(X)) { + # Compute numerical derivative + old = as.scalar(X[i,j]) + X[i,j] = old - h + outmh = affine::forward(X, W, b) + lossmh = l2_loss::forward(outmh, y) + X[i,j] = old + h + outph = affine::forward(X, W, b) + lossph = l2_loss::forward(outph, y) + X[i,j] = old # reset + dX_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) + } + } + + print(" - Grad checking W.") + for (i in 1:nrow(W)) { + for (j in 1:ncol(W)) { + # Compute numerical derivative + old = as.scalar(W[i,j]) + W[i,j] = old - h + outmh = affine::forward(X, W, b) + lossmh = l2_loss::forward(outmh, y) + W[i,j] = old + h + outph = affine::forward(X, W, b) + lossph = l2_loss::forward(outph, y) + W[i,j] = old # reset + dW_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh) + } + } + + print(" - Grad checking b.") + for (i in 1:nrow(b)) { + for (j in 1:ncol(b)) { + # Compute numerical derivative + old = as.scalar(b[i,j]) + b[i,j] = old - h + outmh = affine::forward(X, W, b) + lossmh = l2_loss::forward(outmh, y) + b[i,j] = old + h + outph = affine::forward(X, W, b) + lossph = l2_loss::forward(outph, y) + b[i,j] = old # reset + db_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh) + } + } +} + +conv = function() { + /* + * Gradient check for the convolutional layer using `im2col`. 
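+ *
+ * As in the other checks in this file, every entry of X, W, and b is
+ * perturbed by +/- h (h = 1e-5), and the centered difference
+ * (lossph - lossmh) / (2*h) is compared against the analytical
+ * gradient via check_rel_error.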
+ */ + print("Grad checking the `im2col` convolutional layer with L2 loss.") + + # Generate data + N = 2 # num examples + C = 2 # num channels + Hin = 5 # input height + Win = 5 # input width + F = 2 # num filters + Hf = 3 # filter height + Wf = 3 # filter width + stride = 1 + pad = 1 + X = rand(rows=N, cols=C*Hin*Win) + y = rand(rows=N, cols=F*Hin*Win) + + # Create layers + [W, b] = conv::init(F, C, Hf, Wf) + + # Compute analytical gradients of loss wrt parameters + [out, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + dout = l2_loss::backward(out, y) + [dX, dW, db] = + conv::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + + # Grad check + h = 1e-5 + print(" - Grad checking X.") + for (i in 1:nrow(X)) { + for (j in 1:ncol(X)) { + # Compute numerical derivative + old = as.scalar(X[i,j]) + X[i,j] = old - h + [outmh, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + lossmh = l2_loss::forward(outmh, y) + X[i,j] = old + h + [outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + lossph = l2_loss::forward(outph, y) + X[i,j] = old # reset + dX_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) + } + } + + print(" - Grad checking W.") + for (i in 1:nrow(W)) { + for (j in 1:ncol(W)) { + # Compute numerical derivative + old = as.scalar(W[i,j]) + W[i,j] = old - h + [outmh, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + lossmh = l2_loss::forward(outmh, y) + W[i,j] = old + h + [outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + lossph = l2_loss::forward(outph, y) + W[i,j] = old # reset + dW_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh) + } + } + + print(" - Grad checking b.") + for (i in 1:nrow(b)) { + for (j in 1:ncol(b)) { + # Compute numerical derivative + old = as.scalar(b[i,j]) + b[i,j] = old - h + [outmh, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + lossmh = l2_loss::forward(outmh, y) + b[i,j] = old + h + [outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + lossph = l2_loss::forward(outph, y) + b[i,j] = old # reset + db_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh) + } + } +} + +conv_builtin = function() { + /* + * Gradient check for the convolutional layer using built-in functions. 
+ */ + print("Grad checking the built-in convolutional layer with L2 loss.") + + # Generate data + N = 2 # num examples + C = 2 # num channels + Hin = 5 # input height + Win = 5 # input width + F = 2 # num filters + Hf = 3 # filter height + Wf = 3 # filter width + stride = 1 + pad = 1 + X = rand(rows=N, cols=C*Hin*Win) + y = rand(rows=N, cols=F*Hin*Win) + + # Create layers + [W, b] = conv_builtin::init(F, C, Hf, Wf) + + # Compute analytical gradients of loss wrt parameters + [out, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + dout = l2_loss::backward(out, y) + [dX, dW, db] = + conv_builtin::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + + # Grad check + h = 1e-5 + print(" - Grad checking X.") + for (i in 1:nrow(X)) { + for (j in 1:ncol(X)) { + # Compute numerical derivative + old = as.scalar(X[i,j]) + X[i,j] = old - h + [outmh, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + lossmh = l2_loss::forward(outmh, y) + X[i,j] = old + h + [outph, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + lossph = l2_loss::forward(outph, y) + X[i,j] = old # reset + dX_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) + } + } + + print(" - Grad checking W.") + for (i in 1:nrow(W)) { + for (j in 1:ncol(W)) { + # Compute numerical derivative + old = as.scalar(W[i,j]) + W[i,j] = old - h + [outmh, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + lossmh = l2_loss::forward(outmh, y) + W[i,j] = old + h + [outph, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + lossph = l2_loss::forward(outph, y) + W[i,j] = old # reset + dW_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh) + } + } + + print(" - Grad checking b.") + for (i in 1:nrow(b)) { + for (j in 1:ncol(b)) { + # Compute numerical derivative + old = as.scalar(b[i,j]) + b[i,j] = old - h + [outmh, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + lossmh = l2_loss::forward(outmh, y) + b[i,j] = old + h + [outph, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + lossph = l2_loss::forward(outph, y) + b[i,j] = old # reset + db_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh) + } + } +} + +conv_simple = function() { + /* + * Gradient check for the simple reference convolutional layer. 
+ */ + print("Grad checking the simple reference convolutional layer with L2 loss.") + + # Generate data + N = 2 # num examples + C = 2 # num channels + Hin = 5 # input height + Win = 5 # input width + F = 2 # num filters + Hf = 3 # filter height + Wf = 3 # filter width + stride = 1 + pad = 1 + X = rand(rows=N, cols=C*Hin*Win) + y = rand(rows=N, cols=F*Hin*Win) + + # Create layers + [W, b] = conv_simple::init(F, C, Hf, Wf) + + # Compute analytical gradients of loss wrt parameters + [out, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + dout = l2_loss::backward(out, y) + [dX, dW, db] = + conv_simple::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + + # Grad check + h = 1e-5 + print(" - Grad checking X.") + for (i in 1:nrow(X)) { + for (j in 1:ncol(X)) { + # Compute numerical derivative + old = as.scalar(X[i,j]) + X[i,j] = old - h + [outmh, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + lossmh = l2_loss::forward(outmh, y) + X[i,j] = old + h + [outph, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + lossph = l2_loss::forward(outph, y) + X[i,j] = old # reset + dX_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) + } + } + + print(" - Grad checking W.") + for (i in 1:nrow(W)) { + for (j in 1:ncol(W)) { + # Compute numerical derivative + old = as.scalar(W[i,j]) + W[i,j] = old - h + [outmh, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + lossmh = l2_loss::forward(outmh, y) + W[i,j] = old + h + [outph, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + lossph = l2_loss::forward(outph, y) + W[i,j] = old # reset + dW_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh) + } + } + + print(" - Grad checking b.") + for (i in 1:nrow(b)) { + for (j in 1:ncol(b)) { + # Compute numerical derivative + old = as.scalar(b[i,j]) + b[i,j] = old - h + [outmh, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + lossmh = l2_loss::forward(outmh, y) + b[i,j] = old + h + [outph, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + lossph = l2_loss::forward(outph, y) + b[i,j] = old # reset + db_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh) + } + } +} + +cross_entropy_loss = function() { + /* + * Gradient check for the cross-entropy loss function. 
+ */ + print("Grad checking the cross-entropy loss function.") + + # Generate data + N = 3 # num examples + K = 10 # num targets + pred = rand(rows=N, cols=K, min=0, max=1, pdf="uniform") + pred = pred / rowSums(pred) # normalized probs + y = rand(rows=N, cols=K, min=0, max=1, pdf="uniform") + y = y / rowSums(y) # normalized probs + + # Compute analytical gradient + dpred = cross_entropy_loss::backward(pred, y) + + # Grad check + h = 1e-5 + for (i in 1:nrow(pred)) { + for (j in 1:ncol(pred)) { + # Compute numerical derivative + old = as.scalar(pred[i,j]) + pred[i,j] = old - h + lossmh = cross_entropy_loss::forward(pred, y) + pred[i,j] = old + h + lossph = cross_entropy_loss::forward(pred, y) + pred[i,j] = old # reset W[i,j] + dpred_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh) + } + } +} + +dropout = function() { + /* + * Gradient check for the (inverted) dropout layer. + */ + print("Grad checking the (inverted) dropout layer with L2 loss.") + + # Generate data + N = 3 # num examples + M = 100 # num neurons + p = 0.5 # probability of dropping neuron output + seed = as.integer(floor(as.scalar(rand(rows=1, cols=1, min=1, max=100000)))) # random seed + X = rand(rows=N, cols=M) + y = rand(rows=N, cols=M) + + # Compute analytical gradients of loss wrt parameters + [out, mask] = dropout::forward(X, p, seed) + dout = l2_loss::backward(out, y) + dX = dropout::backward(dout, X, p, mask) + + # Grad check + h = 1e-5 + for (i in 1:nrow(X)) { + for (j in 1:ncol(X)) { + # Compute numerical derivative + old = as.scalar(X[i,j]) + X[i,j] = old - h + [outmh, mask] = dropout::forward(X, p, seed) + lossmh = l2_loss::forward(outmh, y) + X[i,j] = old + h + [outph, mask] = dropout::forward(X, p, seed) + lossph = l2_loss::forward(outph, y) + X[i,j] = old # reset + dX_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) + } + } +} + +l1_loss = function() { + /* + * Gradient check for the L1 loss function. + */ + print("Grad checking the L1 loss function.") + + # Generate data + N = 3 # num examples + D = 2 # num targets + pred = rand(rows=N, cols=D) + y = rand(rows=N, cols=D) + + # Compute analytical gradient + dpred = l1_loss::backward(pred, y) + + # Grad check + h = 1e-5 + for (i in 1:nrow(pred)) { + for (j in 1:ncol(pred)) { + # Compute numerical derivative + old = as.scalar(pred[i,j]) + pred[i,j] = old - h + lossmh = l1_loss::forward(pred, y) + pred[i,j] = old + h + lossph = l1_loss::forward(pred, y) + pred[i,j] = old # reset W[i,j] + dpred_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh) + } + } +} + +l1_reg = function() { + /* + * Gradient check for the L1 regularization function. 
+ */ + print("Grad checking the L1 regularization function.") + + # Generate data + D = 5 # num features + M = 3 # num neurons + lambda = 0.01 + W = rand(rows=D, cols=M) + + # Compute analytical gradient + dW = l1_reg::backward(W, lambda) + + # Grad check + h = 1e-5 + for (i in 1:nrow(W)) { + for (j in 1:ncol(W)) { + # Compute numerical derivative + old = as.scalar(W[i,j]) + W[i,j] = old - h + reg_lossmh = l1_reg::forward(W, lambda) + W[i,j] = old + h + reg_lossph = l1_reg::forward(W, lambda) + W[i,j] = old # reset W[i,j] + dW_num = (reg_lossph - reg_lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, reg_lossph, reg_lossmh) + } + } +} + +l2_loss = function() { + /* + * Gradient check for the L2 loss function. + */ + print("Grad checking the L2 loss function.") + + # Generate data + N = 3 # num examples + D = 2 # num targets + pred = rand(rows=N, cols=D) + y = rand(rows=N, cols=D) + + # Compute analytical gradient + dpred = l2_loss::backward(pred, y) + + # Grad check + h = 1e-5 + for (i in 1:nrow(pred)) { + for (j in 1:ncol(pred)) { + # Compute numerical derivative + old = as.scalar(pred[i,j]) + pred[i,j] = old - h + lossmh = l2_loss::forward(pred, y) + pred[i,j] = old + h + lossph = l2_loss::forward(pred, y) + pred[i,j] = old # reset W[i,j] + dpred_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh) + } + } +} + +l2_reg = function() { + /* + * Gradient check for the L2 regularization function. + */ + print("Grad checking the L2 regularization function.") + + # Generate data + D = 5 # num features + M = 3 # num neurons + lambda = 0.01 + W = rand(rows=D, cols=M) + + # Compute analytical gradient + dW = l2_reg::backward(W, lambda) + + # Grad check + h = 1e-5 + for (i in 1:nrow(W)) { + for (j in 1:ncol(W)) { + # Compute numerical derivative + old = as.scalar(W[i,j]) + W[i,j] = old - h + reg_lossmh = l2_reg::forward(W, lambda) + W[i,j] = old + h + reg_lossph = l2_reg::forward(W, lambda) + W[i,j] = old # reset W[i,j] + dW_num = (reg_lossph - reg_lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, reg_lossph, reg_lossmh) + } + } +} + +log_loss = function() { + /* + * Gradient check for the log loss function. + */ + print("Grad checking the log loss function.") + + # Generate data + N = 20 # num examples + D = 1 # num targets + pred = rand(rows=N, cols=D, min=0, max=1, pdf="uniform") + y = round(rand(rows=N, cols=D, min=0, max=1, pdf="uniform")) + + # Compute analytical gradient + dpred = log_loss::backward(pred, y) + + # Grad check + h = 1e-5 + for (i in 1:nrow(pred)) { + for (j in 1:ncol(pred)) { + # Compute numerical derivative + old = as.scalar(pred[i,j]) + pred[i,j] = old - h + lossmh = log_loss::forward(pred, y) + pred[i,j] = old + h + lossph = log_loss::forward(pred, y) + pred[i,j] = old # reset W[i,j] + dpred_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh) + } + } +} + +max_pool = function() { + /* + * Gradient check for the max pooling layer. 
+ */ + print("Grad checking the max pooling layer with L2 loss.") + + # Generate data + N = 2 # num examples + C = 2 # num channels + Hin = 4 # input height + Win = 4 # input width + Hf = 2 # pool filter height + Wf = 2 # pool filter width + stride = 2 + X = rand(rows=N, cols=C*Hin*Win) + y = rand(rows=N, cols=C*2*2) + + # Compute analytical gradients of loss wrt parameters + [out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride) + dout = l2_loss::backward(out, y) + dX = max_pool::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride) + + # Grad check + h = 1e-5 + for (i in 1:nrow(X)) { + for (j in 1:ncol(X)) { + # Compute numerical derivative + old = as.scalar(X[i,j]) + X[i,j] = old - h + [outmh, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride) + lossmh = l2_loss::forward(outmh, y) + X[i,j] = old + h + [outph, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride) + lossph = l2_loss::forward(outph, y) + X[i,j] = old # reset + dX_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) + } + } +} + +max_pool_builtin = function() { + /* + * Gradient check for the max pooling layer. + */ + print("Grad checking the built-in max pooling layer with L2 loss.") + + # Generate data + N = 2 # num examples + C = 2 # num channels + Hin = 4 # input height + Win = 4 # input width + Hf = 2 # pool filter height + Wf = 2 # pool filter width + stride = 2 + X = rand(rows=N, cols=C*Hin*Win) + y = rand(rows=N, cols=C*2*2) + + # Compute analytical gradients of loss wrt parameters + [out, Hout, Wout] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride) + dout = l2_loss::backward(out, y) + dX = max_pool_builtin::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride) + + # Grad check + h = 1e-5 + for (i in 1:nrow(X)) { + for (j in 1:ncol(X)) { + # Compute numerical derivative + old = as.scalar(X[i,j]) + X[i,j] = old - h + [outmh, Hout, Wout] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride) + lossmh = l2_loss::forward(outmh, y) + X[i,j] = old + h + [outph, Hout, Wout] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride) + lossph = l2_loss::forward(outph, y) + X[i,j] = old # reset + dX_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) + } + } +} + +max_pool_simple = function() { + /* + * Gradient check for the simple reference max pooling layer. 
+ */ + print("Grad checking the simple reference max pooling layer with L2 loss.") + + # Generate data + N = 2 # num examples + C = 2 # num channels + Hin = 4 # input height + Win = 4 # input width + Hf = 2 # pool filter height + Wf = 2 # pool filter width + stride = 2 + X = rand(rows=N, cols=C*Hin*Win) + y = rand(rows=N, cols=C*2*2) + + # Compute analytical gradients of loss wrt parameters + [out, Hout, Wout] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride) + dout = l2_loss::backward(out, y) + dX = max_pool_simple::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride) + + # Grad check + h = 1e-5 + for (i in 1:nrow(X)) { + for (j in 1:ncol(X)) { + # Compute numerical derivative + old = as.scalar(X[i,j]) + X[i,j] = old - h + [outmh, Hout, Wout] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride) + lossmh = l2_loss::forward(outmh, y) + X[i,j] = old + h + [outph, Hout, Wout] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride) + lossph = l2_loss::forward(outph, y) + X[i,j] = old # reset + dX_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) + } + } +} + +relu = function() { + /* + * Gradient check for the ReLU nonlinearity layer. + * + * NOTE: This could result in a false-negative in which the test + * fails due to a kink being crossed in the nonlinearity. This + * occurs when the tests, f(x-h) and f(x+h), end up on opposite + * sides of the zero threshold of max(0, fx). For now, just run + * the tests again. In the future, we can explicitly check for + * this and rerun the test automatically. + */ + print("Grad checking the ReLU nonlinearity layer with L2 loss.") + + # Generate data + N = 3 # num examples + M = 10 # num neurons + X = rand(rows=N, cols=M, min=-5, max=5) + y = rand(rows=N, cols=M) + + # Compute analytical gradients of loss wrt parameters + out = relu::forward(X) + dout = l2_loss::backward(out, y) + dX = relu::backward(dout, X) + + # Grad check + h = 1e-5 + for (i in 1:nrow(X)) { + for (j in 1:ncol(X)) { + # Compute numerical derivative + old = as.scalar(X[i,j]) + X[i,j] = old - h + outmh = relu::forward(X) + lossmh = l2_loss::forward(outmh, y) + X[i,j] = old + h + outph = relu::forward(X) + lossph = l2_loss::forward(outph, y) + X[i,j] = old # reset + dX_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) + } + } +} + +sigmoid = function() { + /* + * Gradient check for the sigmoid nonlinearity layer. + */ + print("Grad checking the sigmoid nonlinearity layer with L2 loss.") + + # Generate data + N = 3 # num examples + M = 10 # num neurons + X = rand(rows=N, cols=M) + y = rand(rows=N, cols=M) + + # Compute analytical gradients of loss wrt parameters + out = sigmoid::forward(X) + dout = l2_loss::backward(out, y) + dX = sigmoid::backward(dout, X) + + # Grad check + h = 1e-5 + for (i in 1:nrow(X)) { + for (j in 1:ncol(X)) { + # Compute numerical derivative + old = as.scalar(X[i,j]) + X[i,j] = old - h + outmh = sigmoid::forward(X) + lossmh = l2_loss::forward(outmh, y) + X[i,j] = old + h + outph = sigmoid::forward(X) + lossph = l2_loss::forward(outph, y) + X[i,j] = old # reset + dX_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) + } + } +} + +softmax = function() { + /* + * Gradient check for the softmax layer. 
+ */ + print("Grad checking the softmax layer with L2 loss.") + + # Generate data + N = 3 # num examples + D = 10 # num classes + X = rand(rows=N, cols=D) + y = rand(rows=N, cols=D, min=0, max=1, pdf="uniform") + y = y / rowSums(y) + + # Compute analytical gradients of loss wrt parameters + out = softmax::forward(X) + dout = l2_loss::backward(out, y) + dX = softmax::backward(dout, X) + + # Grad check + h = 1e-5 + for (i in 1:nrow(X)) { + for (j in 1:ncol(X)) { + # Compute numerical derivative + old = as.scalar(X[i,j]) + X[i,j] = old - h + outmh = softmax::forward(X) + lossmh = l2_loss::forward(outmh, y) + X[i,j] = old + h + outph = softmax::forward(X) + lossph = l2_loss::forward(outph, y) + X[i,j] = old # reset + dX_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) + } + } +} + +tanh = function() { + /* + * Gradient check for the hyperbolic tangent (tanh) nonlinearity layer. + */ + print("Grad checking the tanh nonlinearity layer with L2 loss.") + + # Generate data + N = 3 # num examples + M = 10 # num neurons + X = rand(rows=N, cols=M) + y = rand(rows=N, cols=M) + + # Compute analytical gradients of loss wrt parameters + out = tanh::forward(X) + dout = l2_loss::backward(out, y) + dX = tanh::backward(dout, X) + + # Grad check + h = 1e-5 + for (i in 1:nrow(X)) { + for (j in 1:ncol(X)) { + # Compute numerical derivative + old = as.scalar(X[i,j]) + X[i,j] = old - h + outmh = tanh::forward(X) + lossmh = l2_loss::forward(outmh, y) + X[i,j] = old + h + outph = tanh::forward(X) + lossph = l2_loss::forward(outph, y) + X[i,j] = old # reset + dX_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) + } + } +} + +two_layer_affine_l2_net = function() { + /* + * Gradient check for a two-layer, fully-connected, feed-forward + * network with ReLU nonlinearity and L2 loss. + * + * NOTE: This could result in a false-negative in which the test + * fails due to a kink being crossed in the ReLU nonlinearity. This + * occurs when the tests, f(x-h) and f(x+h), end up on opposite + * sides of the zero threshold of max(0, fx). For now, just run + * the tests again. In the future, we can explicitly check for + * this and rerun the test automatically. + */ + print("Grad checking a two-layer, fully-connected, feed-forward network with a ReLU " + + "nonlinearity, and an L2 loss function.") + + # Generate input data + N = 1000 # num examples + D = 100 # num features + yD = 5 # num targets + X = rand(rows=N, cols=D, pdf="normal") * 0.0001 + y = rand(rows=N, cols=yD) + + # Create 2-layer, fully-connected network + M = 10 # number of hidden neurons + [W1, b1] = affine::init(D, M) + [W2, b2] = affine::init(M, yD) + + # Optimize for short "burn-in" time to move to characteristic + # mode of operation and unmask any real issues. 
+ print(" - Burn-in:") + lr = 0.0001 + decay = 0.99 + for(i in 1:5) { + # Compute forward and backward passes of net + [pred, loss, dX, dW1, db1, dW2, db2] = two_layer_affine_l2_net_run(X, y, W1, b1, W2, b2) + print(" - L2 loss: " + loss) + + # Optimize with basic SGD + W1 = W1 - lr * dW1 + b1 = b1 - lr * db1 + W2 = W2 - lr * dW2 + b2 = b2 - lr * db2 + lr = lr * decay + } + + # Compute analytical gradients + [pred, loss, dX, dW1, db1, dW2, db2] = two_layer_affine_l2_net_run(X, y, W1, b1, W2, b2) + + # Grad check + h = 1e-5 + print(" - Grad checking X.") + for (i in 1:2) { + for (j in 1:ncol(X)) { + # Compute numerical derivative + old_w = as.scalar(X[i,j]) + X[i,j] = old_w - h + [lossmh, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) + X[i,j] = old_w + h + [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) + X[i,j] = old_w # reset W[i,j] + dX_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh) + } + } + + print(" - Grad checking W1.") + for (i in 1:nrow(W1)) { + for (j in 1:ncol(W1)) { + # Compute numerical derivative + old_w = as.scalar(W1[i,j]) + W1[i,j] = old_w - h + [lossmh, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) + W1[i,j] = old_w + h + [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) + W1[i,j] = old_w # reset W[i,j] + dWij_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dW1[i,j]), dWij_num, lossph, lossmh) + } + } + + print(" - Grad checking W2.") + for (i in 1:nrow(W2)) { + for (j in 1:ncol(W2)) { + # Compute numerical derivative + old_w = as.scalar(W2[i,j]) + W2[i,j] = old_w - h + [lossmh, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) + W2[i,j] = old_w + h + [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) + W2[i,j] = old_w # reset W[i,j] + dWij_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(dW2[i,j]), dWij_num, lossph, lossmh) + } + } + + print(" - Grad checking b1.") + for (i in 1:nrow(b1)) { + for (j in 1:ncol(b1)) { + # Compute numerical derivative + old_b = as.scalar(b1[i,j]) + b1[i,j] = old_b - h + [lossmh, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) + b1[i,j] = old_b + h + [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) + b1[i,j] = old_b # reset b[1,j] + dbij_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(db1[i,j]), dbij_num, lossph, lossmh) + } + } + + print(" - Grad checking b2.") + for (i in 1:nrow(b2)) { + for (j in 1:ncol(b2)) { + # Compute numerical derivative + old_b = as.scalar(b2[i,j]) + b2[i,j] = old_b - h + [lossmh, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) + b2[i,j] = old_b + h + [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) + b2[i,j] = old_b # reset b[1,j] + dbij_num = (lossph - lossmh) / (2 * h) # numerical derivative + + # Check error + rel_error = check_rel_error(as.scalar(db2[i,j]), dbij_num, lossph, lossmh) + } + } +} + +/* + * Test network with forward/backward functions. 
+ */ +two_layer_affine_l2_net_run = function(matrix[double] X, matrix[double] y, + matrix[double] W1, matrix[double] b1, + matrix[double] W2, matrix[double] b2) + return (matrix[double] pred, double loss, + matrix[double] dX, + matrix[double] dW1, matrix[double] db1, + matrix[double] dW2, matrix[double] db2) { + # Compute forward pass + [loss, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2) + + # Compute backward pass + [dX, dpred, daout, dhout, dW1, db1, dW2, db2] = + two_layer_affine_l2_net_backward(X, y, pred, aout, hout, W1, b1, W2, b2) +} + +two_layer_affine_l2_net_forward = function(matrix[double] X, matrix[double] y, + matrix[double] W1, matrix[double] b1, + matrix[double] W2, matrix[double] b2) + return (double loss, matrix[double] pred, matrix[double] aout, matrix[double] hout) { + # Compute forward pass + hout = affine::forward(X, W1, b1) + aout = relu::forward(hout) + pred = affine::forward(aout, W2, b2) + + # Compute loss + loss = l2_loss::forward(pred, y) +} + +two_layer_affine_l2_net_backward = function(matrix[double] X, matrix[double] y, matrix[double] pred, + matrix[double] aout, matrix[double] hout, + matrix[double] W1, matrix[double] b1, + matrix[double] W2, matrix[double] b2) + return (matrix[double] dX, matrix[double] dpred, + matrix[double] daout, matrix[double] dhout, + matrix[double] dW1, matrix[double] db1, matrix[double] dW2, matrix[double] db2) { + # Compute backward pass + dpred = l2_loss::backward(pred, y) + [daout, dW2, db2] = affine::backward(dpred, aout, W2, b2) + dhout = relu::backward(daout, hout) + [dX, dW1, db1] = affine::backward(dhout, X, W1, b1) +} + http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/781d24d8/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml b/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml new file mode 100644 index 0000000..2f90779 --- /dev/null +++ b/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml @@ -0,0 +1,130 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +/* + * Max pooling layer. + * + * This implementation is intended to be a simple, reference version. + */ +forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf, + int strideh, int stridew) + return (matrix[double] out, int Hout, int Wout) { + /* + * Computes the forward pass for a 2D spatial max pooling layer. + * The input data has N examples, each represented as a 3D volume + * unrolled into a single vector. + * + * This implementation is intended to be a simple, reference version. 
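+ *
+ * Note that this layer applies no padding, so
+ * Hout = (Hin - Hf) / strideh + 1; e.g. a 4x4 input pooled with a 2x2
+ * window and stride 2 yields a 2x2 output per channel.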
+ * + * Inputs: + * - X: Input data matrix, of shape (N, C*Hin*Win). + * - C: Number of input channels (dimensionality of input depth). + * - Hin: Input height. + * - Win: Input width. + * - Hf: Filter height. + * - Wf: Filter width. + * - strideh: Stride over height. + * - stridew: Stride over width. + * + * Outputs: + * - out: Outputs, of shape (N, C*Hout*Wout). + * - Hout: Output height. + * - Wout: Output width. + */ + N = nrow(X) + Hout = as.integer((Hin - Hf) / strideh + 1) + Wout = as.integer((Win - Wf) / stridew + 1) + + # Create output volume + out = matrix(0, rows=N, cols=C*Hout*Wout) + + # Max pooling + parfor (n in 1:N, check=0) { # all examples + img = matrix(X[n,], rows=C, cols=Hin*Win) + parfor (c in 1:C, check=0) { # all channels + img_slice = matrix(img[c,], rows=Hin, cols=Win) + parfor (hout in 1:Hout, check=0) { # all output rows + hin = (hout-1) * strideh + 1 + parfor (wout in 1:Wout, check=0) { # all output columns + win = (wout-1) * stridew + 1 + out[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout] = + max(img_slice[hin:hin+Hf-1, win:win+Wf-1]) + } + } + } + } +} + +backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X, int C, + int Hin, int Win, int Hf, int Wf, int strideh, int stridew) + return (matrix[double] dX) { + /* + * Computes the backward pass for a 2D spatial max pooling layer. + * The input data has N examples, each represented as a 3D volume + * unrolled into a single vector. + * + * Inputs: + * - dout: Derivatives from upstream, of shape (N, C*Hout*Wout). + * - Hout: Output height. + * - Wout: Output width. + * - X: Input data matrix, of shape (N, C*Hin*Win). + * - C: Number of input channels (dimensionality of input depth). + * - Hin: Input height. + * - Win: Input width. + * - Hf: Filter height. + * - Wf: Filter width. + * - strideh: Stride over height. + * - stridew: Stride over width. + * + * Outputs: + * - dX: Gradient wrt X, of shape (N, C*Hin*Win). 
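+ *
+ * The gradient is routed only to the maximal element of each pooling
+ * window: a 0/1 indicator of the window maximum (computed with ppred)
+ * is scaled by the corresponding upstream derivative and accumulated
+ * into dX.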
+ */ + N = nrow(X) + + # Create gradient volume + dX = matrix(0, rows=N, cols=C*Hin*Win) + + # Gradient of max pooling + parfor (n in 1:N, check=0) { # all examples + img = matrix(X[n,], rows=C, cols=Hin*Win) + dimg = matrix(0, rows=C, cols=Hin*Win) + parfor (c in 1:C, check=0) { # all channels + img_slice = matrix(img[c,], rows=Hin, cols=Win) + dimg_slice = matrix(0, rows=Hin, cols=Win) + for (hout in 1:Hout, check=0) { # all output rows + hin = (hout-1) * strideh + 1 + for (wout in 1:Wout) { # all output columns + win = (wout-1) * stridew + 1 + img_slice_patch = img_slice[hin:hin+Hf-1, win:win+Wf-1] + max_val = max(img_slice_patch) + max_val_ind = ppred(img_slice_patch, max_val, "==") # max value indicator + # gradient passes through only for the max value in this patch + dimg_slice_patch = max_val_ind * dout[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout] + dimg_slice[hin:hin+Hf-1, win:win+Wf-1] = + dimg_slice[hin:hin+Hf-1, win:win+Wf-1] + dimg_slice_patch + } + } + dimg[c,] = matrix(dimg_slice, rows=1, cols=Hin*Win) + } + dX[n,] = matrix(dimg, rows=1, cols=C*Hin*Win) + } +} + http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/781d24d8/scripts/staging/SystemML-NN/nn/test/test.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/test/test.dml b/scripts/staging/SystemML-NN/nn/test/test.dml new file mode 100644 index 0000000..58ee3e1 --- /dev/null +++ b/scripts/staging/SystemML-NN/nn/test/test.dml @@ -0,0 +1,192 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +/* + * Various tests, not including gradient checks. + */ +source("nn/layers/conv.dml") as conv +source("nn/layers/conv_builtin.dml") as conv_builtin +source("nn/layers/max_pool.dml") as max_pool +source("nn/layers/max_pool_builtin.dml") as max_pool_builtin +source("nn/test/conv_simple.dml") as conv_simple +source("nn/test/max_pool_simple.dml") as max_pool_simple +source("nn/util.dml") as util + +conv = function() { + /* + * Test for the `conv` functions. 
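+ *
+ * The forward passes of the `im2col`, simple reference, and built-in
+ * convolution layers are compared elementwise on random data, with an
+ * ERROR reported above a relative error of 1e-10 and a WARNING above
+ * 1e-12.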
+ */ + print("Testing the conv functions.") + + # Generate data + N = 2 # num examples + C = 3 # num channels + Hin = 5 # input height + Win = 5 # input width + F = 2 # num filters + Hf = 3 # filter height + Wf = 3 # filter width + stride = 1 + pad = 1 + X = rand(rows=N, cols=C*Hin*Win, pdf="normal") + + # Create layer + [W, b] = conv::init(F, C, Hf, Wf) + + # Forward + [out, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [out_simple, Hout_simple, Wout_simple] = + conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + [out_builtin, Hout_builtin, Wout_builtin] = + conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad) + + # Equivalency check + out = matrix(out, rows=1, cols=N*F*Hout*Wout) + out_simple = matrix(out_simple, rows=1, cols=N*F*Hout*Wout) + out_builtin = matrix(out_builtin, rows=1, cols=N*F*Hout*Wout) + for (i in 1:length(out)) { + rel_error = util::check_rel_error(as.scalar(out[1,i]), as.scalar(out_simple[1,i]), 1e-10, 1e-12) + rel_error = util::check_rel_error(as.scalar(out[1,i]), as.scalar(out_builtin[1,i]), 1e-10, 1e-12) + } +} + +im2col = function() { + /* + * Test for the `im2col` and `col2im` functions. + */ + print("Testing the im2col and col2im functions.") + + # Generate data + C = 3 # num channels + Hin = 5 # input height + Win = 5 # input width + Hf = 3 # filter height + Wf = 3 # filter width + stride = 2 + pad = (Hin * stride - Hin + Hf - stride) / 2 + Hout = as.integer((Hin + 2 * pad - Hf) / stride + 1) + Wout = as.integer((Win + 2 * pad - Wf) / stride + 1) + x = rand(rows=C, cols=Hin*Win) + + # pad + x_pad = util::pad_image(x, Hin, Win, pad, pad) + + # im2col + x_cols = util::im2col(x_pad, Hin+2*pad, Win+2*pad, Hf, Wf, stride, stride) + + # col2im + x_pad2 = util::col2im(x_cols, C, Hin+2*pad, Win+2*pad, Hf, Wf, stride, stride, "none") + + # Equivalency check + equivalent = util::all_equal(x_pad, x_pad2) + if (!equivalent) + print("ERROR: im2col and then col2im does not yield the original image.") +} + +padding = function() { + /* + * Test for the `pad_image` and `unpad_image` functions. + */ + print("Testing the padding and unpadding functions.") + + # Generate data + C = 3 # num channels + Hin = 5 # input height + Win = 5 # input width + pad = 3 # padding + x = rand(rows=C, cols=Hin*Win) + + # Pad image + x_pad = util::pad_image(x, Hin, Win, pad, pad) + + # Check for padded rows & columns + for (c in 1:C) { + x_pad_slice = matrix(x_pad[c,], rows=Hin+2*pad, cols=Win+2*pad) + for (i in 1:pad) { + rowsum = sum(x_pad_slice[i,]) + colsum = sum(x_pad_slice[,i]) + if (rowsum != 0) + print("ERROR: Padding was not applied to row " + i + ".") + if (colsum != 0) + print("ERROR: Padding was not applied to column " + i + ".") + } + } + + # Unpad image + x1 = util::unpad_image(x_pad, Hin, Win, pad, pad) + + # Equivalency check + equivalent = util::all_equal(x, x1) + if (!equivalent) + print("ERROR: Padding and then unpadding does not yield the original image.") +} + +max_pool = function() { + /* + * Test for the `max_pool` functions. 
+ */ + print("Testing the max pool functions.") + + # Generate data + N = 2 # num examples + C = 3 # num channels + Hin = 8 # input height + Win = 8 # input width + Hf = 2 # filter height + Wf = 2 # filter width + stride = 2 + X = rand(rows=N, cols=C*Hin*Win, pdf="normal") + + # Forward + [out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride) + [out_simple, Hout_simple, Wout_simple] = + max_pool_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride) + [out_builtin, Hout_builtin, Wout_builtin] = + max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride) + + # Equivalency check + out = matrix(out, rows=1, cols=N*C*Hout*Wout) + out_simple = matrix(out_simple, rows=1, cols=N*C*Hout*Wout) + out_builtin = matrix(out_builtin, rows=1, cols=N*C*Hout*Wout) + for (i in 1:length(out)) { + rel_error = util::check_rel_error(as.scalar(out[1,i]), as.scalar(out_simple[1,i]), 1e-10, 1e-12) + rel_error = util::check_rel_error(as.scalar(out[1,i]), as.scalar(out_builtin[1,i]), 1e-10, 1e-12) + } + + # --- + # Check for correct behavior + # Generate data + C = 2 # num channels + Hin = 4 # input height + Win = 4 # input width + X = matrix(seq(1,16,1), rows=Hin, cols=Win) + X = matrix(rbind(X, t(X)), rows=1, cols=C*Hin*Win) + X = rbind(X, X) # N=2 + + # Forward + [out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride) + + # Equivalency check + target = matrix("6 8 14 16 6 14 8 16", rows=1, cols=C*Hout*Wout) + target = rbind(target, target) # N=2 + tmp = util::check_all_equal(out, target) +} + http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/781d24d8/scripts/staging/SystemML-NN/nn/test/tests.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/test/tests.dml b/scripts/staging/SystemML-NN/nn/test/tests.dml new file mode 100644 index 0000000..1b91967 --- /dev/null +++ b/scripts/staging/SystemML-NN/nn/test/tests.dml @@ -0,0 +1,72 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +/* + * Script to run tests. 
+ */ +source("nn/test/grad_check.dml") as grad_check +source("nn/test/test.dml") as test + +print("") +print("Starting grad checks.") +print("---") + +tmp = grad_check::cross_entropy_loss() +tmp = grad_check::l1_loss() +tmp = grad_check::l2_loss() +tmp = grad_check::log_loss() +tmp = grad_check::affine() +tmp = grad_check::conv_simple() +tmp = grad_check::conv() +tmp = grad_check::conv_builtin() +tmp = grad_check::dropout() +tmp = grad_check::l1_reg() +tmp = grad_check::l2_reg() +tmp = grad_check::max_pool_simple() +tmp = grad_check::max_pool() +tmp = grad_check::max_pool_builtin() +tmp = grad_check::relu() +tmp = grad_check::sigmoid() +tmp = grad_check::softmax() +tmp = grad_check::tanh() +tmp = grad_check::two_layer_affine_l2_net() + +print("---") +print("Grad checks complete -- look for any ERRORs or WARNINGs.") +print("If any tests involving ReLUs failed, try a few times " + + "to ensure that they were not false negatives due to " + + "kinks being crossed.") +print("") + +print("") +print("Starting other tests.") +print("---") + +tmp = test::im2col() +tmp = test::padding() +tmp = test::conv() +tmp = test::max_pool() + +print("---") +print("Other tests complete -- look for any ERRORs or WARNINGs.") +print("") +print("") + http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/781d24d8/scripts/staging/SystemML-NN/nn/util.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/util.dml b/scripts/staging/SystemML-NN/nn/util.dml new file mode 100644 index 0000000..213363b --- /dev/null +++ b/scripts/staging/SystemML-NN/nn/util.dml @@ -0,0 +1,266 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +/* + * Utility functions. + */ +all_equal = function(matrix[double] X1, matrix[double] X2) + return(boolean equivalent) { + /* + * Determine if two matrices are equivalent. + * + * Inputs: + * - X1: Input matrix, of shape (any, any). + * - X2: Input matrix, of same shape as X1. + * + * Outputs: + * - equivalent: Whether or not the two matrices are equivalent. + */ + equivalent = as.logical(prod(ppred(X1, X2, "=="))) +} + +check_all_equal = function(matrix[double] X1, matrix[double] X2) + return(boolean equivalent) { + /* + * Check if two matrices are equivalent, and report any issues. + * + * - Issues an "ERROR" statement if elements of the two matrices + * are not equal. + * + * Inputs: + * - X1: Input matrix, of shape (any, any). + * - X2: Input matrix, of same shape as X1. + * + * Outputs: + * - equivalent: Whether or not the two matrices are equivalent. 
+ */ + # Determine if matrices are equivalent + equivalent = all_equal(X1, X2) + + # Evaluate relative error + if (!equivalent) { + print("ERROR: The two matrices are not equivalent.") + } +} + +compute_rel_error = function(double x1, double x2) return (double rel_error) { + /* + * Relative error measure between two values. + * + * Uses smoothing to avoid divide-by-zero errors. + * + * Inputs: + * - x1: First value. + * - x2: Second value. + * + * Outputs: + * - rel_error: Relative error measure between the two values. + */ + rel_error = abs(x1 - x2) / max(1e-8, abs(x1) + abs(x2)) +} + +check_rel_error = function(double x1, double x2, double thresh_error, double thresh_warn) + return (double rel_error) { + /* + * Check and report any issues with the relative error measure between + * two values. + * + * - Issues an "ERROR" statement for relative errors > thresh_error, + * indicating that the implementation is likely incorrect. + * - Issues a "WARNING" statement for relative errors < thresh_error + * but > thresh_warn, indicating that the implementation may be incorrect. + * + * Inputs: + * - x1: First value. + * - x2: Second value. + * - thresh_error: Error threshold. + * - thresh_warn: Warning threshold. + * + * Outputs: + * - rel_error: Relative error measure between the two values. + */ + # Compute relative error + rel_error = compute_rel_error(x1, x2) + + # Evaluate relative error + if (rel_error > thresh_error) { + print("ERROR: Relative error " + rel_error + " > " + thresh_error + " with " + x1 + + " vs " + x2 + ".") + } + else if (rel_error > thresh_warn & rel_error < thresh_error) { + print("WARNING: Relative error " + rel_error + " > " + thresh_warn + " with " + x1 + + " vs " + x2 + ".") + } +} + +im2col = function(matrix[double] img, int Hin, int Win, int Hf, int Wf, int strideh, int stridew) + return (matrix[double] img_cols) { + /* + * Rearrange local image regions (patches) into columns. + * + * Assumes image has already been padded as necessary. + * + * Inputs: + * - img: Input image, of shape (C, Hin*Win), where C is the number + * of input channels (depth). + * - Hin: Input height, including padding. + * - Win: Input width, including padding. + * - Hf: Filter height. + * - Wf: Filter width. + * - strideh: Stride over height. + * - stridew: Stride over width. + * + * Outputs: + * - img_cols: Local spatial regions (patches) of the image stretched + * out into columns, of shape (C*Hf*Wf, Hout*Wout). + */ + C = nrow(img) + Hout = as.integer((Hin - Hf) / strideh + 1) + Wout = as.integer((Win - Wf) / stridew + 1) + + img_cols = matrix(0, rows=C*Hf*Wf, cols=Hout*Wout) # zeros + parfor (hout in 1:Hout, check=0) { # all output rows + hin = (hout-1) * strideh + 1 + parfor (wout in 1:Wout, check=0) { # all output columns + win = (wout-1) * stridew + 1 + # Extract a local patch of the input image corresponding spatially to the filter sizes. + img_patch = matrix(0, rows=C, cols=Hf*Wf) # zeros + parfor (c in 1:C) { # all channels + img_slice = matrix(img[c,], rows=Hin, cols=Win) # reshape + img_patch[c,] = matrix(img_slice[hin:hin+Hf-1, win:win+Wf-1], rows=1, cols=Hf*Wf) + } + img_cols[,(hout-1)*Wout + wout] = matrix(img_patch, rows=C*Hf*Wf, cols=1) # reshape + } + } +} + +col2im = function(matrix[double] img_cols, int C, int Hin, int Win, int Hf, int Wf, + int strideh, int stridew, string reduction) + return (matrix[double] img) { + /* + * Create an image from columns of local image regions (patches). + * + * The reduction strategy determines how to deal with overlapping + * patches. 
If it is set to "add", any overlapping patches will be + * added together when creating the image. This is useful when + * computing gradients on the original image given gradients on the + * patches. Otherwise, if "none" is provided, any overlapping + * patches will just override previous ones when creating the image. + * This is useful when recreating an image from the output of + * `im2col`. + * + * Assumes original image was already padded as necessary. + * + * Inputs: + * - img_cols: Local spatial regions (patches) of the image stretched + * out into columns, of shape (C*Hf*Wf, Hout*Wout). + * - C: Number of input channels (dimensionality of input depth). + * - Hin: Input height, including padding. + * - Win: Input width, including padding. + * - Hf: Filter height. + * - Wf: Filter width. + * - strideh: Stride over height. + * - stridew: Stride over width. + * - reduction: The reduction strategy to use for overlapping + * patches. Valid options are "add" and "none". + * + * Outputs: + * - img: Input image, of shape (C, Hin*Win). + */ + Hout = as.integer((Hin - Hf) / strideh + 1) + Wout = as.integer((Win - Wf) / stridew + 1) + + img = matrix(0, rows=C, cols=Hin*Win) # zeros + for (hout in 1:Hout) { # all output rows + hin = (hout-1) * strideh + 1 + for (wout in 1:Wout) { # all output columns + win = (wout-1) * stridew + 1 + # Extract a local patch of the input image corresponding spatially to the filter sizes. + img_patch = matrix(img_cols[,(hout-1)*Wout + wout], rows=C, cols=Hf*Wf) # zeros + parfor (c in 1:C) { # all channels + img_patch_slice = matrix(img_patch[c,], rows=Hf, cols=Wf) # reshape + if (reduction == "add") { + img_slice = matrix(0, rows=Hin, cols=Win) + img_slice[hin:hin+Hf-1, win:win+Wf-1] = img_patch_slice + img[c,] = img[c,] + matrix(img_slice, rows=1, cols=Hin*Win) + } else { + img_slice = matrix(img[c,], rows=Hin, cols=Win) + img_slice[hin:hin+Hf-1, win:win+Wf-1] = img_patch_slice + img[c,] = matrix(img_slice, rows=1, cols=Hin*Win) + } + } + } + } +} + +pad_image = function(matrix[double] img, int Hin, int Win, int padh, int padw) + return (matrix[double] img_padded) { + /* + * Pads an image along the height and width dimensions with zeros. + * + * Inputs: + * - img: Input image, of shape (C, Hin*Win), where C is the number + * of input channels (depth). + * - Hin: Input height. + * - Win: Input width. + * - padh: Padding for top and bottom sides. + * - padw: Padding for left and right sides. + * + * Outputs: + * - img_padded: The input image padded along the height and width + * dimensions, of shape (C, (Hin+2*padh)*(Win+2*padw)). + */ + C = nrow(img) + img_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros + parfor (c in 1:C) { + img_slice = matrix(img[c,], rows=Hin, cols=Win) # depth slice C reshaped + img_padded_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw) + img_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = img_slice + img_padded[c,] = matrix(img_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape + } +} + +unpad_image = function(matrix[double] img_padded, int Hin, int Win, int padh, int padw) + return (matrix[double] img) { + /* + * Unpads an image along the height and width dimensions. + * + * Inputs: + * - img_padded: The input image padded along the height and width + * dimensions, of shape (C, (Hin+2*padh)*(Win+2*padw)). + * - Hin: Input height of unpadded image. + * - Win: Input width of unpadded image. + * - padh: Padding for top and bottom sides. + * - padw: Padding for left and right sides. 
+ * + * Outputs: + * - img: Input image, of shape (C, Hin*Win), where C is the number + * of input channels (depth). + */ + C = nrow(img_padded) + img = matrix(0, rows=C, cols=Hin*Win) + parfor (c in 1:C) { + img_padded_slice = matrix(img_padded[c,], rows=(Hin+2*padh), cols=(Win+2*padw)) + img_slice = img_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] + img[c,] = matrix(img_slice, rows=1, cols=Hin*Win) + } +} +
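As a quick sanity check of how these utilities compose, the following DML sketch (illustrative only, not part of this commit; it assumes util.dml has been sourced as `util`) pads an image, cuts it into non-overlapping patches with im2col, rebuilds it with col2im using the "none" reduction, and unpads it, which should reproduce the original image exactly:

  source("nn/util.dml") as util

  C = 1       # channels
  Hin = 4     # input height
  Win = 4     # input width
  Hf = 2      # patch height
  Wf = 2      # patch width
  stride = 2  # non-overlapping patches, since stride == filter size
  pad = 1     # zero-padding on each side

  img = matrix(seq(1, C*Hin*Win, 1), rows=C, cols=Hin*Win)

  # Pad, split into patch columns, rebuild, and unpad.
  img_padded = util::pad_image(img, Hin, Win, pad, pad)
  img_cols = util::im2col(img_padded, Hin+2*pad, Win+2*pad, Hf, Wf, stride, stride)
  img_rebuilt = util::col2im(img_cols, C, Hin+2*pad, Win+2*pad, Hf, Wf,
                             stride, stride, "none")
  img_unpadded = util::unpad_image(img_rebuilt, Hin, Win, pad, pad)

  # The round trip should be exact; check_all_equal prints an ERROR otherwise.
  tmp = util::check_all_equal(img_unpadded, img)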
