Repository: incubator-systemml Updated Branches: refs/heads/master 9451a0fd8 -> d5bb9cc2f
SystemML-NN: Updating the bias forward/backward computations in the built-in version of the `conv` function, and improving the documentation. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/d5bb9cc2 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/d5bb9cc2 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/d5bb9cc2 Branch: refs/heads/master Commit: d5bb9cc2fd13c89e58e7d0123a81b17028dce1a5 Parents: 9451a0f Author: Mike Dusenberry <[email protected]> Authored: Fri Jun 10 17:34:53 2016 -0700 Committer: Mike Dusenberry <[email protected]> Committed: Fri Jun 10 17:35:42 2016 -0700 ---------------------------------------------------------------------- scripts/staging/SystemML-NN/nn/layers/conv.dml | 26 ++++++++++------- .../SystemML-NN/nn/layers/conv_builtin.dml | 30 +++++++++----------- 2 files changed, 29 insertions(+), 27 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d5bb9cc2/scripts/staging/SystemML-NN/nn/layers/conv.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/layers/conv.dml b/scripts/staging/SystemML-NN/nn/layers/conv.dml index 1b737f5..100bc12 100644 --- a/scripts/staging/SystemML-NN/nn/layers/conv.dml +++ b/scripts/staging/SystemML-NN/nn/layers/conv.dml @@ -50,7 +50,15 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b, * - strideh: Stride over height. * - stridew: Stride over width. * - padh: Padding for top and bottom sides. + * For same output height as input, set padh = (Hf - 1) / 2, + * assuming strideh = 1. + * More generally, padh = (Hin*(strideh-1) + Hf - strideh) / 2 + * preserves the spatial dimensions of the input. * - padw: Padding for left and right sides. + * For same output width as input, set padw = (Wf - 1) / 2, + * assuming stridew = 1. + * More generally, padw = (Win*(stridew-1) + Wf - stridew) / 2 + * preserves the spatial dimensions of the input. * * Outputs: * - out: Outputs, of shape (N, F*Hout*Wout). @@ -118,15 +126,13 @@ backward = function(matrix[double] dout, int Hout, int Wout, F = nrow(W) # Create gradient volumes + # Note: Create convenience gradient volumes for dW and db that will + # allow for one gradient to be stored per example, allowing for + # parallel computation at the expense of memory. We will reduce at + # the end. dX = matrix(0, rows=N, cols=C*Hin*Win) - dW = matrix(0, rows=F, cols=C*Hf*Wf) - db = matrix(0, rows=F, cols=1) - - # Create convenience gradient volumes for dW and db that will allow - # for one gradient to be stored per example, allowing for parallel - # computation at the expense of memory. We will reduce at the end. - dWN = matrix(0, rows=N, cols=F*C*Hf*Wf) - dbN = matrix(0, rows=N, cols=F) + dWN = matrix(0, rows=N, cols=F*C*Hf*Wf) # dW = matrix(0, rows=F, cols=C*Hf*Wf) + dbN = matrix(0, rows=N, cols=F) # db = matrix(0, rows=F, cols=1) # Partial derivatives for convolution - im2col implementation parfor (n in 1:N) { # all examples @@ -136,11 +142,11 @@ backward = function(matrix[double] dout, int Hout, int Wout, Xn = matrix(X[n,], rows=C, cols=Hin*Win) # reshape Xn_padded = util::pad_image(Xn, Hin, Win, padh, padw) # shape (C, (Hin+2*padh)*(Win+2*padw)) Xn_padded_cols = util::im2col(Xn_padded, Hin+2*padh, Win+2*padw, Hf, Wf, strideh, stridew) - #dW = dW + doutn %*% t(Xn_padded_cols) + # dW = dW + doutn %*% t(Xn_padded_cols) dWN[n,] = matrix(doutn %*% t(Xn_padded_cols), rows=1, cols=F*C*Hf*Wf) # Compute db - #db = db + rowSums(doutn) + # db = db + rowSums(doutn) dbN[n,] = matrix(rowSums(doutn), rows=1, cols=F) # Compute dX http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d5bb9cc2/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml ---------------------------------------------------------------------- diff --git a/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml b/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml index 7042eb2..f7bbd57 100644 --- a/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml +++ b/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml @@ -31,11 +31,6 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b, * F filters. The input data has N examples, each represented as a 3D * volume unrolled into a single vector. * - * This implementation uses `im2col` internally for each image to - * extract local image regions (patches) into columns, and then - * performs a matrix multiplication with the filters to compute the - * output maps. - * * Inputs: * - X: Input data matrix, of shape (N, C*Hin*Win). * - W: Weights (parameters) matrix, of shape (F, C*Hf*Wf). @@ -48,7 +43,15 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b, * - strideh: Stride over height. * - stridew: Stride over width. * - padh: Padding for top and bottom sides. + * For same output height as input, set padh = (Hf - 1) / 2, + * assuming strideh = 1. + * More generally, padh = (Hin*(strideh-1) + Hf - strideh) / 2 + * preserves the spatial dimensions of the input. * - padw: Padding for left and right sides. + * For same output width as input, set padw = (Wf - 1) / 2, + * assuming stridew = 1. + * More generally, padw = (Win*(stridew-1) + Wf - stridew) / 2 + * preserves the spatial dimensions of the input. * * Outputs: * - out: Outputs, of shape (N, F*Hout*Wout). @@ -65,10 +68,9 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b, stride=[strideh,stridew], padding=[padh,padw]) # Add bias term to each output filter - bias = b - for (i in 1:Hout*Wout-1) - bias = cbind(bias, b) # creating shape (F, Hout*Wout) - out = out + matrix(bias, rows=1, cols=F*Hout*Wout) + # Note: Biases vector b is replicated to (F, Hout*Wout) first. + ones = matrix(1, rows=1, cols=Hout*Wout) + out = out + matrix(b %*% ones, rows=1, cols=F*Hout*Wout) } backward = function(matrix[double] dout, int Hout, int Wout, @@ -80,8 +82,6 @@ backward = function(matrix[double] dout, int Hout, int Wout, * Computes the backward pass for a 2D spatial convolutional layer * with F filters. * - * This implementation uses `im2col` and `col2im` internally. - * * Inputs: * - dout: Derivatives from upstream, of shape (N, F*Hout*Wout). * - Hout: Output height. @@ -113,12 +113,8 @@ backward = function(matrix[double] dout, int Hout, int Wout, dX = conv2d_backward_data(W, dout, stride=[strideh, stridew], padding=[padh,padw], input_shape=[N,C,Hin,Win], filter_shape=[F,C,Hf,Wf]) - # Partial derivatives for bias terms - db = matrix(0, rows=F, cols=1) - for (n in 1:N) { - doutn = matrix(dout[n,], rows=F, cols=Hout*Wout) - db = db + rowSums(doutn) - } + # Partial derivatives for bias vector + db = rowSums(matrix(colSums(dout), rows=F, cols=Hout*Wout)) } init = function(int F, int C, int Hf, int Wf)
