Repository: incubator-systemml
Updated Branches:
  refs/heads/master 2e48d951b -> ac8ee2bef
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/test/conv_simple.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/conv_simple.dml b/scripts/staging/SystemML-NN/nn/test/conv_simple.dml
index fb9d02c..efd99c3 100644
--- a/scripts/staging/SystemML-NN/nn/test/conv_simple.dml
+++ b/scripts/staging/SystemML-NN/nn/test/conv_simple.dml
@@ -24,6 +24,7 @@
  *
  * This implementation is intended to be a simple, reference version.
  */
+
 forward = function(matrix[double] X, matrix[double] W, matrix[double] b, int C, int Hin, int Win,
                    int Hf, int Wf, int strideh, int stridew, int padh, int padw)
@@ -36,9 +37,9 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
  * This implementation is intended to be a simple, reference version.
  *
  * Inputs:
- * - X: Input data matrix, of shape (N, C*Hin*Win).
- * - W: Weights (parameters) matrix, of shape (F, C*Hf*Wf).
- * - b: Biases vector, of shape (F, 1).
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
  * - C: Number of input channels (dimensionality of input depth).
  * - Hin: Input height.
  * - Win: Input width.
@@ -56,8 +57,8 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
  */
  N = nrow(X)
  F = nrow(W)
- Hout = as.integer((Hin + 2 * padh - Hf) / strideh + 1)
- Wout = as.integer((Win + 2 * padw - Wf) / stridew + 1)
+ Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+ Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)

  # Create output volume
  out = matrix(0, rows=N, cols=F*Hout*Wout)
@@ -71,14 +72,14 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
      Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped
      Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
      Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
-     Xn_padded[c, ] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
+     Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
    }

    # Convolve image with filters
    parfor (f in 1:F, check=0) { # all filters
      parfor (hout in 1:Hout, check=0) { # all output rows
-       h0 = (hout-1) * strideh + 1
+       h0 = (hout-1)*strideh + 1
        parfor (wout in 1:Wout, check=0) { # all output columns
-         w0 = (wout-1) * stridew + 1
+         w0 = (wout-1)*stridew + 1
          # Create a patch of the input example corresponding spatially to the filter sizes
          Xn_padded_patch = matrix(0, rows=C, cols=Hf*Wf) # zeros
          parfor (c in 1:C, check=0) {
@@ -106,12 +107,13 @@ backward = function(matrix[double] dout, int Hout, int Wout,
  * This implementation is intended to be a simple, reference version.
  *
  * Inputs:
- * - dout: Derivatives from upstream, of shape (N, F*Hout*Wout).
+ * - dout: Gradient wrt `out` from upstream, of
+ *     shape (N, F*Hout*Wout).
  * - Hout: Output height.
  * - Wout: Output width.
- * - X: Previous input data matrix, of shape (N, C*Hin*Win).
- * - W: Weights (parameters) matrix, of shape (F, C*Hf*Wf).
- * - b: Biases vector, of shape (F, 1).
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
  * - C: Number of input channels (dimensionality of input depth).
  * - Hin: Input height.
  * - Win: Input width.
@@ -123,14 +125,14 @@ backward = function(matrix[double] dout, int Hout, int Wout,
  * - padw: Padding for left and right sides.
  *
  * Outputs:
- * - dX: Gradient wrt X, of shape (N, C*Hin*Win).
- * - dW: Gradient wrt W, of shape (F, C*Hf*Wf).
- * - db: Gradient wrt b, of shape (F, 1).
+ * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
+ * - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf).
+ * - db: Gradient wrt `b`, of shape (F, 1).
  */
  N = nrow(X)
  F = nrow(W)
- Hout = as.integer((Hin + 2 * padh - Hf) / strideh + 1)
- Wout = as.integer((Win + 2 * padw - Wf) / stridew + 1)
+ Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+ Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)

  # Create gradient volumes
  dX = matrix(0, rows=N, cols=C*Hin*Win)
@@ -146,7 +148,7 @@ backward = function(matrix[double] dout, int Hout, int Wout,
      Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped
      Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
      Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
-     Xn_padded[c, ] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
+     Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
    }
    dXn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw))
    for (f in 1:F) { # all filters
@@ -191,10 +193,11 @@ init = function(int F, int C, int Hf, int Wf)
 /*
  * Initialize the parameters of this layer.
  *
- * We use the heuristic by He et al. [http://arxiv.org/abs/1502.01852],
- * which limits the magnification of inputs/gradients during
- * forward/backward passes by scaling unit-Gaussian weights by a
- * factor of sqrt(2/n), under the assumption of relu neurons.
+ * We use the heuristic by He et al., which limits the magnification
+ * of inputs/gradients during forward/backward passes by scaling
+ * unit-Gaussian weights by a factor of sqrt(2/n), under the
+ * assumption of relu neurons.
+ *  - http://arxiv.org/abs/1502.01852
  *
  * Inputs:
  * - F: Number of filters.
@@ -203,8 +206,8 @@ init = function(int F, int C, int Hf, int Wf)
  * - Wf: Filter width.
  *
  * Outputs:
- * - W: Weights (parameters) matrix, of shape (F, C*Hf*Wf).
- * - b: Biases vector, of shape (F, 1).
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
  */
  W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf))
  b = matrix(0, rows=F, cols=1)
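As a side note on the hunks above: the output-shape arithmetic they reformat, Hout = (Hin + 2*padh - Hf)/strideh + 1 (and likewise for Wout), can be sanity-checked with a few lines of standalone DML. The sizes below are hypothetical illustration values, not taken from the patch:

  # Hypothetical sizes, used only to illustrate the shape formula above.
  Hin = 5
  Win = 5
  Hf = 3
  Wf = 3
  strideh = 1
  stridew = 1
  padh = 1
  padw = 1
  Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
  Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
  print("Hout = " + Hout + ", Wout = " + Wout)  # expect 5 x 5

With a 3x3 filter, stride 1, and padding 1, the spatial size is preserved, which is the usual "same"-convolution setup.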
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/test/grad_check.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/grad_check.dml b/scripts/staging/SystemML-NN/nn/test/grad_check.dml
index 6b90d56..adc1c9a 100644
--- a/scripts/staging/SystemML-NN/nn/test/grad_check.dml
+++ b/scripts/staging/SystemML-NN/nn/test/grad_check.dml
@@ -117,7 +117,7 @@ affine = function() {
       outph = affine::forward(X, W, b)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -136,7 +136,7 @@ affine = function() {
       outph = affine::forward(X, W, b)
       lossph = l2_loss::forward(outph, y)
       W[i,j] = old # reset
-      dW_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dW_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
@@ -155,7 +155,7 @@ affine = function() {
       outph = affine::forward(X, W, b)
       lossph = l2_loss::forward(outph, y)
       b[i,j] = old # reset
-      db_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      db_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
@@ -214,7 +214,7 @@ batch_norm = function() {
           batch_norm::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -235,7 +235,7 @@ batch_norm = function() {
           batch_norm::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps)
       lossph = l2_loss::forward(outph, y)
       gamma[i,j] = old # reset
-      dgamma_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dgamma_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dgamma[i,j]), dgamma_num, lossph, lossmh)
@@ -256,7 +256,7 @@ batch_norm = function() {
           batch_norm::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps)
       lossph = l2_loss::forward(outph, y)
       beta[i,j] = old # reset
-      dbeta_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dbeta_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dbeta[i,j]), dbeta_num, lossph, lossmh)
@@ -307,7 +307,7 @@ conv = function() {
       [outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -326,7 +326,7 @@ conv = function() {
       [outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
       lossph = l2_loss::forward(outph, y)
       W[i,j] = old # reset
-      dW_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dW_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
@@ -345,7 +345,7 @@ conv = function() {
       [outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
       lossph = l2_loss::forward(outph, y)
       b[i,j] = old # reset
-      db_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      db_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
@@ -355,7 +355,8 @@ conv = function() {

 conv_builtin = function() {
   /*
-   * Gradient check for the convolutional layer using built-in functions.
+   * Gradient check for the convolutional layer using built-in
+   * functions.
    */
   print("Grad checking the built-in convolutional layer with L2 loss.")
@@ -397,7 +398,7 @@ conv_builtin = function() {
                                          pad, pad)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -418,7 +419,7 @@ conv_builtin = function() {
                                          pad, pad)
       lossph = l2_loss::forward(outph, y)
       W[i,j] = old # reset
-      dW_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dW_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
@@ -439,7 +440,7 @@ conv_builtin = function() {
                                          pad, pad)
       lossph = l2_loss::forward(outph, y)
       b[i,j] = old # reset
-      db_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      db_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
@@ -491,7 +492,7 @@ conv_simple = function() {
                                          pad, pad)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -512,7 +513,7 @@ conv_simple = function() {
                                          pad, pad)
       lossph = l2_loss::forward(outph, y)
       W[i,j] = old # reset
-      dW_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dW_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
@@ -533,7 +534,7 @@ conv_simple = function() {
                                          pad, pad)
       lossph = l2_loss::forward(outph, y)
       b[i,j] = old # reset
-      db_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      db_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
@@ -569,7 +570,7 @@ cross_entropy_loss = function() {
       pred[i,j] = old + h
       lossph = cross_entropy_loss::forward(pred, y)
       pred[i,j] = old # reset W[i,j]
-      dpred_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dpred_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
@@ -609,7 +610,7 @@ dropout = function() {
       [outph, mask] = dropout::forward(X, p, seed)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -643,7 +644,7 @@ l1_loss = function() {
       pred[i,j] = old + h
       lossph = l1_loss::forward(pred, y)
       pred[i,j] = old # reset W[i,j]
-      dpred_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dpred_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
@@ -677,7 +678,7 @@ l1_reg = function() {
       W[i,j] = old + h
       reg_lossph = l1_reg::forward(W, lambda)
       W[i,j] = old # reset W[i,j]
-      dW_num = (reg_lossph - reg_lossmh) / (2 * h) # numerical derivative
+      dW_num = (reg_lossph-reg_lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, reg_lossph, reg_lossmh)
@@ -711,7 +712,7 @@ l2_loss = function() {
       pred[i,j] = old + h
       lossph = l2_loss::forward(pred, y)
       pred[i,j] = old # reset W[i,j]
-      dpred_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dpred_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
@@ -745,7 +746,7 @@ l2_reg = function() {
       W[i,j] = old + h
       reg_lossph = l2_reg::forward(W, lambda)
       W[i,j] = old # reset W[i,j]
-      dW_num = (reg_lossph - reg_lossmh) / (2 * h) # numerical derivative
+      dW_num = (reg_lossph-reg_lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, reg_lossph, reg_lossmh)
@@ -779,7 +780,7 @@ log_loss = function() {
       pred[i,j] = old + h
       lossph = log_loss::forward(pred, y)
       pred[i,j] = old # reset W[i,j]
-      dpred_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dpred_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
@@ -831,7 +832,7 @@ lstm = function() {
       loss_cph = l2_loss::forward(cph, yc)
       lossph = loss_outph + loss_cph
       X[i,j] = old # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -854,7 +855,7 @@ lstm = function() {
       loss_cph = l2_loss::forward(cph, yc)
       lossph = loss_outph + loss_cph
       W[i,j] = old # reset
-      dW_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dW_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
@@ -877,7 +878,7 @@ lstm = function() {
       loss_cph = l2_loss::forward(cph, yc)
       lossph = loss_outph + loss_cph
       b[i,j] = old # reset
-      db_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      db_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
@@ -900,7 +901,7 @@ lstm = function() {
       loss_cph = l2_loss::forward(cph, yc)
       lossph = loss_outph + loss_cph
       out0[i,j] = old # reset
-      dout0_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dout0_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dout0[i,j]), dout0_num, lossph, lossmh)
@@ -923,7 +924,7 @@ lstm = function() {
       loss_cph = l2_loss::forward(cph, yc)
       lossph = loss_outph + loss_cph
       c0[i,j] = old # reset
-      dc0_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dc0_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dc0[i,j]), dc0_num, lossph, lossmh)
@@ -949,8 +950,8 @@ max_pool = function() {
   for (pad in 0:1) {
     print(" - Grad checking w/ pad="+pad+".")
-    Hout = as.integer((Hin + 2 * pad - Hf) / stride + 1)
-    Wout = as.integer((Win + 2 * pad - Wf) / stride + 1)
+    Hout = as.integer((Hin + 2*pad - Hf)/stride + 1)
+    Wout = as.integer((Win + 2*pad - Wf)/stride + 1)
     y = rand(rows=N, cols=C*Hout*Wout)

     # Compute analytical gradients of loss wrt parameters
@@ -971,7 +972,7 @@ max_pool = function() {
       [outph, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1023,7 +1024,7 @@ max_pool_builtin = function() {
                                          pad, pad)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1050,8 +1051,8 @@ max_pool_simple = function() {
   for (pad in 0:1) {
     print(" - Grad checking w/ pad="+pad+".")
-    Hout = as.integer((Hin + 2 * pad - Hf) / stride + 1)
-    Wout = as.integer((Win + 2 * pad - Wf) / stride + 1)
+    Hout = as.integer((Hin + 2*pad - Hf)/stride + 1)
+    Wout = as.integer((Win + 2*pad - Wf)/stride + 1)
     y = rand(rows=N, cols=C*Hout*Wout)

     # Compute analytical gradients of loss wrt parameters
@@ -1075,7 +1076,7 @@ max_pool_simple = function() {
                                          pad, pad)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1121,7 +1122,7 @@ relu = function() {
       outph = relu::forward(X)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1165,7 +1166,7 @@ rnn = function() {
       [outph, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1184,7 +1185,7 @@ rnn = function() {
       [outph, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
       lossph = l2_loss::forward(outph, y)
       W[i,j] = old # reset
-      dW_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dW_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
@@ -1203,7 +1204,7 @@ rnn = function() {
       [outph, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
       lossph = l2_loss::forward(outph, y)
       b[i,j] = old # reset
-      db_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      db_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
@@ -1222,7 +1223,7 @@ rnn = function() {
       [outph, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
       lossph = l2_loss::forward(outph, y)
       out0[i,j] = old # reset
-      dout0_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dout0_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dout0[i,j]), dout0_num, lossph, lossmh)
@@ -1260,7 +1261,7 @@ sigmoid = function() {
       outph = sigmoid::forward(X)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1299,7 +1300,7 @@ softmax = function() {
       outph = softmax::forward(X)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1364,7 +1365,7 @@ spatial_batch_norm = function() {
                                          ema_mean, ema_var, mu, eps)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1387,7 +1388,7 @@ spatial_batch_norm = function() {
                                          ema_mean, ema_var, mu, eps)
       lossph = l2_loss::forward(outph, y)
       gamma[i,j] = old # reset
-      dgamma_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dgamma_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dgamma[i,j]), dgamma_num, lossph, lossmh)
@@ -1410,7 +1411,7 @@ spatial_batch_norm = function() {
                                          ema_mean, ema_var, mu, eps)
       lossph = l2_loss::forward(outph, y)
       beta[i,j] = old # reset
-      dbeta_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dbeta_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dbeta[i,j]), dbeta_num, lossph, lossmh)
@@ -1421,7 +1422,8 @@ spatial_batch_norm = function() {

 tanh = function() {
   /*
-   * Gradient check for the hyperbolic tangent (tanh) nonlinearity layer.
+   * Gradient check for the hyperbolic tangent (tanh) nonlinearity
+   * layer.
    */
   print("Grad checking the tanh nonlinearity layer with L2 loss.")
@@ -1449,7 +1451,7 @@ tanh = function() {
       outph = tanh::forward(X)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1517,7 +1519,7 @@ two_layer_affine_l2_net = function() {
       X[i,j] = old_x + h
       [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
       X[i,j] = old_x # reset X[i,j]
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1534,7 +1536,7 @@ two_layer_affine_l2_net = function() {
       W1[i,j] = old_w + h
       [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
       W1[i,j] = old_w # reset W[i,j]
-      dWij_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dWij_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dW1[i,j]), dWij_num, lossph, lossmh)
@@ -1551,7 +1553,7 @@ two_layer_affine_l2_net = function() {
       W2[i,j] = old_w + h
       [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
       W2[i,j] = old_w # reset W[i,j]
-      dWij_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dWij_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(dW2[i,j]), dWij_num, lossph, lossmh)
@@ -1568,7 +1570,7 @@ two_layer_affine_l2_net = function() {
       b1[i,j] = old_b + h
       [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
       b1[i,j] = old_b # reset b[1,j]
-      dbij_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dbij_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(db1[i,j]), dbij_num, lossph, lossmh)
@@ -1585,7 +1587,7 @@ two_layer_affine_l2_net = function() {
       b2[i,j] = old_b + h
       [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
       b2[i,j] = old_b # reset b[1,j]
-      dbij_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dbij_num = (lossph-lossmh) / (2*h) # numerical derivative

       # Check error
       rel_error = check_rel_error(as.scalar(db2[i,j]), dbij_num, lossph, lossmh)
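Every check in grad_check.dml above follows the same centered-difference pattern these hunks reformat: perturb one entry by +/- h, re-run the forward pass, and compare (lossph-lossmh)/(2*h) against the analytic gradient. A minimal standalone DML sketch of that idea on the scalar function f(x) = x^2 (a hypothetical example, not taken from the patch):

  h = 1e-5
  x = 3.0
  dx = 2 * x                         # analytic derivative of f(x) = x^2
  lossmh = (x - h)^2                 # f(x - h)
  lossph = (x + h)^2                 # f(x + h)
  dx_num = (lossph-lossmh) / (2*h)   # numerical derivative
  rel_error = abs(dx - dx_num) / max(1e-8, abs(dx) + abs(dx_num))
  print("Analytic: " + dx + ", numerical: " + dx_num + ", relative error: " + rel_error)

Because f is smooth, the centered estimate matches the analytic derivative up to O(h^2), so the relative error printed here is tiny.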
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml b/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml
index 4394ffd..786b0a1 100644
--- a/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml
+++ b/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml
@@ -24,6 +24,7 @@
  *
  * This implementation is intended to be a simple, reference version.
  */
+
 forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
                    int strideh, int stridew, int padh, int padw)
   return (matrix[double] out, int Hout, int Wout) {
@@ -35,7 +36,7 @@ forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
  * This implementation is intended to be a simple, reference version.
  *
  * Inputs:
- * - X: Input data matrix, of shape (N, C*Hin*Win).
+ * - X: Inputs, of shape (N, C*Hin*Win).
  * - C: Number of input channels (dimensionality of input depth).
  * - Hin: Input height.
  * - Win: Input width.
@@ -54,8 +55,8 @@ forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
  * - Wout: Output width.
  */
  N = nrow(X)
- Hout = as.integer((Hin + 2 * padh - Hf) / strideh + 1)
- Wout = as.integer((Win + 2 * padw - Wf) / stridew + 1)
+ Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+ Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)

  # Create output volume
  out = matrix(0, rows=N, cols=C*Hout*Wout)
@@ -99,10 +100,11 @@ backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
  * unrolled into a single vector.
  *
  * Inputs:
- * - dout: Derivatives from upstream, of shape (N, C*Hout*Wout).
+ * - dout: Gradient wrt `out` from upstream, of
+ *     shape (N, C*Hout*Wout).
  * - Hout: Output height.
  * - Wout: Output width.
- * - X: Input data matrix, of shape (N, C*Hin*Win).
+ * - X: Inputs, of shape (N, C*Hin*Win).
  * - C: Number of input channels (dimensionality of input depth).
  * - Hin: Input height.
  * - Win: Input width.
@@ -116,7 +118,7 @@ backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
  *     A typical value is 0.
  *
  * Outputs:
- * - dX: Gradient wrt X, of shape (N, C*Hin*Win).
+ * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
  */
  N = nrow(X)
@@ -134,7 +136,7 @@ backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
      Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped
      Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
      Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
-     Xn_padded[c, ] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
+     Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
    }
    img = Xn_padded
@@ -162,7 +164,7 @@ backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
    parfor (c in 1:C, check=0) {
      dXn_padded_slice = matrix(dimg[c,], rows=(Hin+2*padh), cols=(Win+2*padw))
      dXn_slice = dXn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win]
-     dXn[c, ] = matrix(dXn_slice, rows=1, cols=Hin*Win)
+     dXn[c,] = matrix(dXn_slice, rows=1, cols=Hin*Win)
    }
    dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win)
  }
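The padding idiom used by both conv_simple.dml and max_pool_simple.dml above (write an Hin x Win slice into the interior of a zero matrix of shape (Hin+2*padh) x (Win+2*padw)) can also be exercised in isolation; the sizes here are hypothetical illustration values:

  # Hypothetical sizes for a single channel of a single example.
  Hin = 4
  Win = 4
  padh = 1
  padw = 1
  Xn_slice = rand(rows=Hin, cols=Win)                            # unpadded slice
  Xn_padded_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw)  # zeros
  Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice   # copy into the interior
  print("Padded slice size: " + nrow(Xn_padded_slice) + " x " + ncol(Xn_padded_slice))  # 6 x 6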
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/util.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/util.dml b/scripts/staging/SystemML-NN/nn/util.dml
index dd0ac19..6b86225 100644
--- a/scripts/staging/SystemML-NN/nn/util.dml
+++ b/scripts/staging/SystemML-NN/nn/util.dml
@@ -22,14 +22,15 @@
 /*
  * Utility functions.
  */
+
 all_equal = function(matrix[double] X1, matrix[double] X2)
     return(boolean equivalent) {
   /*
    * Determine if two matrices are equivalent.
    *
    * Inputs:
-   * - X1: Input matrix, of shape (any, any).
-   * - X2: Input matrix, of same shape as X1.
+   * - X1: Inputs, of shape (any, any).
+   * - X2: Inputs, of same shape as X1.
    *
    * Outputs:
    * - equivalent: Whether or not the two matrices are equivalent.
@@ -42,12 +43,12 @@ check_all_equal = function(matrix[double] X1, matrix[double] X2)
   /*
    * Check if two matrices are equivalent, and report any issues.
    *
-   * - Issues an "ERROR" statement if elements of the two matrices
-   *   are not equal.
+   * Issues an "ERROR" statement if elements of the two matrices are
+   * not equal.
    *
    * Inputs:
-   * - X1: Input matrix, of shape (any, any).
-   * - X2: Input matrix, of same shape as X1.
+   * - X1: Inputs, of shape (any, any).
+   * - X2: Inputs, of same shape as X1.
    *
    * Outputs:
    * - equivalent: Whether or not the two matrices are equivalent.
@@ -61,7 +62,8 @@ check_all_equal = function(matrix[double] X1, matrix[double] X2)
   }
 }

-compute_rel_error = function(double x1, double x2) return (double rel_error) {
+compute_rel_error = function(double x1, double x2)
+    return (double rel_error) {
   /*
    * Relative error measure between two values.
    *
@@ -74,7 +76,7 @@ compute_rel_error = function(double x1, double x2) return (double rel_error) {
    * Outputs:
    * - rel_error: Relative error measure between the two values.
    */
-  rel_error = abs(x1 - x2) / max(1e-8, abs(x1) + abs(x2))
+  rel_error = abs(x1-x2) / max(1e-8, abs(x1)+abs(x2))
 }

 check_rel_error = function(double x1, double x2, double thresh_error, double thresh_warn)
@@ -83,10 +85,12 @@ check_rel_error = function(double x1, double x2, double thresh_error, double thr
    * Check and report any issues with the relative error measure between
    * two values.
    *
-   * - Issues an "ERROR" statement for relative errors > thresh_error,
-   *   indicating that the implementation is likely incorrect.
-   * - Issues a "WARNING" statement for relative errors < thresh_error
-   *   but > thresh_warn, indicating that the implementation may be incorrect.
+   * Issues an "ERROR" statement for relative errors > thresh_error,
+   * indicating that the implementation is likely incorrect.
+   *
+   * Issues a "WARNING" statement for relative errors < thresh_error
+   * but > thresh_warn, indicating that the implementation may be
+   * incorrect.
    *
    * Inputs:
    * - x1: First value.
@@ -117,7 +121,7 @@ channel_sums = function(matrix[double] X, int C, int Hin, int Win)
    * Computes a channel-wise summation over a 4D input.
    *
    * Inputs:
-   * - X: Input data matrix, of shape (N, C*Hin*Win).
+   * - X: Inputs, of shape (N, C*Hin*Win).
    * - C: Number of input channels (dimensionality of input depth).
    * - Hin: Input height.
    * - Win: Input width.
@@ -152,16 +156,16 @@ im2col = function(matrix[double] img, int Hin, int Win, int Hf, int Wf, int stri
    *     out into columns, of shape (C*Hf*Wf, Hout*Wout).
    */
   C = nrow(img)
-  Hout = as.integer((Hin - Hf) / strideh + 1)
-  Wout = as.integer((Win - Wf) / stridew + 1)
+  Hout = as.integer((Hin-Hf)/strideh + 1)
+  Wout = as.integer((Win-Wf)/stridew + 1)

   # Note: We start with `img_cols` transposed to allow for row-major
   # left-indexing inside the loop, which is more performant.
   img_cols = matrix(0, rows=Hout*Wout, cols=C*Hf*Wf) # zeros
   parfor (hout in 1:Hout, check=0) { # all output rows
-    hin = (hout-1) * strideh + 1
+    hin = (hout-1)*strideh + 1
     parfor (wout in 1:Wout, check=0) { # all output columns
-      win = (wout-1) * stridew + 1
+      win = (wout-1)*stridew + 1
       # Extract a local patch of the input image corresponding spatially to the filter sizes.
       img_patch = matrix(0, rows=C, cols=Hf*Wf) # zeros
       parfor (c in 1:C) { # all channels
@@ -207,14 +211,14 @@ col2im = function(matrix[double] img_cols, int C, int Hin, int Win, int Hf, int
    * Outputs:
    * - img: Input image, of shape (C, Hin*Win).
    */
-  Hout = as.integer((Hin - Hf) / strideh + 1)
-  Wout = as.integer((Win - Wf) / stridew + 1)
+  Hout = as.integer((Hin-Hf)/strideh + 1)
+  Wout = as.integer((Win-Wf)/stridew + 1)

   img = matrix(0, rows=C, cols=Hin*Win) # zeros
   for (hout in 1:Hout) { # all output rows
-    hin = (hout-1) * strideh + 1
+    hin = (hout-1)*strideh + 1
     for (wout in 1:Wout) { # all output columns
-      win = (wout-1) * stridew + 1
+      win = (wout-1)*stridew + 1
       # Extract a local patch of the input image corresponding spatially to the filter sizes.
       img_patch = matrix(img_cols[,(hout-1)*Wout + wout], rows=C, cols=Hf*Wf) # zeros
       parfor (c in 1:C) { # all channels
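The behavior described in check_rel_error's docstring above (an "ERROR" for relative errors above thresh_error, a "WARNING" between thresh_warn and thresh_error) reduces to a small threshold cascade over compute_rel_error's measure. A hedged sketch of that logic with made-up values, messages, and thresholds, not necessarily util.dml's exact implementation:

  # Hypothetical values and thresholds, for illustration only.
  x1 = 1.001
  x2 = 1.0
  thresh_error = 1e-2
  thresh_warn = 1e-4
  rel_error = abs(x1-x2) / max(1e-8, abs(x1)+abs(x2))  # same measure as compute_rel_error
  if (rel_error > thresh_error) {
    print("ERROR: Relative error " + rel_error + " is above the error threshold " + thresh_error + ".")
  } else {
    if (rel_error > thresh_warn) {
      print("WARNING: Relative error " + rel_error + " is above the warning threshold " + thresh_warn + ".")
    }
  }

With these made-up inputs the relative error is roughly 5e-4, so the sketch takes the WARNING branch.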
