Repository: incubator-systemml
Updated Branches:
  refs/heads/master 2e48d951b -> ac8ee2bef


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/test/conv_simple.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/conv_simple.dml b/scripts/staging/SystemML-NN/nn/test/conv_simple.dml
index fb9d02c..efd99c3 100644
--- a/scripts/staging/SystemML-NN/nn/test/conv_simple.dml
+++ b/scripts/staging/SystemML-NN/nn/test/conv_simple.dml
@@ -24,6 +24,7 @@
  *
  * This implementation is intended to be a simple, reference version.
  */
+
 forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
                    int C, int Hin, int Win, int Hf, int Wf,
                    int strideh, int stridew, int padh, int padw)
@@ -36,9 +37,9 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
    * This implementation is intended to be a simple, reference version.
    *
    * Inputs:
-   *  - X: Input data matrix, of shape (N, C*Hin*Win).
-   *  - W: Weights (parameters) matrix, of shape (F, C*Hf*Wf).
-   *  - b: Biases vector, of shape (F, 1).
+   *  - X: Inputs, of shape (N, C*Hin*Win).
+   *  - W: Weights, of shape (F, C*Hf*Wf).
+   *  - b: Biases, of shape (F, 1).
    *  - C: Number of input channels (dimensionality of input depth).
    *  - Hin: Input height.
    *  - Win: Input width.
@@ -56,8 +57,8 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
    */
   N = nrow(X)
   F = nrow(W)
-  Hout = as.integer((Hin + 2 * padh - Hf) / strideh + 1)
-  Wout = as.integer((Win + 2 * padw - Wf) / stridew + 1)
+  Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+  Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
 
   # Create output volume
   out = matrix(0, rows=N, cols=F*Hout*Wout)
@@ -71,14 +72,14 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
       Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win)  # depth slice C reshaped
       Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
       Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
-      Xn_padded[c, ] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw))  # reshape
+      Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw))  # reshape
     }
     # Convolve image with filters
     parfor (f in 1:F, check=0) {  # all filters
       parfor (hout in 1:Hout, check=0) {  # all output rows
-        h0 = (hout-1) * strideh + 1
+        h0 = (hout-1)*strideh + 1
         parfor (wout in 1:Wout, check=0) {  # all output columns
-          w0 = (wout-1) * stridew + 1
+          w0 = (wout-1)*stridew + 1
          # Create a patch of the input example corresponding spatially to the filter sizes
           Xn_padded_patch = matrix(0, rows=C, cols=Hf*Wf)  # zeros
           parfor (c in 1:C, check=0) {
@@ -106,12 +107,13 @@ backward = function(matrix[double] dout, int Hout, int Wout,
    * This implementation is intended to be a simple, reference version.
    *
    * Inputs:
-   *  - dout: Derivatives from upstream, of shape (N, F*Hout*Wout).
+   *  - dout: Gradient wrt `out` from upstream, of
+   *      shape (N, F*Hout*Wout).
    *  - Hout: Output height.
    *  - Wout: Output width.
-   *  - X: Previous input data matrix, of shape (N, C*Hin*Win).
-   *  - W: Weights (parameters) matrix, of shape (F, C*Hf*Wf).
-   *  - b: Biases vector, of shape (F, 1).
+   *  - X: Inputs, of shape (N, C*Hin*Win).
+   *  - W: Weights, of shape (F, C*Hf*Wf).
+   *  - b: Biases, of shape (F, 1).
    *  - C: Number of input channels (dimensionality of input depth).
    *  - Hin: Input height.
    *  - Win: Input width.
@@ -123,14 +125,14 @@ backward = function(matrix[double] dout, int Hout, int Wout,
    *  - padw: Padding for left and right sides.
    *
    * Outputs:
-   *  - dX: Gradient wrt X, of shape (N, C*Hin*Win).
-   *  - dW: Gradient wrt W, of shape (F, C*Hf*Wf).
-   *  - db: Gradient wrt b, of shape (F, 1).
+   *  - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
+   *  - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf).
+   *  - db: Gradient wrt `b`, of shape (F, 1).
    */
   N = nrow(X)
   F = nrow(W)
-  Hout = as.integer((Hin + 2 * padh - Hf) / strideh + 1)
-  Wout = as.integer((Win + 2 * padw - Wf) / stridew + 1)
+  Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+  Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
 
   # Create gradient volumes
   dX = matrix(0, rows=N, cols=C*Hin*Win)
@@ -146,7 +148,7 @@ backward = function(matrix[double] dout, int Hout, int Wout,
       Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win)  # depth slice C reshaped
       Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
       Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
-      Xn_padded[c, ] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw))  # reshape
+      Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw))  # reshape
     }
     dXn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw))
     for (f in 1:F) {  # all filters
@@ -191,10 +193,11 @@ init = function(int F, int C, int Hf, int Wf)
   /*
    * Initialize the parameters of this layer.
    *
-   * We use the heuristic by He et al. [http://arxiv.org/abs/1502.01852],
-   * which limits the magnification of inputs/gradients during
-   * forward/backward passes by scaling unit-Gaussian weights by a
-   * factor of sqrt(2/n), under the assumption of relu neurons.
+   * We use the heuristic by He et al., which limits the magnification
+   * of inputs/gradients during forward/backward passes by scaling
+   * unit-Gaussian weights by a factor of sqrt(2/n), under the
+   * assumption of relu neurons.
+   *  - http://arxiv.org/abs/1502.01852
    *
    * Inputs:
    *  - F: Number of filters.
@@ -203,8 +206,8 @@ init = function(int F, int C, int Hf, int Wf)
    *  - Wf: Filter width.
    *
    * Outputs:
-   *  - W: Weights (parameters) matrix, of shape (F, C*Hf*Wf).
-   *  - b: Biases vector, of shape (F, 1).
+   *  - W: Weights, of shape (F, C*Hf*Wf).
+   *  - b: Biases, of shape (F, 1).
    */
   W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf))
   b = matrix(0, rows=F, cols=1)
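
A quick note on the He-initialization scale used in `init` above: with a
fan-in of n = C*Hf*Wf inputs per filter, multiplying unit-Gaussian weights
by sqrt(2/n) keeps the variance of relu pre-activations roughly constant
from layer to layer. A minimal DML sketch, assuming small hypothetical
sizes (F=4 filters, C=3 channels, 3x3 kernels), that checks the resulting
weight scale empirically:

  F = 4
  C = 3
  Hf = 3
  Wf = 3
  n = C*Hf*Wf  # fan-in per filter
  W = rand(rows=F, cols=n, pdf="normal") * sqrt(2.0/n)
  # Empirical std of W should be close to sqrt(2/n) ~= 0.272
  W_std = sqrt(sum((W - mean(W))^2) / (F*n - 1))
  print("std(W): " + W_std)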

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/test/grad_check.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/grad_check.dml b/scripts/staging/SystemML-NN/nn/test/grad_check.dml
index 6b90d56..adc1c9a 100644
--- a/scripts/staging/SystemML-NN/nn/test/grad_check.dml
+++ b/scripts/staging/SystemML-NN/nn/test/grad_check.dml
@@ -117,7 +117,7 @@ affine = function() {
       outph = affine::forward(X, W, b)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old  # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -136,7 +136,7 @@ affine = function() {
       outph = affine::forward(X, W, b)
       lossph = l2_loss::forward(outph, y)
       W[i,j] = old  # reset
-      dW_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dW_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
@@ -155,7 +155,7 @@ affine = function() {
       outph = affine::forward(X, W, b)
       lossph = l2_loss::forward(outph, y)
       b[i,j] = old  # reset
-      db_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      db_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
@@ -214,7 +214,7 @@ batch_norm = function() {
            batch_norm::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps)
         lossph = l2_loss::forward(outph, y)
         X[i,j] = old  # reset
-        dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+        dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
         # Check error
         rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -235,7 +235,7 @@ batch_norm = function() {
            batch_norm::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps)
         lossph = l2_loss::forward(outph, y)
         gamma[i,j] = old  # reset
-        dgamma_num = (lossph - lossmh) / (2 * h) # numerical derivative
+        dgamma_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
         # Check error
        rel_error = check_rel_error(as.scalar(dgamma[i,j]), dgamma_num, lossph, lossmh)
@@ -256,7 +256,7 @@ batch_norm = function() {
            batch_norm::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps)
         lossph = l2_loss::forward(outph, y)
         beta[i,j] = old  # reset
-        dbeta_num = (lossph - lossmh) / (2 * h) # numerical derivative
+        dbeta_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
         # Check error
        rel_error = check_rel_error(as.scalar(dbeta[i,j]), dbeta_num, lossph, lossmh)
@@ -307,7 +307,7 @@ conv = function() {
      [outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old  # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -326,7 +326,7 @@ conv = function() {
      [outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
       lossph = l2_loss::forward(outph, y)
       W[i,j] = old  # reset
-      dW_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dW_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
@@ -345,7 +345,7 @@ conv = function() {
      [outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
       lossph = l2_loss::forward(outph, y)
       b[i,j] = old  # reset
-      db_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      db_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
@@ -355,7 +355,8 @@ conv = function() {
 
 conv_builtin = function() {
   /*
-   * Gradient check for the convolutional layer using built-in functions.
+   * Gradient check for the convolutional layer using built-in
+   * functions.
    */
   print("Grad checking the built-in convolutional layer with L2 loss.")
 
@@ -397,7 +398,7 @@ conv_builtin = function() {
                                                   pad, pad)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old  # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -418,7 +419,7 @@ conv_builtin = function() {
                                                   pad, pad)
       lossph = l2_loss::forward(outph, y)
       W[i,j] = old  # reset
-      dW_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dW_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
@@ -439,7 +440,7 @@ conv_builtin = function() {
                                                   pad, pad)
       lossph = l2_loss::forward(outph, y)
       b[i,j] = old  # reset
-      db_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      db_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
@@ -491,7 +492,7 @@ conv_simple = function() {
                                                  pad, pad)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old  # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -512,7 +513,7 @@ conv_simple = function() {
                                                  pad, pad)
       lossph = l2_loss::forward(outph, y)
       W[i,j] = old  # reset
-      dW_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dW_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
@@ -533,7 +534,7 @@ conv_simple = function() {
                                                  pad, pad)
       lossph = l2_loss::forward(outph, y)
       b[i,j] = old  # reset
-      db_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      db_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
@@ -569,7 +570,7 @@ cross_entropy_loss = function() {
       pred[i,j] = old + h
       lossph = cross_entropy_loss::forward(pred, y)
       pred[i,j] = old  # reset W[i,j]
-      dpred_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dpred_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
      rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
@@ -609,7 +610,7 @@ dropout = function() {
       [outph, mask] = dropout::forward(X, p, seed)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old  # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -643,7 +644,7 @@ l1_loss = function() {
       pred[i,j] = old + h
       lossph = l1_loss::forward(pred, y)
       pred[i,j] = old  # reset W[i,j]
-      dpred_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dpred_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
      rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
@@ -677,7 +678,7 @@ l1_reg = function() {
       W[i,j] = old + h
       reg_lossph = l1_reg::forward(W, lambda)
       W[i,j] = old  # reset W[i,j]
-      dW_num = (reg_lossph - reg_lossmh) / (2 * h) # numerical derivative
+      dW_num = (reg_lossph-reg_lossmh) / (2*h)  # numerical derivative
 
       # Check error
      rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, reg_lossph, reg_lossmh)
@@ -711,7 +712,7 @@ l2_loss = function() {
       pred[i,j] = old + h
       lossph = l2_loss::forward(pred, y)
       pred[i,j] = old  # reset W[i,j]
-      dpred_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dpred_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
      rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
@@ -745,7 +746,7 @@ l2_reg = function() {
       W[i,j] = old + h
       reg_lossph = l2_reg::forward(W, lambda)
       W[i,j] = old  # reset W[i,j]
-      dW_num = (reg_lossph - reg_lossmh) / (2 * h) # numerical derivative
+      dW_num = (reg_lossph-reg_lossmh) / (2*h)  # numerical derivative
 
       # Check error
      rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, reg_lossph, reg_lossmh)
@@ -779,7 +780,7 @@ log_loss = function() {
       pred[i,j] = old + h
       lossph = log_loss::forward(pred, y)
       pred[i,j] = old  # reset W[i,j]
-      dpred_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dpred_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
      rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
@@ -831,7 +832,7 @@ lstm = function() {
       loss_cph = l2_loss::forward(cph, yc)
       lossph = loss_outph + loss_cph
       X[i,j] = old  # reset
-      dX_num = (lossph - lossmh) / (2 * h)  # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -854,7 +855,7 @@ lstm = function() {
       loss_cph = l2_loss::forward(cph, yc)
       lossph = loss_outph + loss_cph
       W[i,j] = old  # reset
-      dW_num = (lossph - lossmh) / (2 * h)  # numerical derivative
+      dW_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
@@ -877,7 +878,7 @@ lstm = function() {
       loss_cph = l2_loss::forward(cph, yc)
       lossph = loss_outph + loss_cph
       b[i,j] = old  # reset
-      db_num = (lossph - lossmh) / (2 * h)  # numerical derivative
+      db_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
@@ -900,7 +901,7 @@ lstm = function() {
       loss_cph = l2_loss::forward(cph, yc)
       lossph = loss_outph + loss_cph
       out0[i,j] = old  # reset
-      dout0_num = (lossph - lossmh) / (2 * h)  # numerical derivative
+      dout0_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
      rel_error = check_rel_error(as.scalar(dout0[i,j]), dout0_num, lossph, lossmh)
@@ -923,7 +924,7 @@ lstm = function() {
       loss_cph = l2_loss::forward(cph, yc)
       lossph = loss_outph + loss_cph
       c0[i,j] = old  # reset
-      dc0_num = (lossph - lossmh) / (2 * h)  # numerical derivative
+      dc0_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(dc0[i,j]), dc0_num, lossph, lossmh)
@@ -949,8 +950,8 @@ max_pool = function() {
 
   for (pad in 0:1) {
     print(" - Grad checking w/ pad="+pad+".")
-    Hout = as.integer((Hin + 2 * pad - Hf) / stride + 1)
-    Wout = as.integer((Win + 2 * pad - Wf) / stride + 1)
+    Hout = as.integer((Hin + 2*pad - Hf)/stride + 1)
+    Wout = as.integer((Win + 2*pad - Wf)/stride + 1)
     y = rand(rows=N, cols=C*Hout*Wout)
 
     # Compute analytical gradients of loss wrt parameters
@@ -971,7 +972,7 @@ max_pool = function() {
        [outph, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
         lossph = l2_loss::forward(outph, y)
         X[i,j] = old  # reset
-        dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+        dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
         # Check error
         rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1023,7 +1024,7 @@ max_pool_builtin = function() {
                                                         pad, pad)
         lossph = l2_loss::forward(outph, y)
         X[i,j] = old  # reset
-        dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+        dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
         # Check error
         rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1050,8 +1051,8 @@ max_pool_simple = function() {
 
   for (pad in 0:1) {
     print(" - Grad checking w/ pad="+pad+".")
-    Hout = as.integer((Hin + 2 * pad - Hf) / stride + 1)
-    Wout = as.integer((Win + 2 * pad - Wf) / stride + 1)
+    Hout = as.integer((Hin + 2*pad - Hf)/stride + 1)
+    Wout = as.integer((Win + 2*pad - Wf)/stride + 1)
     y = rand(rows=N, cols=C*Hout*Wout)
 
     # Compute analytical gradients of loss wrt parameters
@@ -1075,7 +1076,7 @@ max_pool_simple = function() {
                                                        pad, pad)
         lossph = l2_loss::forward(outph, y)
         X[i,j] = old  # reset
-        dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+        dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
         # Check error
         rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1121,7 +1122,7 @@ relu = function() {
       outph = relu::forward(X)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old  # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1165,7 +1166,7 @@ rnn = function() {
       [outph, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old  # reset
-      dX_num = (lossph - lossmh) / (2 * h)  # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1184,7 +1185,7 @@ rnn = function() {
       [outph, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
       lossph = l2_loss::forward(outph, y)
       W[i,j] = old  # reset
-      dW_num = (lossph - lossmh) / (2 * h)  # numerical derivative
+      dW_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
@@ -1203,7 +1204,7 @@ rnn = function() {
       [outph, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
       lossph = l2_loss::forward(outph, y)
       b[i,j] = old  # reset
-      db_num = (lossph - lossmh) / (2 * h)  # numerical derivative
+      db_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
@@ -1222,7 +1223,7 @@ rnn = function() {
       [outph, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
       lossph = l2_loss::forward(outph, y)
       out0[i,j] = old  # reset
-      dout0_num = (lossph - lossmh) / (2 * h)  # numerical derivative
+      dout0_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
      rel_error = check_rel_error(as.scalar(dout0[i,j]), dout0_num, lossph, lossmh)
@@ -1260,7 +1261,7 @@ sigmoid = function() {
       outph = sigmoid::forward(X)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old  # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1299,7 +1300,7 @@ softmax = function() {
       outph = softmax::forward(X)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old  # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1364,7 +1365,7 @@ spatial_batch_norm = function() {
                                         ema_mean, ema_var, mu, eps)
         lossph = l2_loss::forward(outph, y)
         X[i,j] = old  # reset
-        dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+        dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
         # Check error
         rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1387,7 +1388,7 @@ spatial_batch_norm = function() {
                                         ema_mean, ema_var, mu, eps)
         lossph = l2_loss::forward(outph, y)
         gamma[i,j] = old  # reset
-        dgamma_num = (lossph - lossmh) / (2 * h) # numerical derivative
+        dgamma_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
         # Check error
        rel_error = check_rel_error(as.scalar(dgamma[i,j]), dgamma_num, lossph, lossmh)
@@ -1410,7 +1411,7 @@ spatial_batch_norm = function() {
                                         ema_mean, ema_var, mu, eps)
         lossph = l2_loss::forward(outph, y)
         beta[i,j] = old  # reset
-        dbeta_num = (lossph - lossmh) / (2 * h) # numerical derivative
+        dbeta_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
         # Check error
        rel_error = check_rel_error(as.scalar(dbeta[i,j]), dbeta_num, lossph, lossmh)
@@ -1421,7 +1422,8 @@ spatial_batch_norm = function() {
 
 tanh = function() {
   /*
-   * Gradient check for the hyperbolic tangent (tanh) nonlinearity layer.
+   * Gradient check for the hyperbolic tangent (tanh) nonlinearity
+   * layer.
    */
   print("Grad checking the tanh nonlinearity layer with L2 loss.")
 
@@ -1449,7 +1451,7 @@ tanh = function() {
       outph = tanh::forward(X)
       lossph = l2_loss::forward(outph, y)
       X[i,j] = old  # reset
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1517,7 +1519,7 @@ two_layer_affine_l2_net = function() {
       X[i,j] = old_x + h
      [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
       X[i,j] = old_x  # reset X[i,j]
-      dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
       rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1534,7 +1536,7 @@ two_layer_affine_l2_net = function() {
       W1[i,j] = old_w + h
      [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
       W1[i,j] = old_w  # reset W[i,j]
-      dWij_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dWij_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
      rel_error = check_rel_error(as.scalar(dW1[i,j]), dWij_num, lossph, lossmh)
@@ -1551,7 +1553,7 @@ two_layer_affine_l2_net = function() {
       W2[i,j] = old_w + h
      [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
       W2[i,j] = old_w  # reset W[i,j]
-      dWij_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dWij_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
      rel_error = check_rel_error(as.scalar(dW2[i,j]), dWij_num, lossph, lossmh)
@@ -1568,7 +1570,7 @@ two_layer_affine_l2_net = function() {
       b1[i,j] = old_b + h
      [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
       b1[i,j] = old_b  # reset b[1,j]
-      dbij_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dbij_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
      rel_error = check_rel_error(as.scalar(db1[i,j]), dbij_num, lossph, lossmh)
@@ -1585,7 +1587,7 @@ two_layer_affine_l2_net = function() {
       b2[i,j] = old_b + h
      [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
       b2[i,j] = old_b  # reset b[1,j]
-      dbij_num = (lossph - lossmh) / (2 * h) # numerical derivative
+      dbij_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
      rel_error = check_rel_error(as.scalar(db2[i,j]), dbij_num, lossph, lossmh)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml b/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml
index 4394ffd..786b0a1 100644
--- a/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml
+++ b/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml
@@ -24,6 +24,7 @@
  *
  * This implementation is intended to be a simple, reference version.
  */
+
 forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
                    int strideh, int stridew, int padh, int padw)
     return (matrix[double] out, int Hout, int Wout) {
@@ -35,7 +36,7 @@ forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
    * This implementation is intended to be a simple, reference version.
    *
    * Inputs:
-   *  - X: Input data matrix, of shape (N, C*Hin*Win).
+   *  - X: Inputs, of shape (N, C*Hin*Win).
    *  - C: Number of input channels (dimensionality of input depth).
    *  - Hin: Input height.
    *  - Win: Input width.
@@ -54,8 +55,8 @@ forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
    *  - Wout: Output width.
    */
   N = nrow(X)
-  Hout = as.integer((Hin + 2 * padh - Hf) / strideh + 1)
-  Wout = as.integer((Win + 2 * padw - Wf) / stridew + 1)
+  Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+  Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
 
   # Create output volume
   out = matrix(0, rows=N, cols=C*Hout*Wout)
@@ -99,10 +100,11 @@ backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
    * unrolled into a single vector.
    *
    * Inputs:
-   *  - dout: Derivatives from upstream, of shape (N, C*Hout*Wout).
+   *  - dout: Gradient wrt `out` from upstream, of
+   *      shape (N, C*Hout*Wout).
    *  - Hout: Output height.
    *  - Wout: Output width.
-   *  - X: Input data matrix, of shape (N, C*Hin*Win).
+   *  - X: Inputs, of shape (N, C*Hin*Win).
    *  - C: Number of input channels (dimensionality of input depth).
    *  - Hin: Input height.
    *  - Win: Input width.
@@ -116,7 +118,7 @@ backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
    *      A typical value is 0.
    *
    * Outputs:
-   *  - dX: Gradient wrt X, of shape (N, C*Hin*Win).
+   *  - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
    */
   N = nrow(X)
 
@@ -134,7 +136,7 @@ backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
       Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win)  # depth slice C reshaped
       Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
       Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
-      Xn_padded[c, ] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw))  # reshape
+      Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw))  # reshape
     }
     img = Xn_padded
 
@@ -162,7 +164,7 @@ backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
     parfor (c in 1:C, check=0) {
       dXn_padded_slice = matrix(dimg[c,], rows=(Hin+2*padh), cols=(Win+2*padw))
       dXn_slice = dXn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win]
-      dXn[c, ] = matrix(dXn_slice, rows=1, cols=Hin*Win)
+      dXn[c,] = matrix(dXn_slice, rows=1, cols=Hin*Win)
     }
     dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win)
   }
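
For reference, the output-shape formula used in `forward` above,
Hout = (Hin + 2*padh - Hf)/strideh + 1, worked through on a hypothetical
2x2 max pool with stride 2 over a 28x28 input and no padding:

  Hin = 28
  Win = 28
  Hf = 2
  Wf = 2
  strideh = 2
  stridew = 2
  padh = 0
  padw = 0
  Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)  # (28 - 2)/2 + 1 = 14
  Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)  # 14
  print("Hout: " + Hout + ", Wout: " + Wout)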

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/util.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/util.dml b/scripts/staging/SystemML-NN/nn/util.dml
index dd0ac19..6b86225 100644
--- a/scripts/staging/SystemML-NN/nn/util.dml
+++ b/scripts/staging/SystemML-NN/nn/util.dml
@@ -22,14 +22,15 @@
 /*
  * Utility functions.
  */
+
 all_equal = function(matrix[double] X1, matrix[double] X2)
     return(boolean equivalent) {
   /*
    * Determine if two matrices are equivalent.
    *
    * Inputs:
-   *  - X1: Input matrix, of shape (any, any).
-   *  - X2: Input matrix, of same shape as X1.
+   *  - X1: Inputs, of shape (any, any).
+   *  - X2: Inputs, of same shape as X1.
    *
    * Outputs:
    *  - equivalent: Whether or not the two matrices are equivalent.
@@ -42,12 +43,12 @@ check_all_equal = function(matrix[double] X1, matrix[double] X2)
   /*
    * Check if two matrices are equivalent, and report any issues.
    *
-   *  - Issues an "ERROR" statement if elements of the two matrices
-   *  are not equal.
+   * Issues an "ERROR" statement if elements of the two matrices are
+   * not equal.
    *
    * Inputs:
-   *  - X1: Input matrix, of shape (any, any).
-   *  - X2: Input matrix, of same shape as X1.
+   *  - X1: Inputs, of shape (any, any).
+   *  - X2: Inputs, of same shape as X1.
    *
    * Outputs:
    *  - equivalent: Whether or not the two matrices are equivalent.
@@ -61,7 +62,8 @@ check_all_equal = function(matrix[double] X1, matrix[double] X2)
   }
 }
 
-compute_rel_error = function(double x1, double x2) return (double rel_error) {
+compute_rel_error = function(double x1, double x2)
+    return (double rel_error) {
   /*
    * Relative error measure between two values.
    *
@@ -74,7 +76,7 @@ compute_rel_error = function(double x1, double x2) return (double rel_error) {
    * Outputs:
    *  - rel_error: Relative error measure between the two values.
    */
-  rel_error = abs(x1 - x2) / max(1e-8, abs(x1) + abs(x2))
+  rel_error = abs(x1-x2) / max(1e-8, abs(x1)+abs(x2))
 }
 
 check_rel_error = function(double x1, double x2, double thresh_error, double thresh_warn)
@@ -83,10 +85,12 @@ check_rel_error = function(double x1, double x2, double thresh_error, double thr
    * Check and report any issues with the relative error measure between
    * two values.
    *
-   *  - Issues an "ERROR" statement for relative errors > thresh_error,
-   *  indicating that the implementation is likely incorrect.
-   *  - Issues a "WARNING" statement for relative errors < thresh_error
-   *  but > thresh_warn, indicating that the implementation may be incorrect.
+   * Issues an "ERROR" statement for relative errors > thresh_error,
+   * indicating that the implementation is likely incorrect.
+   *
+   * Issues a "WARNING" statement for relative errors < thresh_error
+   * but > thresh_warn, indicating that the implementation may be
+   * incorrect.
    *
    * Inputs:
    *  - x1: First value.
@@ -117,7 +121,7 @@ channel_sums = function(matrix[double] X, int C, int Hin, int Win)
    * Computes a channel-wise summation over a 4D input.
    *
    * Inputs:
-   *  - X: Input data matrix, of shape (N, C*Hin*Win).
+   *  - X: Inputs, of shape (N, C*Hin*Win).
    *  - C: Number of input channels (dimensionality of input depth).
    *  - Hin: Input height.
    *  - Win: Input width.
@@ -152,16 +156,16 @@ im2col = function(matrix[double] img, int Hin, int Win, int Hf, int Wf, int stri
    *      out into columns, of shape (C*Hf*Wf, Hout*Wout).
    */
   C = nrow(img)
-  Hout = as.integer((Hin - Hf) / strideh + 1)
-  Wout = as.integer((Win - Wf) / stridew + 1)
+  Hout = as.integer((Hin-Hf)/strideh + 1)
+  Wout = as.integer((Win-Wf)/stridew + 1)
 
   # Note: We start with `img_cols` transposed to allow for row-major
   # left-indexing inside the loop, which is more performant.
   img_cols = matrix(0, rows=Hout*Wout, cols=C*Hf*Wf)  # zeros
   parfor (hout in 1:Hout, check=0) {  # all output rows
-    hin = (hout-1) * strideh + 1
+    hin = (hout-1)*strideh + 1
     parfor (wout in 1:Wout, check=0) {  # all output columns
-      win = (wout-1) * stridew + 1
+      win = (wout-1)*stridew + 1
      # Extract a local patch of the input image corresponding spatially to the filter sizes.
       img_patch = matrix(0, rows=C, cols=Hf*Wf)  # zeros
       parfor (c in 1:C) {  # all channels
@@ -207,14 +211,14 @@ col2im = function(matrix[double] img_cols, int C, int Hin, int Win, int Hf, int
    * Outputs:
    *  - img: Input image, of shape (C, Hin*Win).
    */
-  Hout = as.integer((Hin - Hf) / strideh + 1)
-  Wout = as.integer((Win - Wf) / stridew + 1)
+  Hout = as.integer((Hin-Hf)/strideh + 1)
+  Wout = as.integer((Win-Wf)/stridew + 1)
 
   img = matrix(0, rows=C, cols=Hin*Win)  # zeros
   for (hout in 1:Hout) {  # all output rows
-    hin = (hout-1) * strideh + 1
+    hin = (hout-1)*strideh + 1
     for (wout in 1:Wout) {  # all output columns
-      win = (wout-1) * stridew + 1
+      win = (wout-1)*stridew + 1
      # Extract a local patch of the input image corresponding spatially to the filter sizes.
      img_patch = matrix(img_cols[,(hout-1)*Wout + wout], rows=C, cols=Hf*Wf)  # zeros
       parfor (c in 1:C) {  # all channels
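
The symmetric relative-error measure in `compute_rel_error` above,
rel_error = |x1-x2| / max(1e-8, |x1|+|x2|), always lands in [0, 1], and
the max(1e-8, .) guard avoids division by zero when both values vanish.
A small worked DML sketch with hypothetical gradient values:

  x1 = 1.0001  # analytical gradient entry (hypothetical)
  x2 = 1.0000  # numerical gradient entry (hypothetical)
  rel_error = abs(x1-x2) / max(1e-8, abs(x1)+abs(x2))
  print("rel_error: " + rel_error)  # ~= 5e-5, well below typical thresholds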
