[SYSTEMML-2097] Minor simplifications of nn library w/ += and ifelse

Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/00d72a09
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/00d72a09
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/00d72a09

Branch: refs/heads/master
Commit: 00d72a092ebf3abefd006b83275c8288f06afa12
Parents: 2ef6342
Author: Matthias Boehm <[email protected]>
Authored: Sat Jan 27 23:15:14 2018 -0800
Committer: Matthias Boehm <[email protected]>
Committed: Sat Jan 27 23:15:14 2018 -0800

----------------------------------------------------------------------
 scripts/nn/layers/conv2d.dml | 23 ++++++-----------------
 scripts/nn/layers/lstm.dml   |  8 ++------
 scripts/nn/layers/rnn.dml    |  4 ++--
 3 files changed, 10 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/00d72a09/scripts/nn/layers/conv2d.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/conv2d.dml b/scripts/nn/layers/conv2d.dml
index 9d03568..49d887b 100644
--- a/scripts/nn/layers/conv2d.dml
+++ b/scripts/nn/layers/conv2d.dml
@@ -126,14 +126,10 @@ backward = function(matrix[double] dout, int Hout, int Wout,
   N = nrow(X)
   F = nrow(W)
 
-  # Create gradient volumes
-  # Note: Create convenience gradient volumes for dW and db that will
-  # allow for one gradient to be stored per example, allowing for
-  # parallel computation at the expense of memory.  We will reduce at
-  # the end.
+  # Create output gradient volumes
   dX = matrix(0, rows=N, cols=C*Hin*Win)
-  dWN = matrix(0, rows=N, cols=F*C*Hf*Wf)  # dW = matrix(0, rows=F, cols=C*Hf*Wf)
-  dbN = matrix(0, rows=N, cols=F)  # db = matrix(0, rows=F, cols=1)
+  dW = matrix(0, rows=F, cols=C*Hf*Wf)
+  db = matrix(0, rows=F, cols=1)
 
   # Partial derivatives for convolution - im2col implementation
   parfor (n in 1:N) {  # all examples
@@ -143,13 +139,11 @@ backward = function(matrix[double] dout, int Hout, int Wout,
     Xn = matrix(X[n,], rows=C, cols=Hin*Win)  # reshape
     Xn_padded = util::pad_image(Xn, Hin, Win, padh, padw, 0)  # shape (C, (Hin+2*padh)*(Win+2*padw))
     Xn_padded_cols = util::im2col(Xn_padded, Hin+2*padh, Win+2*padw, Hf, Wf, strideh, stridew)
-    # dW = dW + doutn %*% t(Xn_padded_cols)
-    dWN[n,] = matrix(doutn %*% t(Xn_padded_cols), rows=1, cols=F*C*Hf*Wf)
+    dW += doutn %*% t(Xn_padded_cols)
 
     # Compute db
-    # db = db + rowSums(doutn)
-    dbN[n,] = matrix(rowSums(doutn), rows=1, cols=F)
-
+    db += rowSums(doutn)
+    
     # Compute dX
     dXn_padded_cols = t(W) %*% doutn  # shape (C*Hf*Wf, Hout*Wout)
     dXn_padded = util::col2im(dXn_padded_cols, C, Hin+2*padh, Win+2*padw, Hf, Wf,
@@ -157,11 +151,6 @@ backward = function(matrix[double] dout, int Hout, int Wout,
     dXn = util::unpad_image(dXn_padded, Hin, Win, padh, padw)
     dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win)  # reshape
   }
-
-  # Reduce convenience gradient volumes with one gradient per example
-  # into single gradients for W and b.
-  dW = matrix(colSums(dWN), rows=F, cols=C*Hf*Wf)
-  db = matrix(colSums(dbN), rows=F, cols=1)
 }
 
 init = function(int F, int C, int Hf, int Wf)

http://git-wip-us.apache.org/repos/asf/systemml/blob/00d72a09/scripts/nn/layers/lstm.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/lstm.dml b/scripts/nn/layers/lstm.dml
index 664a1e2..68d5a5a 100644
--- a/scripts/nn/layers/lstm.dml
+++ b/scripts/nn/layers/lstm.dml
@@ -79,12 +79,8 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b, int T,
   out_prev = out0
   c_prev = c0
   c = c_prev
-  if (return_sequences) {
-    out = matrix(0, rows=N, cols=T*M)
-  }
-  else {
-    out = matrix(0, rows=N, cols=M)
-  }
+  out = matrix(0, rows=N, cols=ifelse(return_sequences,T*M, M))
+  
   # caches to be used during the backward pass for performance
   cache_out = matrix(0, rows=T, cols=N*M)
   cache_c = matrix(0, rows=T, cols=N*M)

http://git-wip-us.apache.org/repos/asf/systemml/blob/00d72a09/scripts/nn/layers/rnn.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/rnn.dml b/scripts/nn/layers/rnn.dml
index ecea914..378de5a 100644
--- a/scripts/nn/layers/rnn.dml
+++ b/scripts/nn/layers/rnn.dml
@@ -146,8 +146,8 @@ backward = function(matrix[double] dout, matrix[double] X, matrix[double] W, mat
     }
     input = cbind(X_t, out_prev)  # shape (N, D+M)
     dout_t_raw = (1-out_t^2) * dout_t  # into tanh, shape (N, M)
-    dW = dW + t(input) %*% dout_t_raw  # shape (D+M, M)
-    db = db + colSums(dout_t_raw)  # shape (1, M)
+    dW += t(input) %*% dout_t_raw  # shape (D+M, M)
+    db += colSums(dout_t_raw)  # shape (1, M)
     dinput = dout_t_raw %*% t(W)  # shape (N, D+M)
     dX[,(t-1)*D+1:t*D] = dinput[,1:D]
     dout_prev = dinput[,D+1:D+M]  # shape (N, M)

Reply via email to