[SYSTEMML-2097] Minor simplifications of nn library w/ += and ifelse

Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/00d72a09
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/00d72a09
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/00d72a09

Branch: refs/heads/master Commit: 00d72a092ebf3abefd006b83275c8288f06afa12 Parents: 2ef6342 Author: Matthias Boehm <[email protected]> Authored: Sat Jan 27 23:15:14 2018 -0800 Committer: Matthias Boehm <[email protected]> Committed: Sat Jan 27 23:15:14 2018 -0800 ---------------------------------------------------------------------- scripts/nn/layers/conv2d.dml | 23 ++++++----------------- scripts/nn/layers/lstm.dml | 8 ++------ scripts/nn/layers/rnn.dml | 4 ++-- 3 files changed, 10 insertions(+), 25 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/00d72a09/scripts/nn/layers/conv2d.dml ---------------------------------------------------------------------- diff --git a/scripts/nn/layers/conv2d.dml b/scripts/nn/layers/conv2d.dml index 9d03568..49d887b 100644 --- a/scripts/nn/layers/conv2d.dml +++ b/scripts/nn/layers/conv2d.dml @@ -126,14 +126,10 @@ backward = function(matrix[double] dout, int Hout, int Wout, N = nrow(X) F = nrow(W) - # Create gradient volumes - # Note: Create convenience gradient volumes for dW and db that will - # allow for one gradient to be stored per example, allowing for - # parallel computation at the expense of memory. We will reduce at - # the end. 
+ # Create output gradient volumes dX = matrix(0, rows=N, cols=C*Hin*Win) - dWN = matrix(0, rows=N, cols=F*C*Hf*Wf) # dW = matrix(0, rows=F, cols=C*Hf*Wf) - dbN = matrix(0, rows=N, cols=F) # db = matrix(0, rows=F, cols=1) + dW = matrix(0, rows=F, cols=C*Hf*Wf) + db = matrix(0, rows=F, cols=1) # Partial derivatives for convolution - im2col implementation parfor (n in 1:N) { # all examples @@ -143,13 +139,11 @@ backward = function(matrix[double] dout, int Hout, int Wout, Xn = matrix(X[n,], rows=C, cols=Hin*Win) # reshape Xn_padded = util::pad_image(Xn, Hin, Win, padh, padw, 0) # shape (C, (Hin+2*padh)*(Win+2*padw)) Xn_padded_cols = util::im2col(Xn_padded, Hin+2*padh, Win+2*padw, Hf, Wf, strideh, stridew) - # dW = dW + doutn %*% t(Xn_padded_cols) - dWN[n,] = matrix(doutn %*% t(Xn_padded_cols), rows=1, cols=F*C*Hf*Wf) + dW += doutn %*% t(Xn_padded_cols) # Compute db - # db = db + rowSums(doutn) - dbN[n,] = matrix(rowSums(doutn), rows=1, cols=F) - + db += rowSums(doutn) + # Compute dX dXn_padded_cols = t(W) %*% doutn # shape (C*Hf*Wf, Hout*Wout) dXn_padded = util::col2im(dXn_padded_cols, C, Hin+2*padh, Win+2*padw, Hf, Wf, @@ -157,11 +151,6 @@ backward = function(matrix[double] dout, int Hout, int Wout, dXn = util::unpad_image(dXn_padded, Hin, Win, padh, padw) dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win) # reshape } - - # Reduce convenience gradient volumes with one gradient per example - # into single gradients for W and b. 
- dW = matrix(colSums(dWN), rows=F, cols=C*Hf*Wf) - db = matrix(colSums(dbN), rows=F, cols=1) } init = function(int F, int C, int Hf, int Wf) http://git-wip-us.apache.org/repos/asf/systemml/blob/00d72a09/scripts/nn/layers/lstm.dml ---------------------------------------------------------------------- diff --git a/scripts/nn/layers/lstm.dml b/scripts/nn/layers/lstm.dml index 664a1e2..68d5a5a 100644 --- a/scripts/nn/layers/lstm.dml +++ b/scripts/nn/layers/lstm.dml @@ -79,12 +79,8 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b, int T, out_prev = out0 c_prev = c0 c = c_prev - if (return_sequences) { - out = matrix(0, rows=N, cols=T*M) - } - else { - out = matrix(0, rows=N, cols=M) - } + out = matrix(0, rows=N, cols=ifelse(return_sequences,T*M, M)) + # caches to be used during the backward pass for performance cache_out = matrix(0, rows=T, cols=N*M) cache_c = matrix(0, rows=T, cols=N*M) http://git-wip-us.apache.org/repos/asf/systemml/blob/00d72a09/scripts/nn/layers/rnn.dml ---------------------------------------------------------------------- diff --git a/scripts/nn/layers/rnn.dml b/scripts/nn/layers/rnn.dml index ecea914..378de5a 100644 --- a/scripts/nn/layers/rnn.dml +++ b/scripts/nn/layers/rnn.dml @@ -146,8 +146,8 @@ backward = function(matrix[double] dout, matrix[double] X, matrix[double] W, mat } input = cbind(X_t, out_prev) # shape (N, D+M) dout_t_raw = (1-out_t^2) * dout_t # into tanh, shape (N, M) - dW = dW + t(input) %*% dout_t_raw # shape (D+M, M) - db = db + colSums(dout_t_raw) # shape (1, M) + dW += t(input) %*% dout_t_raw # shape (D+M, M) + db += colSums(dout_t_raw) # shape (1, M) dinput = dout_t_raw %*% t(W) # shape (N, D+M) dX[,(t-1)*D+1:t*D] = dinput[,1:D] dout_prev = dinput[,D+1:D+M] # shape (N, M)
