[SYSTEMML-1444] Simplify nn-lstm layer with UDFs in expressions

Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/63be18a8
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/63be18a8
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/63be18a8

Branch: refs/heads/master
Commit: 63be18a840f4b898c5e91aa2f3ff2efd02690a97
Parents: 727e69e
Author: Matthias Boehm <[email protected]>
Authored: Mon Mar 5 15:07:18 2018 -0800
Committer: Matthias Boehm <[email protected]>
Committed: Mon Mar 5 15:12:47 2018 -0800

----------------------------------------------------------------------
 scripts/nn/layers/lstm.dml | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/systemml/blob/63be18a8/scripts/nn/layers/lstm.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/lstm.dml b/scripts/nn/layers/lstm.dml
index 68d5a5a..44942d2 100644
--- a/scripts/nn/layers/lstm.dml
+++ b/scripts/nn/layers/lstm.dml
@@ -90,15 +90,12 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b, int T,
     X_t = X[,(t-1)*D+1:t*D] # shape (N, D)
     input = cbind(X_t, out_prev) # shape (N, D+M)
     ifog = input %*% W + b # input, forget, output, and g gates; shape (N, 4M)
-    tmp = sigmoid::forward(ifog[,1:3*M]) # i,f,o gates squashed with sigmoid
-    ifog[,1:3*M] = tmp
-    tmp = tanh::forward(ifog[,3*M+1:4*M]) # g gate squashed with tanh
-    ifog[,3*M+1:4*M] = tmp
+    ifog[,1:3*M] = sigmoid::forward(ifog[,1:3*M]) # i,f,o gates squashed with sigmoid
+    ifog[,3*M+1:4*M] = tanh::forward(ifog[,3*M+1:4*M]) # g gate squashed with tanh
     # c_t = f*prev_c + i*g
     c = ifog[,M+1:2*M]*c_prev + ifog[,1:M]*ifog[,3*M+1:4*M] # shape (N, M)
     # out_t = o*tanh(c)
-    tmp = tanh::forward(c)
-    out_t = ifog[,2*M+1:3*M] * tmp # shape (N, M)
+    out_t = ifog[,2*M+1:3*M] * tanh::forward(c) # shape (N, M)
 
     # store
     if (return_sequences) {
@@ -202,10 +199,8 @@ backward = function(matrix[double] dout, matrix[double] dc,
     o = ifog[,2*M+1:3*M] # output gate, shape (N, M)
     g = ifog[,3*M+1:4*M] # g gate, shape (N, M)
 
-    tmp = tanh::backward(dout_t, ct)
-    dct = dct + o*tmp # shape (N, M)
-    tmp = tanh::forward(ct)
-    do = tmp * dout_t # output gate, shape (N, M)
+    dct = dct + o*tanh::backward(dout_t, ct) # shape (N, M)
+    do = tanh::forward(ct) * dout_t # output gate, shape (N, M)
     df = c_prev * dct # forget gate, shape (N, M)
     dc_prev = f * dct # shape (N, M)
     di = g * dct # input gate, shape (N, M)
