This is an automated email from the ASF dual-hosted git repository.
niketanpansare pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemml.git
The following commit(s) were added to refs/heads/master by this push:
new dd1a09b [SYSTEMML-540] Bugfix for the Keras2DML LSTM layer
dd1a09b is described below
commit dd1a09b2df610555b950c39cd4792ee70f326b5d
Author: Niketan Pansare <[email protected]>
AuthorDate: Mon Feb 4 15:31:03 2019 -0800
[SYSTEMML-540] Bugfix for the Keras2DML LSTM layer
- For the LSTM layer, Keras weights are laid out in [i, f, c, o] format,
whereas the SystemML weights are laid out in [i, f, o, c] format.
- This causes inconsistent outputs, especially in a transfer learning or
prediction setting where a model trained in Keras has to be used by
SystemML.
---
src/main/python/systemml/mllearn/keras2caffe.py | 24 ++++++++++++++++++++++--
1 file changed, 22 insertions(+), 2 deletions(-)
diff --git a/src/main/python/systemml/mllearn/keras2caffe.py b/src/main/python/systemml/mllearn/keras2caffe.py
index a06113c..f6d6440 100755
--- a/src/main/python/systemml/mllearn/keras2caffe.py
+++ b/src/main/python/systemml/mllearn/keras2caffe.py
@@ -477,10 +477,30 @@ def convertKerasToCaffeSolver(kerasModel, caffeNetworkFilePath, outCaffeSolverFi
def getInputMatrices(layer):
- if isinstance(layer, keras.layers.LSTM) or isinstance(
- layer, keras.layers.SimpleRNN):
+ if isinstance(layer, keras.layers.SimpleRNN):
weights = layer.get_weights()
return [np.vstack((weights[0], weights[1])), np.matrix(weights[2])]
+ elif isinstance(layer, keras.layers.LSTM):
+ weights = layer.get_weights()
+ W, U, b = weights[0], weights[1], weights[2]
+ units = W.shape[1]/4
+ if W.shape[1] != U.shape[1]:
+ raise Exception('Number of hidden units of the kernel and the recurrent kernel doesnot match')
+ # Note: For the LSTM layer, Keras weights are laid out in [i, f, c, o] format;
+ # whereas SystemML weights are laid out in [i, f, o, c] format.
+ W_i = W[:, :units]
+ W_f = W[:, units: units * 2]
+ W_c = W[:, units * 2: units * 3]
+ W_o = W[:, units * 3:]
+ U_i = U[:, :units]
+ U_f = U[:, units: units * 2]
+ U_c = U[:, units * 2: units * 3]
+ U_o = U[:, units * 3:]
+ b_i = b[:units]
+ b_f = b[units: units * 2]
+ b_c = b[units * 2: units * 3]
+ b_o = b[units * 3:]
+ return [np.vstack((np.hstack((W_i, W_f, W_o, W_c)), np.hstack((U_i, U_f, U_o, U_c)))).reshape((-1, 4*units)), np.hstack((b_i, b_f, b_o, b_c)).reshape((1, -1))]
else:
return [getNumPyMatrixFromKerasWeight(
param) for param in layer.get_weights()]