Repository: incubator-singa

Updated Branches:
  refs/heads/master eec0d52da -> db92c7595
SINGA-392 Update autograd API to Pytorch style

Change some APIs to Pytorch style. Modified the corresponding test cases and example net.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/db92c759
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/db92c759
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/db92c759

Branch: refs/heads/master
Commit: db92c75957c96764c23da40cfa5bce6b42df16d9
Parents: eec0d52
Author: xuewanqi <[email protected]>
Authored: Wed Sep 26 03:14:49 2018 +0000
Committer: xuewanqi <[email protected]>
Committed: Wed Sep 26 03:14:49 2018 +0000

----------------------------------------------------------------------
 examples/autograd/mlp.py      |  2 +-
 python/singa/autograd.py      | 31 +++++++++++++++++++------------
 test/python/test_operation.py | 14 +++++++-------
 3 files changed, 27 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/db92c759/examples/autograd/mlp.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mlp.py b/examples/autograd/mlp.py
index e90ff1d..dfc67b3 100755
--- a/examples/autograd/mlp.py
+++ b/examples/autograd/mlp.py
@@ -84,7 +84,7 @@ if __name__ == '__main__':
         x = autograd.relu(x)
         x = autograd.matmul(x, w1)
         x = autograd.add_bias(x, b1)
-        x = autograd.soft_max(x)
+        x = autograd.softmax(x)
         loss = autograd.cross_entropy(x, target)
         for p, gp in autograd.backward(loss):
             sgd.apply(0, gp, p, '')

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/db92c759/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 11ad644..e1557ca 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -448,7 +448,7 @@ class SoftMax(Operation):
         return singa.DefaultTranspose(dx)

-def soft_max(x, axis=0):
+def softmax(x, axis=0):
     return SoftMax(axis)(x)[0]

@@ -540,7 +540,7 @@ class MeanSquareError(Operation):
         pass  # TODO, broadcast elementwise multiply seems not support

-def mean_square_error(x, t):
+def mse_loss(x, t):
     return MeanSquareError()(x, t)[0]

@@ -1076,7 +1076,8 @@ class ElemMatmul(Operation):
         return dx1, dx2

-def elemmatmul(x, y):
+def mul(x, y):
+    # do pointwise multiplication
     return ElemMatmul()(x, y)[0]

@@ -1088,7 +1089,7 @@ def add_all(*xs):
     return

-class RNN(Layer):
+class RNN_Base(Layer):

     def __init__(self):
         raise NotImplementedError

@@ -1100,7 +1101,7 @@
         raise NotImplementedError

-class Vanilla_RNN(RNN):
+class RNN(RNN_Base):

     def __init__(self, input_size, hidden_size, num_layers=1, nonlinearity='tanh',
                  bias=True, batch_first=False, dropout=0, bidirectional=False):
         self.nonlinearity = nonlinearity

@@ -1119,7 +1120,10 @@
         self.params = (self.Wx, self.Wh, self.b)

-    def __call__(self, h0, *xs):
+    def __call__(self, xs, h0):
+        # xs: a tuple or list of input tensors
+        if not isinstance(xs, tuple):
+            xs = tuple(xs)
         inputs = xs + (h0,)
         self.device_check(*inputs)
         #self.device_check(inputs[0], *self.params)

@@ -1148,7 +1152,7 @@
         return y

-class LSTM(RNN):
+class LSTM(RNN_Base):

     def __init__(self, input_size, hidden_size, nonlinearity='tanh', num_layers=1,
                  bias=True, batch_first=False, dropout=0, bidirectional=False):
         self.nonlinearity = nonlinearity

@@ -1183,8 +1187,11 @@
         self.params = self.Wx + self.Wh + self.Bx + self.Bh

-    def __call__(self, h0, c0, *xs):
-        inputs = xs + (h0, c0)
+    def __call__(self, xs, (h0, c0)):
+        # xs: a tuple or list of input tensors
+        if not isinstance(xs, list):
+            xs = list(xs)
+        inputs = xs + list((h0, c0))
         self.device_check(*inputs)
         #self.device_check(inputs[0], *self.params)
         self.device_check(inputs[0], *(self.Wx + self.Wh + self.Bx + self.Bh))

@@ -1229,10 +1236,10 @@
         g = add(y1, y2)
         g = tanh(g)
-        cout1 = elemmatmul(f, c)
-        cout2 = elemmatmul(i, g)
+        cout1 = mul(f, c)
+        cout2 = mul(i, g)
         cout = add(cout1, cout2)
         hout = tanh(cout)
-        hout = elemmatmul(o, hout)
+        hout = mul(o, hout)
         return hout, cout

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/db92c759/test/python/test_operation.py
----------------------------------------------------------------------
diff --git a/test/python/test_operation.py b/test/python/test_operation.py
index 9fb6a57..1df8949 100755
--- a/test/python/test_operation.py
+++ b/test/python/test_operation.py
@@ -141,9 +141,9 @@ class TestPythonOperation(unittest.TestCase):
     def test_vanillaRNN_gpu_tiny_ops_shape_check(self):
         # gradients shape check.
         inputs, target, h0 = prepare_inputs_targets_for_rnn_test()
-        rnn = autograd.Vanilla_RNN(3, 2)
+        rnn = autograd.RNN(3, 2)

-        hs, _ = rnn(h0, *inputs)
+        hs, _ = rnn(inputs, h0)

         loss = autograd.softmax_cross_entropy(hs[0], target[0])
         for i in range(1, len(hs)):
@@ -162,7 +162,7 @@
         rnn = autograd.LSTM(3, 2)

-        hs, _, _ = rnn(h0, c0, *inputs)
+        hs, _, _ = rnn(inputs, (h0, c0))

         loss = autograd.softmax_cross_entropy(hs[0], target[0])
         for i in range(1, len(hs)):
@@ -206,10 +206,10 @@
     def test_numerical_gradients_check_for_vallina_rnn(self):
         inputs, target, h0 = prepare_inputs_targets_for_rnn_test()

-        rnn = autograd.Vanilla_RNN(3, 2)
+        rnn = autograd.RNN(3, 2)

         def valinna_rnn_forward():
-            hs, _ = rnn(h0, *inputs)
+            hs, _ = rnn(inputs, h0)

             loss = autograd.softmax_cross_entropy(hs[0], target[0])
             for i in range(1, len(hs)):
@@ -234,7 +234,7 @@
         rnn = autograd.LSTM(3, 2)

         def lstm_forward():
-            hs, _, _ = rnn(h0, c0, *inputs)
+            hs, _, _ = rnn(inputs, (h0, c0))

             loss = autograd.softmax_cross_entropy(hs[0], target[0])
             for i in range(1, len(hs)):
@@ -258,7 +258,7 @@
         x.to_device(gpu_dev)
         t.to_device(gpu_dev)

-        loss= autograd.mean_square_error(x,t)
+        loss= autograd.mse_loss(x,t)
         dx=loss.creator.backward()[0]

         loss_np=tensor.to_numpy(loss)
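For reference, the renamed functions are drop-in replacements for the old names. A minimal usage sketch of the new-style calls follows; the tensor setup (numpy arrays via tensor.from_numpy) is illustrative only and is not part of this commit:

    # Sketch only: the op names (softmax, mse_loss, mul) come from this commit;
    # the tensor construction follows the conventions of test/python/test_operation.py
    # and is an assumption for illustration.
    import numpy as np
    from singa import tensor, autograd

    x = tensor.from_numpy(np.random.randn(3, 2).astype(np.float32))
    t = tensor.from_numpy(np.random.randn(3, 2).astype(np.float32))

    y = autograd.softmax(x)         # was autograd.soft_max(x)
    loss = autograd.mse_loss(y, t)  # was autograd.mean_square_error(y, t)
    prod = autograd.mul(x, t)       # was autograd.elemmatmul(x, t); pointwise multiply
    print(tensor.to_numpy(loss))    # loss value as a numpy array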

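The recurrent layers likewise follow the PyTorch-style call order: the input sequence comes first and the initial state(s) second. Below is a hedged sketch that mirrors the updated tests; the toy shapes and zero-initialized states are assumptions for illustration, not part of the diff:

    import numpy as np
    from singa import tensor, autograd

    # assumed toy shapes: sequence of 4 steps, batch 2, input size 3, hidden size 2
    inputs = [tensor.from_numpy(np.random.randn(2, 3).astype(np.float32))
              for _ in range(4)]
    h0 = tensor.from_numpy(np.zeros((2, 2), dtype=np.float32))
    c0 = tensor.from_numpy(np.zeros((2, 2), dtype=np.float32))

    rnn = autograd.RNN(3, 2)              # was autograd.Vanilla_RNN(3, 2)
    hs, hn = rnn(inputs, h0)              # was rnn(h0, *inputs)

    lstm = autograd.LSTM(3, 2)
    hs, hn, cn = lstm(inputs, (h0, c0))   # was lstm(h0, c0, *inputs)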