SINGA-388 Develop some RNN layers by calling tiny operations like matmul, addbias
- Develop LSTM by calling tiny operations.
- Add the operations the LSTM layer needs (Sigmoid, ElemMatmul) to autograd.py.
- Redesign the structure of the RNN layer.
- The LSTM layer has passed its test (it returns the correct number of gradients); see the notes after the diff for a usage sketch.

Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/5dc17b91
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/5dc17b91
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/5dc17b91

Branch: refs/heads/master
Commit: 5dc17b91e045e6318d224bebee8912da6c646596
Parents: 7df6a5d
Author: xuewanqi <[email protected]>
Authored: Mon Aug 13 14:11:37 2018 +0000
Committer: xuewanqi <[email protected]>
Committed: Thu Aug 16 11:40:25 2018 +0000

----------------------------------------------------------------------
 python/singa/autograd.py | 143 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 131 insertions(+), 12 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5dc17b91/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 4c7959c..b18e08e 100644
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -985,6 +985,41 @@ class Tanh(Operation):
 def tanh(x):
     return Tanh()(x)[0]
 
+class Sigmoid(Operation):
+
+    def forward(self, x):
+        out = singa.Sigmoid(x)
+        if training:
+            self.cache = (out,)
+        return out
+
+    def backward(self, dy):
+        dx = singa.MultFloat(self.cache[0], -1.0)
+        dx = singa.AddFloat(dx, 1.0)
+        dx = singa.__mul__(self.cache[0], dx)
+        dx = singa.__mul__(dy, dx)
+        return dx
+
+
+def sigmoid(x):
+    return Sigmoid()(x)[0]
+
+
+class ElemMatmul(Operation):
+
+    def forward(self, x1, x2):
+        if training:
+            self.cache = (x1, x2)
+        return singa.__mul__(x1, x2)
+
+    def backward(self, dy):
+        dx1 = singa.__mul__(dy, self.cache[1])
+        dx2 = singa.__mul__(dy, self.cache[0])
+        return dx1, dx2
+
+
+def elemmatmul(x, y):
+    return ElemMatmul()(x, y)[0]
 
 def add_all(*xs):
     assert len(xs) > 2
@@ -993,9 +1028,22 @@ def add_all(*xs):
         y=add(y, x)
     return
 
+class RNN(Layer):
+    def __init__(self):
+        raise NotImplementedError
 
-class Vanilla_RNN(Layer):
+    def __call__(self, h0, *xs):
+        batchsize=xs[0].shape[0]
+        out=[]
+        h = self.step_forward(xs[0], h0, self.Wx, self.Wh, self.b)
+        out.append(h)
+        for x in xs[1:]:
+            assert x.shape[0] == batchsize
+            h = self.step_forward(x, h, self.Wx, self.Wh, self.b)
+            out.append(h)
+        return out, h
+
+class Vanilla_RNN(RNN):
     def __init__(self, input_size, hidden_size, num_layers=1, nonlinearity='tanh', bias=True, batch_first=False, dropout=0, bidirectional=False):
 
         self.nonlinearity=nonlinearity
@@ -1011,17 +1059,6 @@ class Vanilla_RNN(Layer):
         self.b = Tensor(shape=B_shape, requires_grad=True, stores_grad=True)
         self.b.set_value(0.0)
 
-    def __call__(self, h0, *xs):
-        batchsize=xs[0].shape[0]
-        self.out=[]
-        h = self.step_forward(xs[0], h0, self.Wx, self.Wh, self.b)
-        self.out.append(h)
-        for x in xs[1:]:
-            assert x.shape[0] == batchsize
-            h = self.step_forward(x, h, self.Wx, self.Wh, self.b)
-            self.out.append(h)
-        return self.out
-
     def step_forward(self, x, h, Wx, Wh, b):
         y1=matmul(x, Wx)
         y2=matmul(h, Wh)
@@ -1035,4 +1072,86 @@ class Vanilla_RNN(Layer):
             raise ValueError
         return y
 
+class LSTM(RNN):
+
+    def __init__(self, input_size, hidden_size, nonlinearity='tanh', num_layers=1, bias=True, batch_first=False, dropout=0, bidirectional=False):
+
+        self.nonlinearity = nonlinearity
+
+        Wx_shape = (input_size, hidden_size)
+        self.Wx = []
+        for i in range(4):
+            w = Tensor(shape=Wx_shape, requires_grad=True, stores_grad=True)
+            w.gaussian(0.0, 1.0)
+            self.Wx.append(w)
+
+        Wh_shape = (hidden_size, hidden_size)
+        self.Wh = []
+        for i in range(4):
+            w = Tensor(shape=Wh_shape, requires_grad=True, stores_grad=True)
+            w.gaussian(0.0, 1.0)
+            self.Wh.append(w)
+
+        Bx_shape = (hidden_size,)
+        self.Bx = []
+        for i in range(4):
+            b = Tensor(shape=Bx_shape, requires_grad=True, stores_grad=True)
+            b.set_value(0.0)
+            self.Bx.append(b)
+
+        Bh_shape = (hidden_size,)
+        self.Bh = []
+        for i in range(4):
+            b = Tensor(shape=Bx_shape, requires_grad=True, stores_grad=True)
+            b.set_value(0.0)
+            self.Bh.append(b)
+
+    def __call__(self, h0, c0, *xs):
+        batchsize = xs[0].shape[0]
+        out = []
+        h, c = self.step_forward(
+            xs[0], h0, c0, self.Wx, self.Wh, self.Bx, self.Bh)
+        out.append(h)
+        for x in xs[1:]:
+            assert x.shape[0] == batchsize
+            h, c = self.step_forward(
+                x, h, c, self.Wx, self.Wh, self.Bx, self.Bh)
+            out.append(h)
+        return out, h, c
+
+    def step_forward(self, x, h, c, Wx, Wh, Bx, Bh):
+        y1 = matmul(x, Wx[0])
+        y1 = add_bias(y1, Bx[0], axis=0)
+        y2 = matmul(h, Wh[0])
+        y2 = add_bias(y2, Bh[0], axis=0)
+        i = add(y1, y2)
+        i = sigmoid(i)
+
+        y1 = matmul(x, Wx[1])
+        y1 = add_bias(y1, Bx[1], axis=0)
+        y2 = matmul(h, Wh[1])
+        y2 = add_bias(y2, Bh[1], axis=0)
+        f = add(y1, y2)
+        f = sigmoid(f)
+
+        y1 = matmul(x, Wx[2])
+        y1 = add_bias(y1, Bx[2], axis=0)
+        y2 = matmul(h, Wh[2])
+        y2 = add_bias(y2, Bh[2], axis=0)
+        o = add(y1, y2)
+        o = sigmoid(o)
+
+        y1 = matmul(x, Wx[3])
+        y1 = add_bias(y1, Bx[3], axis=0)
+        y2 = matmul(h, Wh[3])
+        y2 = add_bias(y2, Bh[3], axis=0)
+        g = add(y1, y2)
+        g = tanh(g)
+
+        cout1 = elemmatmul(f, c)
+        cout2 = elemmatmul(i, g)
+        cout = add(cout1, cout2)
+
+        hout = tanh(cout)
+        hout = elemmatmul(o, hout)
+        return hout, cout
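----------------------------------------------------------------------
Reviewer note (not part of the commit): Sigmoid.backward above chains
singa.MultFloat, singa.AddFloat and singa.__mul__ to compute
dx = dy * out * (1 - out), i.e. the usual derivative of the logistic
function. The following self-contained NumPy sketch, written for this note,
checks that rule against a finite-difference derivative; it does not use
SINGA at all.

    import numpy as np

    def sig(v):
        # same function as singa.Sigmoid computes in the forward pass
        return 1.0 / (1.0 + np.exp(-v))

    x = np.random.randn(4)
    out = sig(x)

    # rule implemented by Sigmoid.backward (with dy factored out): out * (1 - out)
    analytic = out * (1.0 - out)

    # central finite difference as an independent reference
    eps = 1e-6
    numeric = (sig(x + eps) - sig(x - eps)) / (2.0 * eps)

    assert np.allclose(analytic, numeric, atol=1e-5)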
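Reviewer note (not part of the commit): a minimal usage sketch of the new
layer, written for this note. It relies only on names visible in the diff
(autograd.LSTM, the __call__(h0, c0, *xs) signature returning (out, h, c),
and tensor.Tensor with set_value/gaussian); the autograd.training switch is
an assumption, and the project's actual test may differ.

    from singa import autograd, tensor

    autograd.training = True  # assumed switch for the module-level 'training' flag

    input_size, hidden_size, batch, seq_len = 8, 16, 4, 3
    lstm = autograd.LSTM(input_size, hidden_size)

    # zero initial hidden and cell states, shape (batch, hidden_size)
    h0 = tensor.Tensor(shape=(batch, hidden_size))
    h0.set_value(0.0)
    c0 = tensor.Tensor(shape=(batch, hidden_size))
    c0.set_value(0.0)

    # one (batch, input_size) tensor per time step, as LSTM.__call__ expects
    xs = []
    for _ in range(seq_len):
        x = tensor.Tensor(shape=(batch, input_size))
        x.gaussian(0.0, 1.0)
        xs.append(x)

    out, h, c = lstm(h0, c0, *xs)
    assert len(out) == seq_len  # one hidden state per time step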
