SINGA-388 Develop some RNN layers by calling tiny operations like matmul, addbias
- develop Vanilla RNN by calling smaller operations.
- add the necessary operations used by the Vanilla RNN layer.
- the developed RNN layer has passed the test (it returns the correct number of gradients).


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/7df6a5db
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/7df6a5db
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/7df6a5db

Branch: refs/heads/master
Commit: 7df6a5db4b7ef30aca561889e46e2457b35b15c3
Parents: 770d6cd
Author: xuewanqi <[email protected]>
Authored: Mon Aug 13 13:44:29 2018 +0000
Committer: xuewanqi <[email protected]>
Committed: Thu Aug 16 11:40:25 2018 +0000

----------------------------------------------------------------------
 python/singa/autograd.py | 74 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7df6a5db/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
old mode 100755
new mode 100644
index 56b5498..4c7959c
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -87,6 +87,7 @@ def backward(y, dy=None):
         a dictionary storing the gradient tensors of all tensors
             whose stores_grad is true (e.g. parameter tensors)
     '''
+    assert isinstance(y, Tensor), 'wrong input type.'
     dependency = infer_dependency(y.creator)
     assert y.size() == 1, 'y must be a Tensor with a single value;'\
         'size of y is % d' % y.size()
@@ -172,6 +173,7 @@ def backward(y, dy=None):
         del op  # delete the operation to free all tensors from this op
 
 
+
 class Operation(object):
     '''
     An operation includes the forward and backward function of
@@ -962,3 +964,75 @@ class AvgPool1d(Pooling2d):
             stride = kernel_size
         super(MaxPool2d, self).__init__(
             (1, kernel_size), (0, stride), (0, padding), False)
+
+
+class Tanh(Operation):
+
+    def forward(self, x):
+        out = singa.Tanh(x)
+        if training:
+            self.cache = (out,)
+        return out
+
+    def backward(self, dy):
+        dx = singa.__mul__(self.cache[0], self.cache[0])
+        dx = singa.MultFloat(dx, -1.0)
+        dx = singa.AddFloat(dx, 1.0)
+        dx = singa.__mul__(dy, dx)
+        return dx
+
+
+def tanh(x):
+    return Tanh()(x)[0]
+
+
+def add_all(*xs):
+    assert len(xs) > 2
+    y = add(xs[0], xs[1])
+    for x in xs[2:]:
+        y = add(y, x)
+    return y
+
+
+class Vanilla_RNN(Layer):
+
+    def __init__(self, input_size, hidden_size, num_layers=1, nonlinearity='tanh', bias=True, batch_first=False, dropout=0, bidirectional=False):
+        self.nonlinearity = nonlinearity
+
+        Wx_shape = (input_size, hidden_size)
+        self.Wx = Tensor(shape=Wx_shape, requires_grad=True, stores_grad=True)
+        self.Wx.gaussian(0.0, 1.0)
+
+        Wh_shape = (hidden_size, hidden_size)
+        self.Wh = Tensor(shape=Wh_shape, requires_grad=True, stores_grad=True)
+        self.Wh.gaussian(0.0, 1.0)
+
+        B_shape = (hidden_size,)
+        self.b = Tensor(shape=B_shape, requires_grad=True, stores_grad=True)
+        self.b.set_value(0.0)
+
+    def __call__(self, h0, *xs):
+        batchsize = xs[0].shape[0]
+        self.out = []
+        h = self.step_forward(xs[0], h0, self.Wx, self.Wh, self.b)
+        self.out.append(h)
+        for x in xs[1:]:
+            assert x.shape[0] == batchsize
+            h = self.step_forward(x, h, self.Wx, self.Wh, self.b)
+            self.out.append(h)
+        return self.out
+
+    def step_forward(self, x, h, Wx, Wh, b):
+        y1 = matmul(x, Wx)
+        y2 = matmul(h, Wh)
+        y = add(y1, y2)
+        y = add_bias(y, b, axis=0)
+        if self.nonlinearity == 'tanh':
+            y = tanh(y)
+        elif self.nonlinearity == 'relu':
+            y = relu(y)
+        else:
+            raise ValueError
+        return y
+
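----------------------------------------------------------------------
Usage note: a minimal sketch (separate from the patch above) of how the new
layer can be driven, assuming only the names visible in this patch
(autograd.Vanilla_RNN, the module-level training flag, autograd.backward) and
the tensor.Tensor constructor arguments used above. Shapes and
hyper-parameters are illustrative, and the scalar loss is only indicated in a
comment because this patch adds no reduction operation.

    from singa import autograd, tensor

    autograd.training = True          # let Tanh cache its output for backward

    seq_len, batch_size, input_size, hidden_size = 4, 2, 3, 5

    # one input tensor per time step, shape (batch_size, input_size)
    xs = []
    for _ in range(seq_len):
        x = tensor.Tensor(shape=(batch_size, input_size))
        x.gaussian(0.0, 1.0)
        xs.append(x)

    # initial hidden state, shape (batch_size, hidden_size)
    h0 = tensor.Tensor(shape=(batch_size, hidden_size))
    h0.set_value(0.0)

    rnn = autograd.Vanilla_RNN(input_size, hidden_size, nonlinearity='tanh')
    hs = rnn(h0, *xs)                 # one hidden-state Tensor per time step

    # To train: reduce hs to a single scalar loss (using a loss operation from
    # autograd), then autograd.backward(loss) returns the gradients of the
    # parameter tensors rnn.Wx, rnn.Wh and rnn.b (they have stores_grad=True).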

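----------------------------------------------------------------------
On the Tanh gradient: Tanh.backward above computes dy * (1 - tanh(x)^2) from
the cached forward output; that is what the chain of singa.__mul__,
singa.MultFloat and singa.AddFloat calls implements. A small stand-alone check
of that formula, using plain Python floats rather than SINGA tensors:

    import math

    x, dy = 0.3, 1.0
    out = math.tanh(x)                    # what Tanh.forward caches
    dx = dy * (1.0 - out * out)           # what Tanh.backward computes
    eps = 1e-6
    numeric = dy * (math.tanh(x + eps) - math.tanh(x - eps)) / (2 * eps)
    assert abs(dx - numeric) < 1e-6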