SINGA-371 Implement functional operations in C++ for autograd

- make the convolution operation stateless

- implement the new Conv2d and Linear layers by calling the corresponding operations

- the newly developed layers have passed testing in the example network (/example/autograd/mnist_cnn.py)
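
For illustration only (not part of this patch), a minimal sketch of how the
new stateful layers are meant to drive the stateless operations. Conv2D,
Linear, flatten and the module-level training flag come from the diff below;
the device and tensor setup (device.get_default_device, tensor.Tensor,
gaussian) are assumptions about the surrounding singa Python API.

    from singa import autograd, tensor, device

    autograd.training = True                  # record inputs so backward() can run

    dev = device.get_default_device()         # default (CPU) device
    x = tensor.Tensor(shape=(64, 1, 28, 28), device=dev)
    x.gaussian(0.0, 1.0)                      # stand-in for an MNIST batch

    conv1 = autograd.Conv2D(1, 32, kernel_size=3, padding=1)  # layer owns W, b and the conv handles
    fc1 = autograd.Linear(32 * 28 * 28, 10)                   # layer owns W, b

    h = conv1(x)              # calls the stateless conv2d(x, W, b, handles) operation
    h = autograd.flatten(h)   # 28x28 stays 28x28 with kernel 3, stride 1, padding 1
    y = fc1(h)                # matmul + add_bias operations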


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/82ef4179
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/82ef4179
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/82ef4179

Branch: refs/heads/master
Commit: 82ef41799ead7c6fd13746f4b155c1d98c14e6ae
Parents: aa9c52a
Author: xuewanqi <[email protected]>
Authored: Sun Jul 1 15:55:40 2018 +0000
Committer: xuewanqi <[email protected]>
Committed: Mon Jul 2 06:09:07 2018 +0000

----------------------------------------------------------------------
 python/singa/autograd.py | 222 ++++++++++++++++++++++++++++--------------
 1 file changed, 150 insertions(+), 72 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/82ef4179/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index b1475bb..474fff4 100644
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -468,41 +468,6 @@ class Conv2d(Operation):
         ret = self.PyLayer.layer.Backward(self.flag, dy)
         return (ret[0],) + ret[1]
 
-
-class Linear(Operation):
-
-    def __init__(self, in_features, out_features, bias=True):
-        self.in_features = in_features
-        self.out_features = out_features
-        self.w_shape = (in_features, out_features)
-        self.b_shape = (1, out_features)
-        self.bias = bias
-        self.init_value = False
-
-    def get_params(self):
-        assert self.init_value is True, 'must initialize before get_params()'
-        if self.bias:
-            return (self.w, self.b)
-        else:
-            return self.w
-
-    def __call__(self, x):
-        if self.init_value is False:
-            self.w = Tensor(shape=self.w_shape,
-                            requires_grad=True, stores_grad=True)
-            std = math.sqrt(2.0 / (self.in_features + self.out_features))
-            self.w.gaussian(0.0, std)
-            if self.bias:
-                self.b = Tensor(shape=self.b_shape,
-                                requires_grad=True, stores_grad=True)
-                self.b.set_value(0.0)
-            self.init_value = True
-        y = matmul(x, self.w)
-        if self.bias:
-            y = add_bias(y, self.b, axis=0)
-        return y
-
-
 class MaxPool2d(Operation):
 
     def __init__(self, kernel_size=3, stride=1, padding=0, dilation=1,
@@ -583,8 +548,8 @@ class Flatten(Operation):
 def flatten(x):
     return Flatten()(x)[0]
 
-class Conv2D(Operation):
-    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+class CONV2D(Operation):
+    '''def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                  padding=0, dilation=1, groups=1, bias=True, **kwargs):
 
         self.in_channels = in_channels
@@ -654,60 +619,52 @@ class Conv2D(Operation):
 
        xs = [x, self.W, self.b]
 
-       return self._do_forward(*xs)[0]
+       return self._do_forward(*xs)[0]'''
+    def __init__(self, handles):
+        self.handles = handles
 
-    def forward(self, *xs):
-        assert xs[0].nDim() == 4, 'The dimensions of input should be 4D.'
-        assert xs[0].shape()[1] == self.in_channels, 'in_channels dismatched.'
-        #assert (xs[0].shape()[2]+2*self.padding[0]-self.kernel_size[0]-1)%self.stride[0] == 0, 'invalid padding.'
-        assert 0==0, 'invalid padding'
+    def forward(self, x, W, b):
+        #assert x.nDim() == 4, 'The dimensions of input should be 4D.'
+        #assert x.shape()[1] == self.in_channels, 'in_channels dismatched.'
+        #assert (xs[0].shape()[2]+2*self.padding[0]-self.kernel_size[0])%self.stride[0] == 0, 'invalid padding.'
+        #assert (xs[0].shape()[3]+2*self.padding[1]-self.kernel_size[1])%self.stride[1] == 0, 'invalid padding'
+        #assert 0 == 0, 'invalid padding'
 
         if training:
-            self.x = xs[0]
+            self.inputs = (x,W,b)
 
-        if self.device_id == -1:
-            if not hasattr (self, 'handles'):
-                self.handles = singa.ConvHandles(xs[0], self.kernel_size, self.stride,
-                               self.padding, self.in_channels, self.out_channels, self.bias)
-            elif xs[0].shape()[0] != self.handles.batchsize:
-                self.handles = singa.ConvHandles(xs[0], self.kernel_size, self.stride,
-                               self.padding, self.in_channels, self.out_channels, self.bias)
-            return singa.CpuConvForward(xs[0], xs[1], xs[2], self.handles)
+        if self.handles.device_id == -1:
+            return singa.CpuConvForward(x, W, b, self.handles)
 
         else:
-            if not hasattr(self, 'handles'):
-                self.handles = singa.CudnnConvHandles(xs[0], self.kernel_size, self.stride,
-                               self.padding, self.in_channels, self.out_channels, self.bias,
-                               self.inner_params['workspace_MB_limit']*1024*1024, self.inner_params['cudnn_prefer'])
-            elif xs[0].shape()[0] != self.handles.batchsize:
-                self.handles = singa.CudnnConvHandles(xs[0], self.kernel_size, self.stride,
-                               self.padding, self.in_channels, self.out_channels, self.bias,
-                               self.inner_params['workspace_MB_limit']*1024*1024, self.inner_params['cudnn_prefer'])
-            return singa.GpuConvForward(xs[0], xs[1], xs[2], self.handles)
+            return singa.GpuConvForward(x, W, b, self.handles)
 
     def backward(self, dy):
-        assert training is True and hasattr(self, 'x'), 'Please set training as True before do BP. '
+        assert training is True and hasattr(self, 'inputs'), 'Please set training as True before do BP. '
 
-        if dy.device().id() != self.device_id:
+        if dy.device().id() != self.handles.device_id:
             dy.ToDevice(self.x.device())
 
-        if self.device_id == -1: 
-            dx = singa.CpuConvBackwardx(dy, self.W.data, self.x, self.handles)
-            dW = singa.CpuConvBackwardW(dy, self.x, self.W.data, self.handles)
-            if self.bias:
-                db = singa.CpuConvBackwardb(dy, self.b.data, self.handles)
+        if self.handles.device_id == -1: 
+            dx = singa.CpuConvBackwardx(dy, self.inputs[1], self.inputs[0], self.handles)
+            dW = singa.CpuConvBackwardW(dy, self.inputs[0], self.inputs[1], self.handles)
+            if self.handles.bias:
+                db = singa.CpuConvBackwardb(dy, self.inputs[2], self.handles)
                 return dx, dW, db
             else:
                 return dx, dW
         else:
-            dx = singa.GpuConvBackwardx(dy, self.W.data, self.x, self.handles)
-            dW = singa.GpuConvBackwardW(dy, self.x, self.W.data, self.handles)
-            if self.bias:
-                db = singa.GpuConvBackwardb(dy, self.b.data, self.handles)
+            dx = singa.GpuConvBackwardx(dy, self.inputs[1], self.inputs[0], self.handles)
+            dW = singa.GpuConvBackwardW(dy, self.inputs[0], self.inputs[1], self.handles)
+            if self.handles.bias:
+                db = singa.GpuConvBackwardb(dy, self.inputs[2], self.handles)
                 return dx, dW, db
             else:
                 return dx, dW
 
+def conv2d(x,W,b,handles):
+    return CONV2D(handles)(x,W,b)[0]
+
 def infer_dependency(op):
     '''
     Infer the dependency of all operations with the
@@ -818,3 +775,124 @@ def backward(y, dy=None):
                     del not_ready[src_op]
 
     return gradients
+
+class newlayer(object):
+    def __init__(self):
+        pass
+
+    def device_check(*inputs):
+        pass
+
+
+class Linear(newlayer):
+    def __init__(self, in_features, out_features, bias=True):
+        #self.in_features = in_features
+        #self.out_features = out_features
+        w_shape = (in_features, out_features)
+        b_shape = (1, out_features)
+        self.bias = bias
+        
+        self.W = Tensor(shape=w_shape,
+                        requires_grad=True, stores_grad=True)
+        std = math.sqrt(2.0 / (in_features + out_features))
+        self.W.gaussian(0.0, std)
+        
+        if self.bias:
+            self.b = Tensor(shape=b_shape,
+                            requires_grad=True, stores_grad=True)
+            self.b.set_value(0.0)
+
+    def __call__(self, x):
+        if self.bias:
+            self.device_check(x, self.W, self.b)
+        else:
+            self.device_check(x, self.W)
+        y = matmul(x, self.W)
+        if self.bias:
+            y = add_bias(y, self.b, axis=0)
+        return y
+
+class Conv2D(newlayer):
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+                 padding=0, dilation=1, groups=1, bias=True, **kwargs):
+
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+
+        if isinstance(kernel_size, int):
+            self.kernel_size = (kernel_size, kernel_size)
+        elif isinstance(kernel_size, tuple):
+            self.kernel_size = kernel_size
+        else:
+            raise TypeError('Wrong kernel_size type.')
+        
+        if isinstance(stride, int):
+            self.stride = (stride,stride)
+        elif isinstance(stride, tuple):
+            self.stride = stride
+        else:
+            raise TypeError('Wrong stride type.')
+
+        if isinstance(padding, int):
+            self.padding = (padding,padding)
+        elif isinstance(padding, tuple):
+            self.padding = padding
+        else:
+            raise TypeError('Wrong padding type.')
+
+        if dilation != 1 or groups != 1:
+            raise ValueError('Not implemented yet')
+
+        self.bias = bias
+
+        self.inner_params = {'cudnn_prefer': 'fastest', 'workspace_MB_limit': 1024}
+        # TODO valid value of inner_params check
+
+        for kwarg in kwargs:
+            if kwarg not in self.inner_params:
+                raise TypeError('Keyword argument not understood:', kwarg)
+            else:
+                self.inner_params[kwarg] = kwargs[kwarg]
+        
+        w_shape = (self.out_channels, self.in_channels, self.kernel_size[0], self.kernel_size[1])
+        self.W = Tensor(shape=w_shape, requires_grad=True, stores_grad=True)
+        std = math.sqrt(
+                2.0 / (self.in_channels * self.kernel_size[0] * self.kernel_size[1] + self.out_channels))
+        self.W.gaussian(0.0, std)
+
+        if self.bias:
+            b_shape = (self.out_channels,)
+            self.b = Tensor(shape=b_shape, requires_grad=True, stores_grad=True)
+            self.b.set_value(0.0)
+        else:
+            #to keep consistency when to do forward.
+            self.b = Tensor(data=CTensor([1]), requires_grad=False, stores_grad=False)
+            self.b.set_value(0.0)
+
+    def __call__(self, x):
+        self.device_check(x, self.W, self.b)
+
+        if x.device.id() == -1:
+            if not hasattr (self, 'handles'):
+                self.handles = singa.ConvHandles(x.data, self.kernel_size, self.stride,
+                               self.padding, self.in_channels, self.out_channels, self.bias)
+            elif x.shape[0] != self.handles.batchsize:
+                self.handles = singa.ConvHandles(x.data, self.kernel_size, self.stride,
+                               self.padding, self.in_channels, self.out_channels, self.bias)
+        else:
+            if not hasattr(self, 'handles'):
+                self.handles = singa.CudnnConvHandles(x.data, self.kernel_size, self.stride,
+                               self.padding, self.in_channels, self.out_channels, self.bias,
+                               self.inner_params['workspace_MB_limit']*1024*1024, self.inner_params['cudnn_prefer'])
+            elif x.shape[0] != self.handles.batchsize:
+                self.handles = singa.CudnnConvHandles(x.data, self.kernel_size, self.stride,
+                               self.padding, self.in_channels, self.out_channels, self.bias,
+                               self.inner_params['workspace_MB_limit']*1024*1024, self.inner_params['cudnn_prefer'])
+        self.handles.device_id= x.device.id()
+        self.handles.bias=self.bias # can simplified
+        y = conv2d(x, self.W, self.b, self.handles)
+        return y
+
+
+
+
