SINGA-349 Create layer operations for autograd

1. Add a Xavier initialization method.
2. Package the matmul and add_bias operations into a dense function.
3. Modify the examples to use the new API.
4. Remove unfriendly APIs.

[0] Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/ed464efe
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/ed464efe
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/ed464efe

Branch: refs/heads/master
Commit: ed464efe6dbb412f7f1d4ae6e86c9db82acee92d
Parents: 8146852
Author: xuewanqi <xue_wa...@u.nus.edu>
Authored: Fri May 11 15:09:22 2018 +0800
Committer: Wang Wei <dcs...@nus.edu.sg>
Committed: Thu May 17 21:19:07 2018 +0800

----------------------------------------------------------------------
 examples/autograd/mlp.py       |  14 ++---
 examples/autograd/mnist_cnn.py |  35 ++++++-------
 python/singa/autograd.py       | 101 +++++++++++++++++++++++-------------
 python/singa/tensor.py         |   2 +-
 4 files changed, 89 insertions(+), 63 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed464efe/examples/autograd/mlp.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mlp.py b/examples/autograd/mlp.py
index 7352c21..0e42d63 100644
--- a/examples/autograd/mlp.py
+++ b/examples/autograd/mlp.py
@@ -58,13 +58,13 @@ if __name__ == '__main__':
     sgd = optimizer.SGD(0.05)

     # training process
     for i in range(1001):
-        x = tensor.matmul(inputs, w0)
-        x = tensor.add_bias(x, b0)
-        x = tensor.relu(x)
-        x = tensor.matmul(x, w1)
-        x = tensor.add_bias(x, b1)
-        x = tensor.soft_max(x)
-        loss = tensor.cross_entropy(x, target)
+        x = autograd.matmul(inputs, w0)
+        x = autograd.add_bias(x, b0)
+        x = autograd.relu(x)
+        x = autograd.matmul(x, w1)
+        x = autograd.add_bias(x, b1)
+        x = autograd.soft_max(x)
+        loss = autograd.cross_entropy(x, target)
         in_grads = autograd.backward(loss)

         for param in in_grads:
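
With this change the MLP example builds its graph through the functional API in
python/singa/autograd.py instead of tensor.py. For orientation, a minimal sketch of
one training step under the new API; the shapes and data here are illustrative, and
the optimizer update inside the loop is elided, as it is in the hunk above:

    from singa import tensor
    from singa import autograd

    # illustrative parameters (shapes are hypothetical, not from the example)
    w0 = tensor.Tensor(shape=(2, 3), requires_grad=True, stores_grad=True)
    w0.gaussian(0.0, 0.1)
    b0 = tensor.Tensor(shape=(1, 3), requires_grad=True, stores_grad=True)
    b0.set_value(0.0)

    # plain (non-grad) input and target tensors, filled with dummy values
    inputs = tensor.Tensor(shape=(4, 2), requires_grad=False, stores_grad=False)
    inputs.gaussian(0.0, 1.0)
    target = tensor.Tensor(shape=(4, 3), requires_grad=False, stores_grad=False)
    target.set_value(0.0)

    # one forward/backward pass through the functional operations
    x = autograd.matmul(inputs, w0)
    x = autograd.add_bias(x, b0)
    x = autograd.relu(x)
    x = autograd.soft_max(x)
    loss = autograd.cross_entropy(x, target)
    in_grads = autograd.backward(loss)  # gradients for tensors with requires_grad=True
    for param in in_grads:
        pass  # optimizer update elided, as in the diff above
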
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed464efe/examples/autograd/mnist_cnn.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mnist_cnn.py b/examples/autograd/mnist_cnn.py
index 79410e1..bc717c7 100644
--- a/examples/autograd/mnist_cnn.py
+++ b/examples/autograd/mnist_cnn.py
@@ -65,33 +65,32 @@ if __name__ == '__main__':
     print ('the shape of testing label is', y_test.shape)

     # operations initialization
-    conv1=autograd.Conv2d(3,32)
-    relu1 = autograd.ReLU_Layer() # same name for tensor.ReLU and layer_ops.ReLU
-    conv2=autograd.Conv2d(32,32)
-    relu2 = autograd.ReLU_Layer()
-    pooling = autograd.MaxPool2d()
-    flatten = autograd.Flatten()
-    linear = autograd.Linear(None, 10) # in_feature=None for input_shape auto calculation
-    softmax = autograd.SoftMax()
-    cross_entropy = autograd.CrossEntropy()
+    conv1=autograd.Conv2d(3, 32)
+    conv2=autograd.Conv2d(32, 32)
+
+    w0 = tensor.Tensor(shape=(25088, 10), requires_grad=True, stores_grad=True)
+    w0.gaussian(0.0, 0.1)
+    b0 = tensor.Tensor(shape=(1, 10), requires_grad=True, stores_grad=True)
+    b0.set_value(0.0)

     def forward(x,t):
-        y=conv1(x)[0]
-        y=relu1(y)[0]
-        y=conv2(y)[0]
-        y=relu2(y)[0]
-        y=pooling(y)[0]
-        y=flatten(y)[0]
-        y=linear(y)[0]
-        y=softmax(y)[0]
-        loss=cross_entropy(y, t)[0]
+        y=conv1(x)
+        y=autograd.relu(y)
+        y=conv2(y)
+        y=autograd.relu(y)
+        y=autograd.max_pool_2d(y)
+        y=autograd.flatten(y)
+        y=autograd.dense(y, w0, b0)
+        y=autograd.soft_max(y)
+        loss=autograd.cross_entropy(y, t)
         return loss, y

     for epoch in range(epochs):
         for i in range(16):
             inputs = tensor.Tensor(data=x_train[i * 100:(1 + i) * 100, :], requires_grad=False, stores_grad=False)
             targets = tensor.Tensor(data=y_train[i * 100:(1 + i) * 100, :], requires_grad=False, stores_grad=False)
+
             loss, y = forward(inputs, targets)

             accuracy_rate = accuracy(autograd.ctensor2numpy(y.data),autograd.ctensor2numpy(targets.data))
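
In python/singa/autograd.py below, the previous 'xavier' TODO branches are replaced
by a working implementation, and Conv2d's default W_specs switch to it. Xavier
(Glorot) initialization draws zero-mean Gaussian weights with standard deviation
sqrt(2 / (fan_in + fan_out)); in this commit the fans are taken to be the layer's
in_channels and out_channels. The rule, extracted here as a standalone sketch for
reference:

    import math

    def xavier_std(fan_in, fan_out):
        # Xavier/Glorot: keeps activation variance roughly stable across layers
        return math.sqrt(2.0 / (fan_in + fan_out))

    # as used in Conv2d.__call__ below:
    #   self.w.gaussian(0.0, xavier_std(self.in_channels, self.out_channels))
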
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed464efe/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index b55b7eb..35211de 100644
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -10,11 +10,16 @@ from singa.proto import model_pb2
 from . import singa_wrap as singa

 import numpy as np
+import math

 CTensor = singa.Tensor


 class ReLU(Operation):
+    def __call__(self, x, flag=True):
+        assert type(flag) is bool, 'flag can only be bool.'
+        self.flag=flag
+        return self._do_forward(x)

     def forward(self, x):
         '''
@@ -24,7 +29,8 @@ class ReLU(Operation):
         Returns:
             a new CTensor whose element y = x if x >= 0; otherwise 0;
         '''
-        self.input = x
+        if self.flag:
+            self.input = x
         return singa.ReLU(x)

     def backward(self, dy):
@@ -45,6 +51,10 @@ def relu(x):

 class Matmul(Operation):
     '''For matrix multiplication'''
+    def __call__(self, x, w, flag=True):
+        assert type(flag) is bool, 'flag can only be bool.'
+        self.flag=flag
+        return self._do_forward(x, w)

     def forward(self, x, w):
         '''Do forward propgation.
@@ -58,7 +68,8 @@ class Matmul(Operation):
         Returns:
             a CTensor for the result
         '''
-        self.input = (x, w)
+        if self.flag:
+            self.input = (x, w)
         return singa.Mult(x, w)

     def backward(self, dy):
@@ -243,9 +254,7 @@ class Conv2d(Operation):
         cudnn_prefer = 'fastest'
         workspace_byte_limit = 1024
         data_format = 'NCHW'
-        W_specs ={'init': 'gaussian',
-                  'mean':0.0,
-                  'std':0.1}
+        W_specs ={'init': 'xavier'}
         b_specs = {'init': 'constant'}
         input_sample_shape = None
@@ -266,7 +275,8 @@ class Conv2d(Operation):
             else:
                 inner_params[kwarg] = kwargs[kwarg]

-
+        self.in_channels = in_channels
+        self.out_channels = out_channels
         self.W_specs=inner_params['W_specs']
         self.b_specs=inner_params['b_specs']
@@ -296,25 +306,26 @@ class Conv2d(Operation):
         param_data = self.PyLayer.layer.param_values()

         if not hasattr(self, 'w'):
-            self.w = Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
-            if self.W_specs['init'] == 'gaussian':
+            self.w = Tensor(device=param_data[0].device, data=param_data[0], requires_grad=True, stores_grad=True)
+            if self.W_specs['init'] == 'xavier':
+                std = math.sqrt(2.0/(self.in_channels+self.out_channels))
+                self.w.gaussian(0.0, std)
+            elif self.W_specs['init'] == 'gaussian':
                 if 'std' not in self.W_specs or 'mean' not in self.W_specs:
                     self.w.gaussian(0.0, 0.1)
                 else:
                     self.w.gaussian(self.W_specs['mean'],self.W_specs['std'])
             elif self.W_specs['init'] == 'uniform':
                 if 'low' not in self.W_specs or 'high' not in self.W_specs:
-                    self.w.uniform(0.0, 1.0)
+                    self.w.uniform(0.0, 0.1)
                 else:
                     self.w.uniform(self.W_specs['low'],self.W_specs['high'])
-            elif self.W_specs['init'] == 'xavier':
-                pass # TODO

         xs = [x, self.w]

         if len(param_data) == 2:
             if not hasattr(self, 'b'):
-                self.b = Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
+                self.b = Tensor(device=param_data[1].device, data=param_data[1], requires_grad=True, stores_grad=True)
                 if self.b_specs['init'] == 'gaussian':
                     if 'std' not in self.b_specs or 'mean' not in self.b_specs:
                         self.b.gaussian(0.0, 0.1)
@@ -322,18 +333,19 @@
                     else:
                         self.b.gaussian(self.b_specs['mean'], self.b_specs['std'])
                 elif self.b_specs['init'] == 'uniform':
                     if 'low' not in self.b_specs or 'high' not in self.b_specs:
-                        self.b.uniform(0.0, 1.0)
+                        self.b.uniform(0.0, 0.1)
                     else:
                         self.b.uniform(self.b_specs['low'], self.b_specs['high'])
-                elif self.b_specs['init'] == 'xavier':
-                    pass # TODO
                 elif self.b_specs['init'] == 'constant':
                     self.b.set_value(0.0)

                 xs.append(self.b)

         xs = tuple(xs)
-        return self._do_forward(*xs)
+        return self._do_forward_0(*xs)
+
+    def _do_forward_0(self, *xs):
+        return self._do_forward(*xs)[0]

     def forward(self, *xs):
         return self.PyLayer.layer.Forward(self.flag, xs[0])
@@ -351,7 +363,7 @@ class MaxPool2d(Operation):
         data_format = 'NCHW'
         input_sample_shape = None

-        allowed_kwargs = {'name': name,
+        inner_params = {'name': name,
                           'border_mode': border_mode,
                           'data_format': data_format,
                           'input_sample_shape': input_sample_shape
@@ -361,7 +373,7 @@ class MaxPool2d(Operation):
             if kwarg not in allowed_kwargs:
                 raise TypeError('Keyword argument not understood:', kwarg)
             else:
-                allowed_kwargs[kwarg] = kwargs[kwarg]
+                inner_params[kwarg] = kwargs[kwarg]

         if padding == 0:
             pad = None
@@ -371,9 +383,9 @@ class MaxPool2d(Operation):
         if dilation != 1 or return_indices is not False or ceil_mode is not False:
             raise ValueError('Not implemented yet')

-        self.PyLayer = layer.Pooling2D(name, model_pb2.PoolingConf.MAX,
-                                       kernel_size, stride, border_mode,
-                                       pad, data_format, input_sample_shape)
+        self.PyLayer = layer.Pooling2D(inner_params['name'], model_pb2.PoolingConf.MAX,
+                                       kernel_size, stride, inner_params['border_mode'],
+                                       pad, inner_params['data_format'], inner_params['input_sample_shape'])

     def __call__(self, x, flag=True):
         assert type(flag) is bool, 'flag can only be bool.'
@@ -393,8 +405,11 @@ class MaxPool2d(Operation):
     def backward(self, dy):
         return self.PyLayer.layer.Backward(0, dy)[0]

+def max_pool_2d(x,kernel_size=3, stride=1, padding=0, dilation=1, return_indices=False, ceil_mode=False, **kwargs):
+    return MaxPool2d(kernel_size, stride, padding, dilation, return_indices, ceil_mode, **kwargs)(x)[0]
+

-class ReLU_Layer(Operation):
+'''class ReLU_Layer(Operation):
     def __init__(self, name='ReLU', mode='relu',input_sample_shape=None):
         self.PyLayer = layer.Activation(name, mode, input_sample_shape)
@@ -412,7 +427,7 @@ class ReLU_Layer(Operation):
         return self.PyLayer.layer.Forward(self.flag, xs[0])

     def backward(self, dy):
-        return self.PyLayer.layer.Backward(0, dy)[0]
+        return self.PyLayer.layer.Backward(0, dy)[0]'''


 class Flatten(Operation):
@@ -435,15 +450,26 @@ class Flatten(Operation):
     def backward(self, dy):
         return self.PyLayer.layer.Backward(0, dy)[0]

+def flatten(x, name='Flatten', axis=1, input_sample_shape=None):
+    return Flatten(name,axis,input_sample_shape)(x)[0]

-class Linear(Operation):
+def dense(x, w, b=None, bias=True, axis=0):
+    if bias:
+        if b is None:
+            raise ValueError('must input bias value.')
+        else:
+            y= matmul(x, w)
+            y= add_bias(y, b, axis)
+            return y
+    else:
+        return matmul(x, w)
+
+'''class Linear(Operation):

     def __init__(self, in_features, out_features, bias=True, **kwargs):

         name = 'Linear'
         W_transpose=False
-        W_specs = {'init': 'gaussian',
-                   'mean': 0.0,
-                   'std': 0.1}
+        W_specs = {'init': 'xavier'}
         b_specs = {'init': 'constant'}
         input_sample_shape = in_features
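
The dense function added above is the committed replacement for the removed Linear
layer: it simply chains the existing matmul and add_bias operations over an explicit
weight and bias tensor. Typical use, mirroring the mnist_cnn.py example (w0 and b0
are created by the caller, as shown earlier):

    y = autograd.dense(x, w0, b0)           # add_bias(matmul(x, w0), b0)
    y = autograd.dense(x, w0, bias=False)   # weight-only variant, no bias term

One consequence of this design is that parameter creation and initialization move
out of the layer and into user code, which is why the CNN example now allocates
w0 and b0 itself.
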
@@ -462,6 +488,8 @@ class Linear(Operation):
             else:
                 inner_params[kwarg] = kwargs[kwarg]

+        self.in_features = in_features
+        self.out_features = out_features
         self.W_specs = W_specs
         self.b_specs = b_specs
@@ -481,25 +509,26 @@ class Linear(Operation):
         param_data = self.PyLayer.layer.param_values()

         if not hasattr(self, 'w'):
-            self.w = Tensor(data=param_data[0], requires_grad=True, stores_grad=True)
-            if self.W_specs['init'] == 'gaussian':
+            self.w = Tensor(device=param_data[0].device, data=param_data[0], requires_grad=True, stores_grad=True)
+            if self.W_specs['init'] == 'xavier':
+                std = math.sqrt(2.0/(self.in_channels+self.out_channels))
+                self.w.gaussian(0.0, std)
+            elif self.W_specs['init'] == 'gaussian':
                 if 'std' not in self.W_specs or 'mean' not in self.W_specs:
                     self.w.gaussian(0.0, 0.1)
                 else:
                     self.w.gaussian(self.W_specs['mean'],self.W_specs['std'])
             elif self.W_specs['init'] == 'uniform':
                 if 'low' not in self.W_specs or 'high' not in self.W_specs:
-                    self.w.uniform(0.0, 1.0)
+                    self.w.uniform(0.0, 0.1)
                 else:
                     self.w.uniform(self.W_specs['low'],self.W_specs['high'])
-            elif self.W_specs['init'] == 'xavier':
-                pass # TODO

         xs = [x, self.w]

         if len(param_data) == 2:
             if not hasattr(self, 'b'):
-                self.b = Tensor(data=param_data[1], requires_grad=True, stores_grad=True)
+                self.b = Tensor(device=param_data[1].device, data=param_data[1], requires_grad=True, stores_grad=True)
                 if self.b_specs['init'] == 'gaussian':
                     if 'std' not in self.b_specs or 'mean' not in self.b_specs:
                         self.b.gaussian(0.0, 0.1)
@@ -507,11 +536,9 @@ class Linear(Operation):
                         self.b.gaussian(self.b_specs['mean'], self.b_specs['std'])
                 elif self.b_specs['init'] == 'uniform':
                     if 'low' not in self.b_specs or 'high' not in self.b_specs:
-                        self.b.uniform(0.0, 1.0)
+                        self.b.uniform(0.0, 0.1)
                     else:
                         self.b.uniform(self.b_specs['low'], self.b_specs['high'])
-                elif self.b_specs['init'] == 'xavier':
-                    pass # TODO
                 elif self.b_specs['init'] == 'constant':
                     self.b.set_value(0.0)
@@ -525,7 +552,7 @@ class Linear(Operation):

     def backward(self, dy):
         ret = self.PyLayer.layer.Backward(0, dy)
-        return (ret[0],)+ret[1]
+        return (ret[0],)+ret[1]'''

 def infer_dependency(op):
     '''

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ed464efe/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/python/singa/tensor.py b/python/singa/tensor.py
index ff43cc6..117779e 100644
--- a/python/singa/tensor.py
+++ b/python/singa/tensor.py
@@ -97,7 +97,7 @@ class Tensor(object):
             copy_from_numpy(self.data, data)
         elif isinstance(data, CTensor):
             self.data = data
-            assert data.device() == device, 'not the same device'
+            assert data.device == device, 'not the same device'
         else:
             self.data = CTensor(list(shape), device, dtype)
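
The one-line tensor.py fix above treats CTensor's device as a property rather than a
method call, matching how Conv2d now reads param_data[0].device when wrapping layer
parameters. A minimal sketch of the code path it fixes; the raw CTensor construction
here is illustrative (in the commit the CTensor comes from layer.param_values()):

    from singa import tensor, device
    from singa import singa_wrap as singa

    dev = device.get_default_device()
    ct = singa.Tensor([2, 3], dev)   # a raw CTensor (illustrative)
    # wrapping an existing CTensor now passes the device assertion
    t = tensor.Tensor(device=dev, data=ct, requires_grad=True, stores_grad=True)
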