SINGA-371 Implement functional operations in C++ for autograd - integrate convolution functions into conv2d autograd operation (GPU part)
- export the field 'batchsize' of CudnnConvHandle to Python as it is needed in Conv2d_GPU.__call__().
- set the default 'workspace_byte_limit' to 1GB, which is consistent with the default setting in the Conv2D Layer.

Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/c57b87ae
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/c57b87ae
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/c57b87ae

Branch: refs/heads/master
Commit: c57b87ae7ffd051d818b048de3c20c69643cbd25
Parents: 2cac057
Author: xuewanqi <[email protected]>
Authored: Wed Jun 20 08:46:25 2018 +0000
Committer: xuewanqi <[email protected]>
Committed: Wed Jun 20 14:47:37 2018 +0000

----------------------------------------------------------------------
 python/singa/autograd.py          | 99 ++++++++++++++++++++++++++++++++++
 src/api/model_operation.i         |  4 +-
 src/model/convolution_functions.h |  2 +-
 3 files changed, 102 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c57b87ae/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 83362e2..c7e0adb 100644
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -583,6 +583,105 @@ class Flatten(Operation):
 def flatten(x):
     return Flatten()(x)[0]

+
+class Conv2d_GPU(Operation):
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+                 padding=0, dilation=1, groups=1, bias=True, **kwargs):
+
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+
+        if isinstance(kernel_size, int):
+            self.kernel_size = (kernel_size, kernel_size)
+        elif isinstance(kernel_size, tuple):
+            self.kernel_size = kernel_size
+        else:
+            raise TypeError('Wrong kernel_size type.')
+
+        if isinstance(stride, int):
+            self.stride = (stride, stride)
+        elif isinstance(stride, tuple):
+            self.stride = stride
+        else:
+            raise TypeError('Wrong stride type.')
+
+        if isinstance(padding, int):
+            self.padding = (padding, padding)
+        elif isinstance(padding, tuple):
+            self.padding = padding
+        else:
+            raise TypeError('Wrong padding type.')
+
+        if dilation != 1 or groups != 1:
+            raise ValueError('Not implemented yet')
+
+        self.bias = bias
+
+        inner_params = {'cudnn_prefer': 'fastest',
+                        'workspace_byte_limit': 1024}
+        # TODO: validate the values in inner_params
+
+        for kwarg in kwargs:
+            if kwarg not in inner_params:
+                raise TypeError('Keyword argument not understood:', kwarg)
+            else:
+                inner_params[kwarg] = kwargs[kwarg]
+
+        self.convhandle = singa.SetupConv(
+            self.kernel_size[0], self.kernel_size[1],
+            self.padding[0], self.padding[1],
+            self.stride[0], self.stride[1],
+            self.in_channels, self.out_channels,
+            self.bias, inner_params['workspace_byte_limit'] * 1024 * 1024,
+            inner_params['cudnn_prefer'])
+
+        w_shape = (self.out_channels, self.in_channels,
+                   self.kernel_size[0], self.kernel_size[1])
+        self.W = Tensor(shape=w_shape, requires_grad=True, stores_grad=True)
+        std = math.sqrt(
+            2.0 / (self.in_channels * self.kernel_size[0] *
+                   self.kernel_size[1] + self.out_channels))
+        self.W.gaussian(0.0, std)
+
+        if self.bias:
+            b_shape = (self.out_channels,)
+        else:
+            b_shape = (1,)  # to keep the forward call consistent
+        self.b = Tensor(shape=b_shape, requires_grad=True, stores_grad=True)
+        self.b.set_value(0.0)
+
+    def __call__(self, x):
+        assert x.ndim() == 4, 'The input should be 4D.'
+        assert x.shape[1] == self.in_channels, 'in_channels mismatched.'
+        # TODO: valid padding check.
+
+        if not hasattr(self, 'cudnnconvhandle'):
+            self.cudnnconvhandle = singa.InitCudnn(x.data, self.convhandle)
+        elif x.shape[0] != self.cudnnconvhandle.batchsize:
+            # batch size changed, so the cuDNN descriptors must be rebuilt
+            self.cudnnconvhandle = singa.InitCudnn(x.data, self.convhandle)
+
+        self.dev = x.device
+
+        self.W.to_device(self.dev)
+        xs = [x, self.W]
+
+        self.b.to_device(self.dev)
+        xs.append(self.b)
+        return self._do_forward(*xs)[0]
+
+    def forward(self, *xs):
+        if training:
+            self.x = xs[0]
+        return singa.CudnnConvForward(xs[0], xs[1], xs[2],
+                                      self.convhandle, self.cudnnconvhandle)
+
+    def backward(self, dy):
+        assert training is True and hasattr(self, 'x'), \
+            'Please set training to True before running backward.'
+
+        # TODO: check device?
+        dy.ToDevice(self.dev)
+
+        dx = singa.CudnnConvBackwardx(dy, self.W, self.x, self.cudnnconvhandle)
+        dW = singa.CudnnConvBackwardW(dy, self.x, self.W, self.cudnnconvhandle)
+        if self.bias:
+            db = singa.CudnnConvBackwardb(dy, self.b, self.cudnnconvhandle)
+            return dx, dW, db
+        else:
+            return dx, dW
+

 def infer_dependency(op):
     '''


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c57b87ae/src/api/model_operation.i
----------------------------------------------------------------------
diff --git a/src/api/model_operation.i b/src/api/model_operation.i
index 77ef6bb..a74ec5e 100644
--- a/src/api/model_operation.i
+++ b/src/api/model_operation.i
@@ -7,14 +7,14 @@ namespace singa{

 struct ConvHandle{};

-struct CudnnConvHandle{};
+struct CudnnConvHandle{size_t batchsize;};

 ConvHandle SetupConv(
                      const size_t kernel_h_, const size_t kernel_w_,
                      const size_t pad_h_, const size_t pad_w_,
                      const size_t stride_h_, const size_t stride_w_,
                      const size_t channels_, const size_t num_filters_,
-                     const bool bias_term_ = true, const size_t workspace_byte_limit_ =1024*1024,
+                     const bool bias_term_ = true, const size_t workspace_byte_limit_ =1024*1024*1024,
                      const std::string prefer_="fastest");

 CudnnConvHandle InitCudnn(const Tensor &input, const ConvHandle ch);


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c57b87ae/src/model/convolution_functions.h
----------------------------------------------------------------------
diff --git a/src/model/convolution_functions.h b/src/model/convolution_functions.h
index 9462805..e34423f 100644
--- a/src/model/convolution_functions.h
+++ b/src/model/convolution_functions.h
@@ -48,7 +48,7 @@ ConvHandle SetupConv(
                     const size_t pad_h_, const size_t pad_w_,
                     const size_t stride_h_, const size_t stride_w_,
                     const size_t channels_, const size_t num_filters_,
-                    const bool bias_term_ = true, const size_t workspace_byte_limit_=1024*1024,
+                    const bool bias_term_ = true, const size_t workspace_byte_limit_=1024*1024*1024,
                     const std::string prefer_="fastest");

 void testInitCudnn(const Tensor &input, const ConvHandle ch);
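
For context, the sketch below shows how the new operation might be driven from the Python API. It is only an illustration, not part of this patch: it assumes a CUDA build of SINGA, the device.create_cuda_gpu() and tensor.Tensor(shape=..., device=...) helpers of the existing Python package, and the module-level 'training' flag consulted by forward()/backward(). The second call shows why 'batchsize' is exported from CudnnConvHandle: a new batch size makes __call__() rebuild the cuDNN handle via singa.InitCudnn.

# Minimal usage sketch; names outside this commit are assumptions, not part of the patch.
from singa import autograd, device, tensor

autograd.training = True          # let forward() cache its input for backward()
dev = device.create_cuda_gpu()    # cuDNN handles require a GPU device

conv = autograd.Conv2d_GPU(in_channels=3, out_channels=8, kernel_size=3,
                           padding=1)  # workspace_byte_limit defaults to 1 GB

x = tensor.Tensor(shape=(16, 3, 32, 32), device=dev)   # NCHW input
x.gaussian(0.0, 0.1)
y = conv(x)      # first call: singa.InitCudnn builds the cuDNN handle

x2 = tensor.Tensor(shape=(8, 3, 32, 32), device=dev)   # different batch size
x2.gaussian(0.0, 0.1)
y2 = conv(x2)    # batchsize mismatch is detected, so the handle is rebuilt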
