SINGA-379 Implement batchnorm operation and its related functions for autograd
Test mnist_cnn.py with batchnorm

Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/f134a24e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/f134a24e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/f134a24e

Branch: refs/heads/master
Commit: f134a24e2b58baad9dc29167e323d14cdf89d2a4
Parents: ce1a733
Author: wang wei <[email protected]>
Authored: Thu Jul 12 12:28:41 2018 +0800
Committer: wang wei <[email protected]>
Committed: Thu Jul 12 12:33:04 2018 +0800

----------------------------------------------------------------------
 examples/autograd/mnist_cnn.py   |  4 ++++
 python/singa/autograd.py         | 10 +++++-----
 src/api/model_layer.i            | 18 +++++++++---------
 src/api/model_operation.i        |  9 +++++----
 src/model/operation/batchnorm.cc | 31 ++++++++++++++-----------------
 5 files changed, 37 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f134a24e/examples/autograd/mnist_cnn.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mnist_cnn.py b/examples/autograd/mnist_cnn.py
index f78ccc8..b1d8dbe 100755
--- a/examples/autograd/mnist_cnn.py
+++ b/examples/autograd/mnist_cnn.py
@@ -106,15 +106,19 @@ if __name__ == '__main__':

     # operations initialization
     conv1 = autograd.Conv2D(1, 32, 3, padding=1, bias=False)
+    bn1 = autograd.BatchNorm(32)
     conv2 = autograd.Conv2D(32, 32, 3, padding=1)
+    bn2 = autograd.BatchNorm(32)
     linear = autograd.Linear(32 * 28 * 28, 10)

     def forward(x, t):
         y = conv1(x)
         y = autograd.relu(y)
+        y = bn1(y)
         y = autograd.max_pool_2d(y)
         y = conv2(y)
+        y = bn2(y)
         y = autograd.relu(y)
         y = autograd.max_pool_2d(y)
         y=autograd.flatten(y)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f134a24e/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 3a2eddd..d272dcd 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -771,7 +771,7 @@ class Conv2D(Layer):
         return y


-class BatchNorm2d(Layer):
+class BatchNorm(Layer):

     def __init__(self, num_features, momentum=0.9):
         self.channels = num_features
@@ -810,12 +810,12 @@ class BatchNorm2d(Layer):
                                                   self.momentum, x.data)
             self.handle.device_id = x.device.id()

-        y = batchnorm2d(x, self.scale, self.bias,
+        y = batchnorm(x, self.scale, self.bias,
                         self.running_mean, self.running_var, self.handle)
         return y


-class _BatchNorm2d(Operation):
+class _BatchNorm(Operation):

     def __init__(self, running_mean, running_var, handle):
         self.running_mean = running_mean.data
@@ -855,5 +855,5 @@ class _BatchNorm2d(Operation):
         return dx, ds, db


-def batchnorm2d(x, scale, bias, running_mean, running_var, handle):
-    return _BatchNorm2d(running_mean, running_var, handle)(x, scale, bias)[0]
+def batchnorm(x, scale, bias, running_mean, running_var, handle):
+    return _BatchNorm(running_mean, running_var, handle)(x, scale, bias)[0]

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f134a24e/src/api/model_layer.i
----------------------------------------------------------------------
diff --git a/src/api/model_layer.i b/src/api/model_layer.i
index d449f24..dc04be0 100644
--- a/src/api/model_layer.i
+++ b/src/api/model_layer.i
@@ -29,21 +29,21 @@

 %{
-// To make the code compatible between py2 and py3, the follow
-// macro is required, which forces the
-// interface (function) to accept byte string (from python) and
-// return byte string (in python) in py3. Otherwise the strings
+// To make the code compatible between py2 and py3, the follow
+// macro is required, which forces the
+// interface (function) to accept byte string (from python) and
+// return byte string (in python) in py3. Otherwise the strings
 // should be unicode strings in py3.
 // Note that by default the strings in python3 are of type unicode.
-// You have to encode it with the correct encoding (default is utf-8)
+// You have to encode it with the correct encoding (default is utf-8)
 // to convert it into bytes. Sometimes, the string is already byte string
 // e.g. from protobuf SerializeToString, then there is no need to do
 // conversion. The output byte strings should be decoded into unicode.
-// For python2, the default type of string is byte string.
+// For python2, the default type of string is byte string.
 //
-// Because protobuf::SerializeToString cannot be decoded into unicode
-// string, we cannot use SWIG_PYTHON_2_UNICODE which forces the
-// interface (function) to accept unicode strings as input args
+// Because protobuf::SerializeToString cannot be decoded into unicode
+// string, we cannot use SWIG_PYTHON_2_UNICODE which forces the
+// interface (function) to accept unicode strings as input args
 // and return unicode strings.
 //
 // TODO(wangwei) make strings compatible between py2 and py3.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f134a24e/src/api/model_operation.i
----------------------------------------------------------------------
diff --git a/src/api/model_operation.i b/src/api/model_operation.i
index 6f2d1fa..eb41fd0 100755
--- a/src/api/model_operation.i
+++ b/src/api/model_operation.i
@@ -7,6 +7,7 @@
 #include "../src/model/operation/convolution.h"
 #include "../src/model/operation/batchnorm.h"
 %}
+
 namespace singa {

 class ConvHandle {
@@ -68,15 +69,15 @@ class CudnnBatchNormHandle: public BatchNormHandle{
   size_t batchsize;
 };

-const vector<Tensor> GpuBatchNormForwardTraining(const CudnnBatchNormHandle &cbnh,
+const std::vector<Tensor> GpuBatchNormForwardTraining(const CudnnBatchNormHandle &cbnh,
   const Tensor& x, const Tensor& bnScale, const Tensor& bnBias,
   Tensor& running_mean, Tensor& running_var);

-Tensor GpuBatchNormForwardInference(const CudnnBatchNormHandle &cbnh, const Tensor& x,
+Tensor GpuBatchNormForwardInference(const CudnnBatchNormHandle &cbnh, const Tensor& x,
   const Tensor& bnScale, const Tensor& bnBias, const Tensor& running_mean,
   const Tensor& running_var);

-const std::vector<Tensor> GpuBatchNormBackward(const CudnnBatchNormHandle &cbnh,
+const std::vector<Tensor> GpuBatchNormBackward(const CudnnBatchNormHandle &cbnh,
   const Tensor& dy, const Tensor& x, const Tensor& bnScale, const Tensor& mean,
   const Tensor& var);
-
+
 #endif  // USE_CUDNN

 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f134a24e/src/model/operation/batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/operation/batchnorm.cc b/src/model/operation/batchnorm.cc
index 7040895..29eaba9 100755
--- a/src/model/operation/batchnorm.cc
+++ b/src/model/operation/batchnorm.cc
@@ -19,7 +19,7 @@ BatchNormHandle::BatchNormHandle(const float momentum, const Tensor& input) {
   }
 };

-#if USE_CUDNN
+#ifdef USE_CUDNN
 CudnnBatchNormHandle::CudnnBatchNormHandle(const float momentum,
     const Tensor& input): BatchNormHandle(momentum, input) {
   if (is_2d)
@@ -38,14 +38,14 @@ CudnnBatchNormHandle::CudnnBatchNormHandle(const float momentum,
                          1, 1));
 };

-Tensor GpuBatchNormForwardTraining(const CudnnBatchNormHandle &cbnh,
+const std::vector<Tensor> GpuBatchNormForwardTraining(const CudnnBatchNormHandle &cbnh,
   const Tensor& x, const Tensor& bnScale, const Tensor& bnBias,
   Tensor& running_mean, Tensor& running_var) {
   CHECK_EQ(x.device()->lang(), kCuda);
   CHECK_EQ(bnScale.device()->lang(), kCuda);
   CHECK_EQ(bnBias.device()->lang(), kCuda);
-  CHECK_EQ(runningMean.device()->lang(), kCuda);
-  CHECK_EQ(runningVariance.device()->lang(), kCuda);
+  CHECK_EQ(running_mean.device()->lang(), kCuda);
+  CHECK_EQ(running_var.device()->lang(), kCuda);

   Tensor mean, var;
   mean.ResetLike(running_mean);
@@ -78,7 +78,7 @@ Tensor GpuBatchNormForwardTraining(const CudnnBatchNormHandle &cbnh,
   });
   if (cbnh.is_2d) output.Reshape(Shape{shape.at(0), shape.at(1)});
   return {output, mean, var};
-};
+}

 Tensor GpuBatchNormForwardInference(const CudnnBatchNormHandle &cbnh,
                                     const Tensor& x, const Tensor& bnScale,
@@ -86,8 +86,8 @@ Tensor GpuBatchNormForwardInference(const CudnnBatchNormHandle &cbnh,
   CHECK_EQ(x.device()->lang(), kCuda);
   CHECK_EQ(bnScale.device()->lang(), kCuda);
   CHECK_EQ(bnBias.device()->lang(), kCuda);
-  CHECK_EQ(cbnh.running_mean.device()->lang(), kCuda);
-  CHECK_EQ(cbnh.running_variance.device()->lang(), kCuda);
+  CHECK_EQ(running_mean.device()->lang(), kCuda);
+  CHECK_EQ(running_var.device()->lang(), kCuda);

   Shape shape = x.shape();

@@ -106,17 +106,13 @@ Tensor GpuBatchNormForwardInference(const CudnnBatchNormHandle &cbnh,
       input.block()->data(), cbnh.shape_desc, output.block()->mutable_data(),
       cbnh.param_desc, bnScale.block()->data(), bnBias.block()->data(),
       running_mean.block()->data(), running_var.block()->data(), epsilon));
-  }, {
-    input.block(), bnScale.block(), bnBias.block(), running_mean.block(),
-    running_variance.block()
-  },
+  }, { input.block(), bnScale.block(), bnBias.block(), running_mean.block(), running_var.block() },
   {output.block()});
-
   if (cbnh.is_2d) output.Reshape(Shape{shape.at(0), shape.at(1)});
   return output;
-};
+}

-std::vector<Tensor> GpuBatchNormBackward(const CudnnBatchNormHandle &cbnh,
+const std::vector<Tensor> GpuBatchNormBackward(const CudnnBatchNormHandle &cbnh,
   const Tensor& dy, const Tensor& x, const Tensor& bnScale, const Tensor& mean,
   const Tensor& var) {
   CHECK_EQ(dy.device()->lang(), kCuda);
@@ -137,7 +133,7 @@ std::vector<Tensor> GpuBatchNormBackward(const CudnnBatchNormHandle &cbnh,

   dx.device()->Exec(
   [&](Context * ctx) {
-
+
     const float alpha = 1.0f, beta = .0f;
     double epsilon = CUDNN_BN_MIN_EPSILON;
     CUDNN_CHECK(cudnnBatchNormalizationBackward(
@@ -151,8 +147,9 @@ std::vector<Tensor> GpuBatchNormBackward(const CudnnBatchNormHandle &cbnh,
   {dx.block(), dbnScale.block(), dbnBias.block()});

   if (cbnh.is_2d) dx.Reshape(Shape{dx.shape().at(0), dx.shape().at(1)});
-
+
   return {dx, dbnScale, dbnBias};
-};
+}
+#endif  //USE_CUDNN
 }
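----------------------------------------------------------------------

Usage sketch: the snippet below condenses how the renamed autograd.BatchNorm
layer is used after this change, mirroring the mnist_cnn.py hunk above. It
assumes `x` (a batch of 1x28x28 MNIST images as a singa Tensor) and the
labels `t` are prepared as in the unmodified parts of that example script;
the loss computation after the linear layer is likewise left as in the
original script.

    from singa import autograd

    # layer construction, as in the updated example
    conv1 = autograd.Conv2D(1, 32, 3, padding=1, bias=False)
    bn1 = autograd.BatchNorm(32)    # renamed from BatchNorm2d in this commit
    conv2 = autograd.Conv2D(32, 32, 3, padding=1)
    bn2 = autograd.BatchNorm(32)
    linear = autograd.Linear(32 * 28 * 28, 10)

    def forward(x, t):
        y = conv1(x)
        y = autograd.relu(y)
        y = bn1(y)                  # batchnorm inserted after the first relu
        y = autograd.max_pool_2d(y)
        y = conv2(y)
        y = bn2(y)                  # batchnorm inserted after the second conv
        y = autograd.relu(y)
        y = autograd.max_pool_2d(y)
        y = autograd.flatten(y)
        y = linear(y)
        # loss is computed from y and t as in the rest of mnist_cnn.py
        return y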
