SINGA-379 Implement batchnorm operation and its related functions for autograd
- implement batchnorm2d related functions (GPU part)
- add interface files for developed functions
- create corresponding operation and NewLayer for batchnorm2d

Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/a105b240
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/a105b240
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/a105b240

Branch: refs/heads/master
Commit: a105b2404e36c81f30b873e0c74ccdb9a7e36bfd
Parents: b30d7ea
Author: xuewanqi <[email protected]>
Authored: Sun Jul 8 15:08:43 2018 +0000
Committer: Wang Wei <[email protected]>
Committed: Wed Jul 11 21:57:47 2018 +0800

----------------------------------------------------------------------
 python/singa/autograd.py           |  80 +++++++++++++++-
 src/api/model_operation.i          |  31 ++++++
 src/model/layer/cudnn_batchnorm.cc |   2 +-
 src/model/operation/batchnorm.cc   | 164 ++++++++++++++++++++++++++++++++
 src/model/operation/batchnorm.h    |  62 ++++++++++++
 test/python/test_operation.py      |  17 ++++
 6 files changed, 354 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a105b240/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index aa6b37a..97a75b4 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -27,7 +27,7 @@ from .tensor import Tensor
 from . import layer
 from singa.proto import model_pb2
 from . import singa_wrap as singa
-
+#from .tensor import einsum
 CTensor = singa.Tensor
 
 training = False
@@ -415,6 +415,14 @@ class SoftMax(Operation):
         out = out_1 - out_2
         dx = CTensor(out_1.shape)
         dx.CopyFloatDataFromHostPtr(out.flatten())
+        '''grad = Tensor(data=dy)
+        output = Tensor(data=self.output)
+        out_1 = einsum('ki,ki->ki', grad, output)
+        medium_out = einsum('ki,kj->kij', output, output)
+        out_2 = einsum('kij,kj->ki', medium_out, grad)
+        out = out_1 - out_2
+        dx = CTensor(out_1.data.shape)
+        dx.CopyFloatDataFromHostPtr(out.data.flatten())'''
         if self.axis == 0:
             return dx
         elif self.axis == 1:
@@ -761,3 +769,73 @@ class Conv2D(Layer):
 
         y = conv2d(x, self.W, self.b, self.handle)
         return y
+
+class BatchNorm2d(NewLayer):
+    def __init__(self, num_features, momentum=0.9):
+        self.channels = num_features
+        self.momentum = momentum
+
+        param_shape = (self.channels,)
+
+        self.scale = Tensor(shape=param_shape, requires_grad=True, stores_grad=True)
+        self.scale.set_value(1.0)
+
+        self.bias = Tensor(shape=param_shape, requires_grad=True, stores_grad=True)
+        self.bias.set_value(0.0)
+
+        self.runningmean = Tensor(shape=param_shape, requires_grad=False, stores_grad=False)
+        self.runningvariance = Tensor(shape=param_shape, requires_grad=False, stores_grad=False)
+
+    def __call__(self, x):
+        assert x.shape[1] == self.channels, 'number of channels mismatched.'
+
+        self.device_check(x, self.scale, self.bias, self.runningmean, self.runningvariance)
+
+        if x.device.id() == -1:
+            raise NotImplementedError
+
+        else:
+            if not hasattr(self, 'handle'):
+                self.handle = singa.CudnnBatchNormHandle(self.momentum, x.data, self.runningmean.data, self.runningvariance.data)
+            elif x.shape[0] != self.handle.batchsize:
+                self.handle = singa.CudnnBatchNormHandle(self.momentum, x.data, self.runningmean.data, self.runningvariance.data)
+            self.handle.device_id = x.device.id()
+
+        y = batchnorm2d(x, self.scale, self.bias, self.handle)
+        return y
+
+
+class _BatchNorm2d(Operation):
+    def __init__(self, handle):
+        self.handle = handle
+
+    def forward(self, x, scale, bias):
+        if training:
+            self.cache = (x,)
+            if self.handle.device_id == -1:
+                raise NotImplementedError
+            else:
+                return singa.GpuBatchNormForwardTraining(x, scale, bias, self.cache, self.handle)
+
+        else:
+            if self.handle.device_id == -1:
+                raise NotImplementedError
+            else:
+                return singa.GpuBatchNormForwardInference(x, scale, bias, self.handle)
+
+    def backward(self, dy):
+        assert training is True and hasattr(
+            self, 'cache'), 'Please set training to True before running backward.'
+
+        if dy.device().id() != self.handle.device_id:
+            dy.ToDevice(self.cache[0].device())
+
+        if self.handle.device_id == -1:
+            raise NotImplementedError
+        else:
+            dx, ds, db = singa.GpuBatchNormBackward(dy, self.cache, self.handle)
+            return dx, ds, db
+
+
+def batchnorm2d(x, scale, bias, handle):
+    return _BatchNorm2d(handle)(x, scale, bias)[0]

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a105b240/src/api/model_operation.i
----------------------------------------------------------------------
diff --git a/src/api/model_operation.i b/src/api/model_operation.i
index 3858a2b..783a1f8 100755
--- a/src/api/model_operation.i
+++ b/src/api/model_operation.i
@@ -5,6 +5,7 @@
 %include "std_string.i"
 %{
 #include "../src/model/operation/convolution.h"
+#include "../src/model/operation/batchnorm.h"
 %}
 
 namespace singa {
@@ -48,4 +49,34 @@ Tensor GpuConvBackwardb(const Tensor &dy, const Tensor &b, const CudnnConvHandle
 
 #endif  // USE_CUDNN
 
+class BatchNormHandle {
+ public:
+  BatchNormHandle(const float momentum, const Tensor& input, const Tensor& RunningMean_, const Tensor& RunningVariance_);
+
+  size_t batchsize;
+  Tensor runningMean_;
+  Tensor runningVariance_;
+};
+
+
+class CudnnBatchNormHandle : public BatchNormHandle {
+ public:
+  CudnnBatchNormHandle(const float momentum, const Tensor& input, const Tensor& RunningMean_, const Tensor& RunningVariance_);
+
+  size_t batchsize;
+  Tensor runningMean_;
+  Tensor runningVariance_;
+};
+
+Tensor GpuBatchNormForwardTraining(const Tensor& x, const Tensor& bnScale_, const Tensor& bnBias_,
+                                   std::vector<Tensor>& cache, CudnnBatchNormHandle &cbnh);
+
+Tensor GpuBatchNormForwardInference(const Tensor& x, const Tensor& bnScale_, const Tensor& bnBias_, const CudnnBatchNormHandle &cbnh);
+
+std::vector<Tensor> GpuBatchNormBackward(const Tensor& dy,
+                                         const std::vector<Tensor>& cache, const CudnnBatchNormHandle &cbnh);
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a105b240/src/model/layer/cudnn_batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_batchnorm.cc b/src/model/layer/cudnn_batchnorm.cc
old mode 100644
new mode 100755
index 389b41b..4816817
--- a/src/model/layer/cudnn_batchnorm.cc
+++ b/src/model/layer/cudnn_batchnorm.cc
@@ -167,7 +167,7 @@ const std::pair<Tensor, vector<Tensor>> CudnnBatchNorm::Backward(
             saveVarBlock->data()));
       },
-      {dx.block(), grad.block(), bnScale_.block(), resultSaveMean_.block(),
+      {x.block(), grad.block(), bnScale_.block(), resultSaveMean_.block(),
        resultSaveVariance_.block()},
       {dx.block(), dbnScale_.block(), dbnBias_.block()});
   } else {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a105b240/src/model/operation/batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/operation/batchnorm.cc b/src/model/operation/batchnorm.cc
new file mode 100644
index 0000000..9b6f9cd
--- /dev/null
+++ b/src/model/operation/batchnorm.cc
@@ -0,0 +1,164 @@
+#include "./batchnorm.h"
+
+namespace singa {
+
+BatchNormHandle::BatchNormHandle(const float momentum, const Tensor& input, const Tensor& RunningMean_,
+                                 const Tensor& RunningVariance_) {
+  factor_ = momentum;
+  batchsize = input.shape()[0];
+  channels_ = input.shape()[1];
+  if (input.nDim() == 4u) {
+    height_ = input.shape()[2];
+    width_ = input.shape()[3];
+    is_2d_ = false;
+  } else {
+    height_ = 1;
+    width_ = 1;
+    is_2d_ = true;
+  }
+  runningMean_ = RunningMean_;
+  runningVariance_ = RunningVariance_;
+};
+
+CudnnBatchNormHandle::CudnnBatchNormHandle(const float momentum, const Tensor& input, const Tensor& RunningMean_,
+                                           const Tensor& RunningVariance_)
+    : BatchNormHandle(momentum, input, RunningMean_, RunningVariance_) {
+  if (is_2d_)
+    mode_ = CUDNN_BATCHNORM_PER_ACTIVATION;
+  else
+    mode_ = CUDNN_BATCHNORM_SPATIAL;
+  auto dtype = input.data_type();
+  CUDNN_CHECK(cudnnCreateTensorDescriptor(&shape_desc_));
+  CUDNN_CHECK(cudnnCreateTensorDescriptor(&param_desc_));
+  CUDNN_CHECK(cudnnSetTensor4dDescriptor(shape_desc_, CUDNN_TENSOR_NCHW,
+                                         GetCudnnDataType(dtype), batchsize,
+                                         channels_, height_, width_));
+  CUDNN_CHECK(cudnnSetTensor4dDescriptor(param_desc_, CUDNN_TENSOR_NCHW,
+                                         GetCudnnDataType(dtype), 1, channels_,
+                                         1, 1));
+};
+
+Tensor GpuBatchNormForwardTraining(const Tensor& x, const Tensor& bnScale_, const Tensor& bnBias_,
+                                   std::vector<Tensor>& cache, const CudnnBatchNormHandle &cbnh) {
+
+  auto shape = x.shape();
+  Tensor output;
+  Tensor input;  // for unification of 2d and 4d cases.
+  if (cbnh.is_2d_)
+    input = Reshape(x, Shape{shape.at(0), shape.at(1), 1, 1});
+  else
+    input = x;
+  output.ResetLike(x);
+
+  Tensor resultSaveMean_;
+  Tensor resultSaveVariance_;
+
+  resultSaveMean_.Reshape(Shape{cbnh.channels_});
+  resultSaveVariance_.Reshape(Shape{cbnh.channels_});
+
+  cache.push_back(resultSaveMean_);
+  cache.push_back(resultSaveVariance_);
+  cache.push_back(bnScale_);
+  // cache = {x, mean, var, scale}
+
+  output.device()->Exec(
+      [&output, &input, &bnScale_, &bnBias_, &cache, &cbnh](Context* ctx) {
+        Block* inBlock = input.block(), * outBlock = output.block(),
+               * saveMeanBlock = cache[1].block(),
+               * saveVarBlock = cache[2].block(),
+               * runningMeanBlock = cbnh.runningMean_.block(),
+               * runningVarBlock = cbnh.runningVariance_.block(),
+               * bnScaleBlock = bnScale_.block(),
+               * bnBiasBlock = bnBias_.block();
+        const float alpha = 1.0f, beta = 0.0f;
+        double epsilon = CUDNN_BN_MIN_EPSILON;
+        CUDNN_CHECK(cudnnBatchNormalizationForwardTraining(
+            ctx->cudnn_handle, cbnh.mode_, &alpha, &beta, cbnh.shape_desc_,
+            inBlock->data(), cbnh.shape_desc_, outBlock->mutable_data(),
+            cbnh.param_desc_, bnScaleBlock->data(), bnBiasBlock->data(), cbnh.factor_,
+            runningMeanBlock->mutable_data(), runningVarBlock->mutable_data(),
+            epsilon, saveMeanBlock->mutable_data(),
+            saveVarBlock->mutable_data()));
+      },
+      {input.block(), bnScale_.block(), bnBias_.block()},
+      {output.block(), cbnh.runningMean_.block(), cbnh.runningVariance_.block(),
+       cache[1].block(), cache[2].block()});
+  if (cbnh.is_2d_) output.Reshape(Shape{shape.at(0), shape.at(1)});
+  return output;
+};
+
+Tensor GpuBatchNormForwardInference(const Tensor& x, const Tensor& bnScale_, const Tensor& bnBias_,
+                                    const CudnnBatchNormHandle &cbnh) {
+  auto shape = x.shape();
+  Tensor output;
+  Tensor input;  // for unification of 2d and 4d cases.
+  if (cbnh.is_2d_)
+    input = Reshape(x, Shape{shape.at(0), shape.at(1), 1, 1});
+  else
+    input = x;
+  output.ResetLike(x);
+  output.device()->Exec(
+      [&output, &input, &bnScale_, &bnBias_, &cbnh](Context* ctx) {
+        Block* inBlock = input.block(), * outBlock = output.block(),
+               * runningMeanBlock = cbnh.runningMean_.block(),
+               * runningVarBlock = cbnh.runningVariance_.block(),
+               * bnScaleBlock = bnScale_.block(),
+               * bnBiasBlock = bnBias_.block();
+        const float alpha = 1.0f, beta = 0.0f;
+        double epsilon = CUDNN_BN_MIN_EPSILON;
+        CUDNN_CHECK(cudnnBatchNormalizationForwardInference(
+            ctx->cudnn_handle, cbnh.mode_, &alpha, &beta, cbnh.shape_desc_,
+            inBlock->data(), cbnh.shape_desc_, outBlock->mutable_data(),
+            cbnh.param_desc_, bnScaleBlock->data(), bnBiasBlock->data(),
+            runningMeanBlock->data(), runningVarBlock->data(), epsilon));
+      },
+      {input.block(), bnScale_.block(), bnBias_.block(), cbnh.runningMean_.block(),
+       cbnh.runningVariance_.block()},
+      {output.block()});
+  if (cbnh.is_2d_) output.Reshape(Shape{shape.at(0), shape.at(1)});
+  return output;
+};
+
+
+std::vector<Tensor> GpuBatchNormBackward(const Tensor& dy, const std::vector<Tensor>& cache, const CudnnBatchNormHandle &cbnh) {
+
+  vector<Tensor> out_grads;
+  Tensor dx;
+  dx.ResetLike(dy);
+
+  Tensor dbnScale_;
+  dbnScale_.ResetLike(cache[3]);
+
+  Tensor dbnBias_;
+  dbnBias_.ResetLike(cache[3]);
+  //dbnBias_.ResetLike(bnBias_);
+
+  dx.device()->Exec(
+      [&dx, &dbnScale_, &dbnBias_, &dy, &cache, &cbnh](Context* ctx) {
+        Block* dyblock = dy.block(), * dxblock = dx.block(),
+               * xblock = cache[0].block(), * bnScaleBlock = cache[3].block(),
+               * dbnScaleBlock = dbnScale_.block(),
+               * dbnBiasBlock = dbnBias_.block(),
+               * saveMeanBlock = cache[1].block(),
+               * saveVarBlock = cache[2].block();
+        const float alpha = 1.0f, beta = 0.0f;
+        double epsilon = CUDNN_BN_MIN_EPSILON;
+        CUDNN_CHECK(cudnnBatchNormalizationBackward(
+            ctx->cudnn_handle, cbnh.mode_, &alpha, &beta, &alpha, &beta,
+            cbnh.shape_desc_, xblock->data(), cbnh.shape_desc_, dyblock->data(),
+            cbnh.shape_desc_, dxblock->mutable_data(), cbnh.param_desc_,
+            bnScaleBlock->data(), dbnScaleBlock->mutable_data(),
+            dbnBiasBlock->mutable_data(), epsilon, saveMeanBlock->data(),
+            saveVarBlock->data()));
+      },
+      {cache[0].block(), dy.block(), cache[3].block(), cache[1].block(),
+       cache[2].block()},
+      {dx.block(), dbnScale_.block(), dbnBias_.block()});
+
+  if (cbnh.is_2d_) dx.Reshape(Shape{dx.shape().at(0), dx.shape().at(1)});
+  out_grads.push_back(dx);
+  out_grads.push_back(dbnScale_);
+  out_grads.push_back(dbnBias_);
+  return out_grads;
+};
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a105b240/src/model/operation/batchnorm.h
----------------------------------------------------------------------
diff --git a/src/model/operation/batchnorm.h b/src/model/operation/batchnorm.h
new file mode 100644
index 0000000..f2da4cd
--- /dev/null
+++ b/src/model/operation/batchnorm.h
@@ -0,0 +1,62 @@
+//#ifndef SINGA_MODEL_OPERATION_BATCHNORM_H_
+//#define SINGA_MODEL_OPERATION_BATCHNORM_H_
+
+#include <vector>
+#include "singa/core/tensor.h"
+
+#ifdef USE_CUDNN
+#include <cudnn.h>
+#include "../layer/cudnn_utils.h"  // check_cudnn
+#endif  // USE_CUDNN
+
+namespace singa {
+
+class BatchNormHandle {
+ public:
+  BatchNormHandle(const float momentum, const Tensor& input, const Tensor& RunningMean_, const Tensor& RunningVariance_);
+
+  float factor_;
+  size_t channels_;
+  size_t batchsize;
+
+  Tensor runningMean_;
+  Tensor runningVariance_;
+
+  bool is_2d_;
+  //bool train = true;
+
+  size_t height_;
+  size_t width_;
+};
+
+//Tensor CpuBatchNormForwardTraining();
+
+//Tensor CpuBatchNormForwardInference();
+
+//Tensor CpuBatchNormBackwardx();
+
+
+#ifdef USE_CUDNN
+
+class CudnnBatchNormHandle : public BatchNormHandle {
+ public:
+  CudnnBatchNormHandle(const float momentum, const Tensor& input, const Tensor& RunningMean_, const Tensor& RunningVariance_);
+
+  //~CudnnBatchNormHandle();
+
+  cudnnBatchNormMode_t mode_;
+  cudnnTensorDescriptor_t shape_desc_ = nullptr;
+  cudnnTensorDescriptor_t param_desc_ = nullptr;
+};
+
+Tensor GpuBatchNormForwardTraining(const Tensor& x, const Tensor& bnScale_, const Tensor& bnBias_,
+                                   std::vector<Tensor>& cache, const CudnnBatchNormHandle &cbnh);
+
+Tensor GpuBatchNormForwardInference(const Tensor& x, const Tensor& bnScale_, const Tensor& bnBias_,
+                                    const CudnnBatchNormHandle &cbnh);
+
+std::vector<Tensor> GpuBatchNormBackward(const Tensor& dy, const std::vector<Tensor>& cache, const CudnnBatchNormHandle &cbnh);
+
+#endif  // USE_CUDNN
+
+}  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a105b240/test/python/test_operation.py
----------------------------------------------------------------------
diff --git a/test/python/test_operation.py b/test/python/test_operation.py
index 315a992..0e851d7 100755
--- a/test/python/test_operation.py
+++ b/test/python/test_operation.py
@@ -70,5 +70,22 @@ class TestPythonOperation(unittest.TestCase):
         y_without_bias = conv_without_bias_1(cpu_input_tensor)
         self.check_shape(y_without_bias.shape, (2, 1, 2, 2))
 
+    def test_batchnorm2d_gpu(self):
+        batchnorm_0 = autograd.BatchNorm2d(3)
+
+        gpu_input_tensor = tensor.Tensor(shape=(2, 3, 3, 3), device=gpu_dev)
+        gpu_input_tensor.gaussian(0.0, 1.0)
+
+        dy = CTensor([2, 3, 3, 3])
+        singa.Gaussian(0.0, 1.0, dy)
+
+        y = batchnorm_0(gpu_input_tensor)
+        dx, ds, db = y.creator.backward(dy)
+
+        self.check_shape(y.shape, (2, 3, 3, 3))
+        self.check_shape(dx.shape(), (2, 3, 3, 3))
+        self.check_shape(ds.shape(), (3,))
+        self.check_shape(db.shape(), (3,))
+
 
 if __name__ == '__main__':
     unittest.main()
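
----------------------------------------------------------------------
For reference, below is a minimal usage sketch of the new BatchNorm2d layer,
assembled from the test case above. It is illustrative only: it assumes a
CUDA build of SINGA, obtains the GPU device via device.create_cuda_gpu()
(the test file gets gpu_dev from its own setup), and sets autograd.training
to True because _BatchNorm2d.backward asserts that flag.

    # Hedged sketch: exercise the new BatchNorm2d layer on GPU, mirroring
    # test_batchnorm2d_gpu above. Assumes a CUDA-enabled SINGA build.
    from singa import autograd, tensor, device
    from singa import singa_wrap as singa

    gpu_dev = device.create_cuda_gpu()   # assumption: a CUDA device is available
    autograd.training = True             # backward() requires the training flag

    bn = autograd.BatchNorm2d(3)         # 3 input channels

    x = tensor.Tensor(shape=(2, 3, 3, 3), device=gpu_dev)
    x.gaussian(0.0, 1.0)                 # random NCHW input

    y = bn(x)                            # runs GpuBatchNormForwardTraining underneath

    # gradient of the loss w.r.t. y; random values, as in the test
    dy = singa.Tensor([2, 3, 3, 3])
    singa.Gaussian(0.0, 1.0, dy)

    dx, ds, db = y.creator.backward(dy)  # input, scale, and bias gradients
    print(dx.shape(), ds.shape(), db.shape())  # (2, 3, 3, 3), (3,), (3,)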
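
----------------------------------------------------------------------
For readers less familiar with the operation being wrapped, here is a rough
NumPy reference of what the training-mode forward pass computes per channel
for NCHW input. It follows the standard batch-normalization formulation and
the cuDNN exponential-average-factor convention for the running statistics;
it is only a sketch of the math, not code from this commit (cuDNN additionally
caches the saved mean and inverse variance for the backward pass, and the
default epsilon of 1e-5 here stands in for CUDNN_BN_MIN_EPSILON).

    # Rough NumPy reference for training-mode batch normalization on NCHW input.
    import numpy as np

    def batchnorm2d_forward_train(x, scale, bias, running_mean, running_var,
                                  factor=0.9, eps=1e-5):
        # per-channel statistics over the N, H, W axes
        mean = x.mean(axis=(0, 2, 3))
        var = x.var(axis=(0, 2, 3))

        # normalize, then scale and shift (broadcast over N, H, W)
        x_hat = (x - mean[None, :, None, None]) / np.sqrt(var[None, :, None, None] + eps)
        y = scale[None, :, None, None] * x_hat + bias[None, :, None, None]

        # exponential moving average of the running statistics
        # (cuDNN convention: running = (1 - factor) * running + factor * batch_statistic)
        running_mean = (1 - factor) * running_mean + factor * mean
        running_var = (1 - factor) * running_var + factor * var

        # mean and var are what the GPU path caches for the backward pass
        return y, running_mean, running_var, mean, var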
