SINGA-379 Implement batchnorm operation and its related functions for autograd
- implement batchnorm2d related functions (GPU part)
- add interface files for developed functions
- create corresponding operation and NewLayer for batchnorm2d

Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/a105b240
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/a105b240
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/a105b240

Branch: refs/heads/master
Commit: a105b2404e36c81f30b873e0c74ccdb9a7e36bfd
Parents: b30d7ea
Author: xuewanqi <[email protected]>
Authored: Sun Jul 8 15:08:43 2018 +0000
Committer: Wang Wei <[email protected]>
Committed: Wed Jul 11 21:57:47 2018 +0800

----------------------------------------------------------------------
 python/singa/autograd.py           |  80 +++++++++++++++-
 src/api/model_operation.i          |  31 ++++++
 src/model/layer/cudnn_batchnorm.cc |   2 +-
 src/model/operation/batchnorm.cc   | 164 ++++++++++++++++++++++++++++++++
 src/model/operation/batchnorm.h    |  62 ++++++++++++
 test/python/test_operation.py      |  17 ++++
 6 files changed, 354 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a105b240/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index aa6b37a..97a75b4 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -27,7 +27,7 @@ from .tensor import Tensor
 from . import layer
 from singa.proto import model_pb2
 from . import singa_wrap as singa
-
+#from .tensor import einsum
 CTensor = singa.Tensor
 
 training = False
@@ -415,6 +415,14 @@ class SoftMax(Operation):
         out = out_1 - out_2
         dx = CTensor(out_1.shape)
         dx.CopyFloatDataFromHostPtr(out.flatten())
+        '''grad = Tensor(data=dy)
+        output = Tensor(data=self.output)
+        out_1 = einsum('ki,ki->ki', grad, output)
+        medium_out = einsum('ki,kj->kij', output, output)
+        out_2 = einsum('kij,kj->ki', medium_out, grad)
+        out = out_1 - out_2
+        dx = CTensor(out_1.data.shape)
+        dx.CopyFloatDataFromHostPtr(out.data.flatten())'''
         if self.axis == 0:
             return dx
         elif self.axis == 1:
@@ -761,3 +769,73 @@ class Conv2D(Layer):
 
         y = conv2d(x, self.W, self.b, self.handle)
         return y
+
+class BatchNorm2d(NewLayer):
+    def __init__(self, num_features, momentum=0.9):
+        self.channels = num_features
+        self.momentum = momentum
+
+        param_shape = (self.channels,)
+
+        self.scale = Tensor(shape=param_shape, requires_grad=True, stores_grad=True)
+        self.scale.set_value(1.0)
+
+        self.bias = Tensor(shape=param_shape, requires_grad=True, stores_grad=True)
+        self.bias.set_value(0.0)
+
+        self.runningmean = Tensor(shape=param_shape, requires_grad=False, stores_grad=False)
+        self.runningvariance = Tensor(shape=param_shape, requires_grad=False, stores_grad=False)
+
+    def __call__(self, x):
+        assert x.shape[1] == self.channels, 'number of channels mismatched.'
+
+        self.device_check(x, self.scale, self.bias, self.runningmean, self.runningvariance)
+
+        if x.device.id() == -1:
+            raise NotImplementedError
+
+        else:
+            if not hasattr(self, 'handle'):
+                self.handle = singa.CudnnBatchNormHandle(self.momentum, x.data, self.runningmean.data, self.runningvariance.data)
+            elif x.shape[0] != self.handle.batchsize:
+                self.handle = singa.CudnnBatchNormHandle(self.momentum, x.data, self.runningmean.data, self.runningvariance.data)
+            self.handle.device_id = x.device.id()
+
+        y = batchnorm2d(x, self.scale, self.bias, self.handle)
+        return y
+
+
+class _BatchNorm2d(Operation):
+    def __init__(self, handle):
+        self.handle = handle
+
+    def forward(self, x, scale, bias):
+        if training:
+            self.cache = (x,)
+            if self.handle.device_id == -1:
+                raise NotImplementedError
+            else:
+                return singa.GpuBatchNormForwardTraining(x, scale, bias, self.cache, self.handle)
+
+        else:
+            if self.handle.device_id == -1:
+                raise NotImplementedError
+            else:
+                return singa.GpuBatchNormForwardInference(x, scale, bias, self.handle)
+
+    def backward(self, dy):
+        assert training is True and hasattr(
+            self, 'cache'), 'Please set training to True before running backward.'
+
+        if dy.device().id() != self.handle.device_id:
+            dy.ToDevice(self.cache[0].device())
+
+        if self.handle.device_id == -1:
+            raise NotImplementedError
+        else:
+            dx, ds, db = singa.GpuBatchNormBackward(dy, self.cache, self.handle)
+            return dx, ds, db
+
+
+def batchnorm2d(x, scale, bias, handle):
+    return _BatchNorm2d(handle)(x, scale, bias)[0]

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a105b240/src/api/model_operation.i
----------------------------------------------------------------------
diff --git a/src/api/model_operation.i b/src/api/model_operation.i
index 3858a2b..783a1f8 100755
--- a/src/api/model_operation.i
+++ b/src/api/model_operation.i
@@ -5,6 +5,7 @@
 %include "std_string.i"
 %{
 #include "../src/model/operation/convolution.h"
+#include "../src/model/operation/batchnorm.h"
 %}
 
 namespace singa {
@@ -48,4 +49,34 @@ Tensor GpuConvBackwardb(const Tensor &dy, const Tensor &b, const CudnnConvHandle
 
 #endif  // USE_CUDNN
 
+class BatchNormHandle {
+ public:
+  BatchNormHandle(const float momentum, const Tensor& input, const Tensor& RunningMean_, const Tensor& RunningVariance_);
+
+  size_t batchsize;
+  Tensor runningMean_;
+  Tensor runningVariance_;
+};
+
+
+class CudnnBatchNormHandle : public BatchNormHandle {
+ public:
+  CudnnBatchNormHandle(const float momentum, const Tensor& input, const Tensor& RunningMean_, const Tensor& RunningVariance_);
+
+  size_t batchsize;
+  Tensor runningMean_;
+  Tensor runningVariance_;
+};
+
+Tensor GpuBatchNormForwardTraining(const Tensor& x, const Tensor& bnScale_, const Tensor& bnBias_,
+                                   std::vector<Tensor>& cache, CudnnBatchNormHandle &cbnh);
+
+Tensor GpuBatchNormForwardInference(const Tensor& x, const Tensor& bnScale_, const Tensor& bnBias_, const CudnnBatchNormHandle &cbnh);
+
+std::vector<Tensor> GpuBatchNormBackward(const Tensor& dy,
+                                         const std::vector<Tensor>& cache, const CudnnBatchNormHandle &cbnh);
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a105b240/src/model/layer/cudnn_batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_batchnorm.cc b/src/model/layer/cudnn_batchnorm.cc
old mode 100644
new mode 100755
index 389b41b..4816817
--- a/src/model/layer/cudnn_batchnorm.cc
+++ b/src/model/layer/cudnn_batchnorm.cc
@@ -167,7 +167,7 @@ const std::pair<Tensor, vector<Tensor>> CudnnBatchNorm::Backward(
             saveVarBlock->data()));
       },
-      {dx.block(), grad.block(), bnScale_.block(), resultSaveMean_.block(),
+      {x.block(), grad.block(), bnScale_.block(), resultSaveMean_.block(),
        resultSaveVariance_.block()},
       {dx.block(), dbnScale_.block(), dbnBias_.block()});
   } else {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a105b240/src/model/operation/batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/operation/batchnorm.cc b/src/model/operation/batchnorm.cc
new file mode 100644
index 0000000..9b6f9cd
--- /dev/null
+++ b/src/model/operation/batchnorm.cc
@@ -0,0 +1,164 @@
+#include "./batchnorm.h"
+
+namespace singa {
+
+BatchNormHandle::BatchNormHandle(const float momentum, const Tensor& input, const Tensor& RunningMean_,
+                                 const Tensor& RunningVariance_) {
+  factor_ = momentum;
+  batchsize = input.shape()[0];
+  channels_ = input.shape()[1];
+  if (input.nDim() == 4u) {
+    height_ = input.shape()[2];
+    width_ = input.shape()[3];
+    is_2d_ = false;
+  } else {
+    height_ = 1;
+    width_ = 1;
+    is_2d_ = true;
+  }
+  runningMean_ = RunningMean_;
+  runningVariance_ = RunningVariance_;
+};
+
+CudnnBatchNormHandle::CudnnBatchNormHandle(const float momentum, const Tensor& input, const Tensor& RunningMean_,
+                                           const Tensor& RunningVariance_)
+    : BatchNormHandle(momentum, input, RunningMean_, RunningVariance_) {
+  if (is_2d_)
+    mode_ = CUDNN_BATCHNORM_PER_ACTIVATION;
+  else
+    mode_ = CUDNN_BATCHNORM_SPATIAL;
+  auto dtype = input.data_type();
+  CUDNN_CHECK(cudnnCreateTensorDescriptor(&shape_desc_));
+  CUDNN_CHECK(cudnnCreateTensorDescriptor(&param_desc_));
+  CUDNN_CHECK(cudnnSetTensor4dDescriptor(shape_desc_, CUDNN_TENSOR_NCHW,
+                                         GetCudnnDataType(dtype), batchsize,
+                                         channels_, height_, width_));
+  CUDNN_CHECK(cudnnSetTensor4dDescriptor(param_desc_, CUDNN_TENSOR_NCHW,
+                                         GetCudnnDataType(dtype), 1, channels_,
+                                         1, 1));
+};
+
+Tensor GpuBatchNormForwardTraining(const Tensor& x, const Tensor& bnScale_, const Tensor& bnBias_,
+                                   std::vector<Tensor>& cache, const CudnnBatchNormHandle &cbnh) {
+
+  auto shape = x.shape();
+  Tensor output;
+  Tensor input;  // for unification of 2d and 4d cases.
+  if (cbnh.is_2d_)
+    input = Reshape(x, Shape{shape.at(0), shape.at(1), 1, 1});
+  else
+    input = x;
+  output.ResetLike(x);
+
+  Tensor resultSaveMean_;
+  Tensor resultSaveVariance_;
+
+  resultSaveMean_.Reshape(Shape{cbnh.channels_});
+  resultSaveVariance_.Reshape(Shape{cbnh.channels_});
+
+  cache.push_back(resultSaveMean_);
+  cache.push_back(resultSaveVariance_);
+  cache.push_back(bnScale_);
+  // cache = {x, mean, var, scale}
+
+  output.device()->Exec(
+      [&output, &input, &bnScale_, &bnBias_, &cache, &cbnh](Context* ctx) {
+        Block* inBlock = input.block(), * outBlock = output.block(),
+               * saveMeanBlock = cache[1].block(),
+               * saveVarBlock = cache[2].block(),
+               * runningMeanBlock = cbnh.runningMean_.block(),
+               * runningVarBlock = cbnh.runningVariance_.block(),
+               * bnScaleBlock = bnScale_.block(),
+               * bnBiasBlock = bnBias_.block();
+        const float alpha = 1.0f, beta = 0.0f;
+        double epsilon = CUDNN_BN_MIN_EPSILON;
+        CUDNN_CHECK(cudnnBatchNormalizationForwardTraining(
+            ctx->cudnn_handle, cbnh.mode_, &alpha, &beta, cbnh.shape_desc_,
+            inBlock->data(), cbnh.shape_desc_, outBlock->mutable_data(),
+            cbnh.param_desc_, bnScaleBlock->data(), bnBiasBlock->data(), cbnh.factor_,
+            runningMeanBlock->mutable_data(), runningVarBlock->mutable_data(),
+            epsilon, saveMeanBlock->mutable_data(),
+            saveVarBlock->mutable_data()));
+      },
+      {input.block(), bnScale_.block(), bnBias_.block()},
+      {output.block(), cbnh.runningMean_.block(), cbnh.runningVariance_.block(),
+       cache[1].block(), cache[2].block()});
+  if (cbnh.is_2d_) output.Reshape(Shape{shape.at(0), shape.at(1)});
+  return output;
+};
+
+Tensor GpuBatchNormForwardInference(const Tensor& x, const Tensor& bnScale_, const Tensor& bnBias_,
+                                    const CudnnBatchNormHandle &cbnh) {
+  auto shape = x.shape();
+  Tensor output;
+  Tensor input;  // for unification of 2d and 4d cases.
+  if (cbnh.is_2d_)
+    input = Reshape(x, Shape{shape.at(0), shape.at(1), 1, 1});
+  else
+    input = x;
+  output.ResetLike(x);
+  output.device()->Exec(
+      [&output, &input, &bnScale_, &bnBias_, &cbnh](Context* ctx) {
+        Block* inBlock = input.block(), * outBlock = output.block(),
+               * runningMeanBlock = cbnh.runningMean_.block(),
+               * runningVarBlock = cbnh.runningVariance_.block(),
+               * bnScaleBlock = bnScale_.block(),
+               * bnBiasBlock = bnBias_.block();
+        const float alpha = 1.0f, beta = 0.0f;
+        double epsilon = CUDNN_BN_MIN_EPSILON;
+        CUDNN_CHECK(cudnnBatchNormalizationForwardInference(
+            ctx->cudnn_handle, cbnh.mode_, &alpha, &beta, cbnh.shape_desc_,
+            inBlock->data(), cbnh.shape_desc_, outBlock->mutable_data(),
+            cbnh.param_desc_, bnScaleBlock->data(), bnBiasBlock->data(),
+            runningMeanBlock->data(), runningVarBlock->data(), epsilon));
+      },
+      {input.block(), bnScale_.block(), bnBias_.block(), cbnh.runningMean_.block(),
+       cbnh.runningVariance_.block()},
+      {output.block()});
+  if (cbnh.is_2d_) output.Reshape(Shape{shape.at(0), shape.at(1)});
+  return output;
+};
+
+
+std::vector<Tensor> GpuBatchNormBackward(const Tensor& dy, const std::vector<Tensor>& cache, const CudnnBatchNormHandle &cbnh) {
+
+  vector<Tensor> out_grads;
+  Tensor dx;
+  dx.ResetLike(dy);
+
+  Tensor dbnScale_;
+  dbnScale_.ResetLike(cache[3]);
+
+  Tensor dbnBias_;
+  dbnBias_.ResetLike(cache[3]);
+  //dbnBias_.ResetLike(bnBias_);
+
+  dx.device()->Exec(
+      [&dx, &dbnScale_, &dbnBias_, &dy, &cache, &cbnh](Context* ctx) {
+        Block* dyblock = dy.block(), * dxblock = dx.block(),
+               * xblock = cache[0].block(), * bnScaleBlock = cache[3].block(),
+               * dbnScaleBlock = dbnScale_.block(),
+               * dbnBiasBlock = dbnBias_.block(),
+               * saveMeanBlock = cache[1].block(),
+               * saveVarBlock = cache[2].block();
+        const float alpha = 1.0f, beta = 0.0f;
+        double epsilon = CUDNN_BN_MIN_EPSILON;
+        CUDNN_CHECK(cudnnBatchNormalizationBackward(
+            ctx->cudnn_handle, cbnh.mode_, &alpha, &beta, &alpha, &beta,
+            cbnh.shape_desc_, xblock->data(), cbnh.shape_desc_, dyblock->data(),
+            cbnh.shape_desc_, dxblock->mutable_data(), cbnh.param_desc_,
+            bnScaleBlock->data(), dbnScaleBlock->mutable_data(),
+            dbnBiasBlock->mutable_data(), epsilon, saveMeanBlock->data(),
+            saveVarBlock->data()));
+      },
+      {cache[0].block(), dy.block(), cache[3].block(), cache[1].block(),
+       cache[2].block()},
+      {dx.block(), dbnScale_.block(), dbnBias_.block()});
+
+  if (cbnh.is_2d_) dx.Reshape(Shape{dx.shape().at(0), dx.shape().at(1)});
+  out_grads.push_back(dx);
+  out_grads.push_back(dbnScale_);
+  out_grads.push_back(dbnBias_);
+  return out_grads;
+};
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a105b240/src/model/operation/batchnorm.h
----------------------------------------------------------------------
diff --git a/src/model/operation/batchnorm.h b/src/model/operation/batchnorm.h
new file mode 100644
index 0000000..f2da4cd
--- /dev/null
+++ b/src/model/operation/batchnorm.h
@@ -0,0 +1,62 @@
+//#ifndef SINGA_MODEL_OPERATION_BATCHNORM_H_
+//#define SINGA_MODEL_OPERATION_BATCHNORM_H_
+
+#include <vector>
+#include "singa/core/tensor.h"
+
+#ifdef USE_CUDNN
+#include <cudnn.h>
+#include "../layer/cudnn_utils.h"  // check_cudnn
+#endif  // USE_CUDNN
+
+namespace singa {
+
+class BatchNormHandle {
+ public:
+  BatchNormHandle(const float momentum, const Tensor& input, const Tensor& RunningMean_, const Tensor& RunningVariance_);
+
+  float factor_;
+  size_t channels_;
+  size_t batchsize;
+
+  Tensor runningMean_;
+  Tensor runningVariance_;
+
+  bool is_2d_;
+  //bool train = true;
+
+  size_t height_;
+  size_t width_;
+};
+
+//Tensor CpuBatchNormForwardTraining();
+
+//Tensor CpuBatchNormForwardInference();
+
+//Tensor CpuBatchNormBackwardx();
+
+
+#ifdef USE_CUDNN
+
+class CudnnBatchNormHandle : public BatchNormHandle {
+ public:
+  CudnnBatchNormHandle(const float momentum, const Tensor& input, const Tensor& RunningMean_, const Tensor& RunningVariance_);
+
+  //~CudnnBatchNormHandle();
+
+  cudnnBatchNormMode_t mode_;
+  cudnnTensorDescriptor_t shape_desc_ = nullptr;
+  cudnnTensorDescriptor_t param_desc_ = nullptr;
+};
+
+Tensor GpuBatchNormForwardTraining(const Tensor& x, const Tensor& bnScale_, const Tensor& bnBias_,
+                                   std::vector<Tensor>& cache, const CudnnBatchNormHandle &cbnh);
+
+Tensor GpuBatchNormForwardInference(const Tensor& x, const Tensor& bnScale_, const Tensor& bnBias_,
+                                    const CudnnBatchNormHandle &cbnh);
+
+std::vector<Tensor> GpuBatchNormBackward(const Tensor& dy, const std::vector<Tensor>& cache, const CudnnBatchNormHandle &cbnh);
+
+#endif  // USE_CUDNN
+
+}  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a105b240/test/python/test_operation.py
----------------------------------------------------------------------
diff --git a/test/python/test_operation.py b/test/python/test_operation.py
index 315a992..0e851d7 100755
--- a/test/python/test_operation.py
+++ b/test/python/test_operation.py
@@ -70,5 +70,22 @@ class TestPythonOperation(unittest.TestCase):
         y_without_bias = conv_without_bias_1(cpu_input_tensor)
         self.check_shape(y_without_bias.shape, (2, 1, 2, 2))
 
+    def test_batchnorm2d_gpu(self):
+        batchnorm_0 = autograd.BatchNorm2d(3)
+
+        gpu_input_tensor = tensor.Tensor(shape=(2, 3, 3, 3), device=gpu_dev)
+        gpu_input_tensor.gaussian(0.0, 1.0)
+
+        dy = CTensor([2, 3, 3, 3])
+        singa.Gaussian(0.0, 1.0, dy)
+
+        y = batchnorm_0(gpu_input_tensor)
+        dx, ds, db = y.creator.backward(dy)
+
+        self.check_shape(y.shape, (2, 3, 3, 3))
+        self.check_shape(dx.shape(), (2, 3, 3, 3))
+        self.check_shape(ds.shape(), (3,))
+        self.check_shape(db.shape(), (3,))
+
 
 if __name__ == '__main__':
     unittest.main()
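
----------------------------------------------------------------------
For reference, below is a minimal usage sketch of the new BatchNorm2d layer,
assembled from the test case above. It is illustrative only: it assumes a
CUDA build of SINGA, obtains the GPU device via device.create_cuda_gpu()
(the test file gets gpu_dev from its own setup), and sets autograd.training
to True because _BatchNorm2d.backward asserts that flag.

    # Hedged sketch: exercise the new BatchNorm2d layer on GPU, mirroring
    # test_batchnorm2d_gpu above. Assumes a CUDA-enabled SINGA build.
    from singa import autograd, tensor, device
    from singa import singa_wrap as singa

    gpu_dev = device.create_cuda_gpu()   # assumption: a CUDA device is available
    autograd.training = True             # backward() requires the training flag

    bn = autograd.BatchNorm2d(3)         # 3 input channels

    x = tensor.Tensor(shape=(2, 3, 3, 3), device=gpu_dev)
    x.gaussian(0.0, 1.0)                 # random NCHW input

    y = bn(x)                            # runs GpuBatchNormForwardTraining underneath

    # gradient of the loss w.r.t. y; random values, as in the test
    dy = singa.Tensor([2, 3, 3, 3])
    singa.Gaussian(0.0, 1.0, dy)

    dx, ds, db = y.creator.backward(dy)  # input, scale, and bias gradients
    print(dx.shape(), ds.shape(), db.shape())  # (2, 3, 3, 3), (3,), (3,)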
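
----------------------------------------------------------------------
For readers less familiar with the operation being wrapped, here is a rough
NumPy reference of what the training-mode forward pass computes per channel
for NCHW input. It follows the standard batch-normalization formulation and
the cuDNN exponential-average-factor convention for the running statistics;
it is only a sketch of the math, not code from this commit (cuDNN additionally
caches the saved mean and inverse variance for the backward pass, and the
default epsilon of 1e-5 here stands in for CUDNN_BN_MIN_EPSILON).

    # Rough NumPy reference for training-mode batch normalization on NCHW input.
    import numpy as np

    def batchnorm2d_forward_train(x, scale, bias, running_mean, running_var,
                                  factor=0.9, eps=1e-5):
        # per-channel statistics over the N, H, W axes
        mean = x.mean(axis=(0, 2, 3))
        var = x.var(axis=(0, 2, 3))

        # normalize, then scale and shift (broadcast over N, H, W)
        x_hat = (x - mean[None, :, None, None]) / np.sqrt(var[None, :, None, None] + eps)
        y = scale[None, :, None, None] * x_hat + bias[None, :, None, None]

        # exponential moving average of the running statistics
        # (cuDNN convention: running = (1 - factor) * running + factor * batch_statistic)
        running_mean = (1 - factor) * running_mean + factor * mean
        running_var = (1 - factor) * running_var + factor * var

        # mean and var are what the GPU path caches for the backward pass
        return y, running_mean, running_var, mean, var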
