SINGA-136 Support cuDNN v4: Add cuDNN BatchNorm Layer
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/4b4ad057
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/4b4ad057
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/4b4ad057

Branch: refs/heads/master
Commit: 4b4ad0573ac0f3bcc0403fe13d81925782d4352d
Parents: afc50a9
Author: seaok <[email protected]>
Authored: Wed Apr 6 14:58:26 2016 +0800
Committer: seaok <[email protected]>
Committed: Wed Apr 6 14:58:26 2016 +0800

----------------------------------------------------------------------
 examples/cifar10/cudnn_bm.conf         | 334 ++++++++++++++++++++++++++++
 include/singa/neuralnet/neuron_layer.h |  30 +++
 src/driver.cc                          |   2 +
 src/neuralnet/neuron_layer/bm.cc       |  68 ++++++
 src/neuralnet/neuron_layer/cudnn_bm.cc | 150 +++++++++++++
 src/proto/job.proto                    |   7 +
 6 files changed, 591 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4b4ad057/examples/cifar10/cudnn_bm.conf
----------------------------------------------------------------------
diff --git a/examples/cifar10/cudnn_bm.conf b/examples/cifar10/cudnn_bm.conf
new file mode 100644
index 0000000..071ed8a
--- /dev/null
+++ b/examples/cifar10/cudnn_bm.conf
@@ -0,0 +1,334 @@
+name: "cifar10-convnet"
+train_steps: 70000
+test_steps: 100
+test_freq: 1000
+#validate_steps: 100
+#validate_freq: 300
+disp_freq: 200
+gpu: 0
+#checkpoint_path: "examples/cifar10/checkpoint/step1000-worker0"
+train_one_batch {
+  alg: kBP
+}
+updater{
+  type: kSGD
+  weight_decay:0.004
+  momentum:0.9
+  learning_rate {
+    type: kFixedStep
+    fixedstep_conf:{
+      step:0
+      step:60000
+      step:65000
+      step_lr:0.001
+      step_lr:0.0001
+      step_lr:0.00001
+    }
+  }
+}
+neuralnet {
+  layer{
+    name: "data"
+    type: kRecordInput
+    store_conf {
+      backend: "kvfile"
+      path: "examples/cifar10/train_data.bin"
+      mean_file: "examples/cifar10/image_mean.bin"
+      batchsize: 100
+      #random_skip: 5000
+      shape: 3
+      shape: 32
+      shape: 32
+    }
+    include: kTrain
+  }
+#  layer{
+#    name: "data"
+#    type: kRecordInput
+#    store_conf {
+#      backend: "kvfile"
+#      path: "examples/cifar10/val_data.bin"
+#      mean_file: "examples/cifar10/image_mean.bin"
+#      batchsize: 64
+#      random_skip: 5000
+#      shape: 3
+#      shape: 32
+#      shape: 32
+#    }
+#    include: kVal
+#  }
+  layer{
+    name: "data"
+    type: kRecordInput
+    store_conf {
+      backend: "kvfile"
+      path: "examples/cifar10/test_data.bin"
+      mean_file: "examples/cifar10/image_mean.bin"
+      batchsize: 100
+      shape: 3
+      shape: 32
+      shape: 32
+    }
+    include: kTest
+  }
+
+  layer {
+    name: "conv1"
+    type: kCudnnConv
+    srclayers: "data"
+    convolution_conf {
+      num_filters: 32
+      kernel: 5
+      stride: 1
+      pad:2
+    }
+    param {
+      name: "w1"
+      init {
+        type:kGaussian
+        std:0.0001
+      }
+    }
+    param {
+      name: "b1"
+      lr_scale:2.0
+      wd_scale: 0
+      init {
+        type: kConstant
+        value:0
+      }
+    }
+  }
+
+  layer {
+    name: "pool1"
+    type: kCudnnPool
+    srclayers: "conv1"
+    pooling_conf {
+      pool: MAX
+      kernel: 3
+      stride: 2
+    }
+  }
+  layer {
+    name: "bm1"
+    type: kCudnnBM
+    param {
+      name: "s11"
+      init {
+        type:kConstant
+        value:1
+      }
+    }
+    param {
+      name: "s12"
+      init {
+        type:kConstant
+        value:0
+      }
+    }
+    srclayers:"pool1"
+  }
+  layer {
+    name: "relu1"
+    type: kCudnnActivation
+    activation_conf {
+      type: RELU
+    }
+    share_src_blobs: true
+    srclayers:"bm1"
+  }
+  layer {
+    name: "conv2"
+    type: kCudnnConv
+    srclayers: "relu1"
+    convolution_conf {
+      num_filters: 32
+      kernel: 5
+      stride: 1
+      pad:2
+    }
+    param {
+      name: "w2"
+      init {
+        type:kGaussian
+        std:0.01
+      }
+    }
+    param {
+      name: "b2"
+      lr_scale:2.0
+      wd_scale: 0
+      init {
+        type: kConstant
+        value:0
+      }
+    }
+  }
+  layer {
+    name: "bm2"
+    type: kCudnnBM
+    param {
+      name: "s21"
+      init {
+        type:kConstant
+        value:1
+      }
+    }
+    param {
+      name: "s22"
+      init {
+        type:kConstant
+        value:0
+      }
+    }
+    srclayers:"conv2"
+  }
+  layer {
+    name: "relu2"
+    type: kCudnnActivation
+    activation_conf {
+      type: RELU
+    }
+    share_src_blobs: true
+    srclayers:"bm2"
+  }
+  layer {
+    name: "pool2"
+    type: kCudnnPool
+    srclayers: "relu2"
+    pooling_conf {
+      pool: AVG
+      kernel: 3
+      stride: 2
+    }
+  }
+  layer {
+    name: "conv3"
+    type: kCudnnConv
+    srclayers: "relu2"
+    convolution_conf {
+      num_filters: 64
+      kernel: 5
+      stride: 1
+      pad:2
+    }
+    param {
+      name: "w3"
+      init {
+        type:kGaussian
+        std:0.01
+      }
+    }
+    param {
+      name: "b3"
+      lr_scale: 2
+      wd_scale: 0
+      init {
+        type: kConstant
+        value:0
+      }
+    }
+  }
+  layer {
+    name: "bm3"
+    type: kCudnnBM
+    param {
+      name: "s31"
+      init {
+        type:kConstant
+        value:1
+      }
+    }
+    param {
+      name: "s32"
+      init {
+        type:kConstant
+        value:0
+      }
+    }
+    srclayers:"conv3"
+  }
+  layer {
+    name: "relu3"
+    type: kCudnnActivation
+    activation_conf {
+      type: RELU
+    }
+    share_src_blobs: true
+    srclayers:"bm3"
+  }
+  layer {
+    name: "pool3"
+    type: kCudnnPool
+    srclayers: "relu3"
+    pooling_conf {
+      pool: AVG
+      kernel: 3
+      stride: 2
+    }
+  }
+  layer {
+    name: "ip1"
+    type: kInnerProduct
+    srclayers:"pool3"
+    innerproduct_conf {
+      num_output: 10
+    }
+    param {
+      name: "w4"
+      wd_scale:250
+      init {
+        type:kGaussian
+        std:0.01
+      }
+    }
+    param {
+      name: "b4"
+      lr_scale:2.0
+      wd_scale:0
+      init {
+        type: kConstant
+        value:0
+      }
+    }
+  }
+  layer {
+    name : "softmax"
+    type: kCudnnSoftmax
+    srclayers: "ip1"
+    include: kTest
+  }
+
+  layer {
+    name : "accuracy"
+    type: kAccuracy
+    srclayers: "softmax"
+    srclayers: "data"
+    include: kTest
+  }
+  layer{
+    name: "loss"
+    type: kSoftmaxLoss
+    srclayers:"ip1"
+    srclayers: "data"
+    include : kTrain
+  }
+# uncomment "softmax", "argsort", "output" layer and comment "loss" layer
+# to extract features from argsort
+#  layer {
+#    name : "output"
+#    type: kCSVOutput
+#    srclayers: "argsort"
+#    store_conf {
+#      path: "examples/cifar10/out.csv"
+#    }
+#  }
+}
+cluster {
+  nworker_groups: 1
+  nserver_groups: 1
+  nworkers_per_group: 1
+  nworkers_per_procs: 1
+  workspace: "examples/cifar10"
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4b4ad057/include/singa/neuralnet/neuron_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/neuron_layer.h b/include/singa/neuralnet/neuron_layer.h
index f03e91b..3cdc137 100644
--- a/include/singa/neuralnet/neuron_layer.h
+++ b/include/singa/neuralnet/neuron_layer.h
@@ -351,6 +351,17 @@ class STanhLayer : public NeuronLayer {
   void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
 };
 
+
+class BMLayer : public NeuronLayer {
+ public:
+  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
+  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
+  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
+ protected:
+  Param *bnScale_, *bnBias_;
+  int batchsize_, channels_, height_, width_;
+};
+
 /*************** Layers implemented using cudnn v3 ***************/
 #ifdef USE_CUDNN
 #define CHECK_CUDNN(x) CHECK_EQ(x, CUDNN_STATUS_SUCCESS)
@@ -447,6 +458,25 @@ class CudnnSoftmaxLayer : public SoftmaxLayer, public CudnnBase {
   void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
   void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
 };
+
+/**
+ * Cudnn Batch Normalization layer
+ */
+class CudnnBMLayer : public BMLayer, public CudnnBase {
+ public:
+  ~CudnnBMLayer();
+  void InitCudnn() override;
+  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
+  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
+ protected:
+  cudnnBatchNormMode_t mode_;
+  cudnnTensorDescriptor_t bnScaleBiasMeanVar_desc_;
+  cudnnTensorDescriptor_t bnScaleBiasDiff_desc_;
+  Blob<float> resultSaveMean_;
+  Blob<float> resultSaveInvVariance_;
+  Blob<float> resultRunningMean_;
+  Blob<float> resultRunningInvVariance_;
+};
 #endif  // USE_CUDNN
 
 /******************** RBM layers *****************/

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4b4ad057/src/driver.cc
----------------------------------------------------------------------
diff --git a/src/driver.cc b/src/driver.cc
index 6163865..ce1c635 100644
--- a/src/driver.cc
+++ b/src/driver.cc
@@ -98,6 +98,7 @@ void Driver::Init(int argc, char **argv) {
   RegisterLayer<CudnnLRNLayer, int>(kCudnnLRN);
   RegisterLayer<CudnnSoftmaxLayer, int>(kCudnnSoftmax);
   RegisterLayer<CudnnSoftmaxLossLayer, int>(kCudnnSoftmaxLoss);
+  RegisterLayer<CudnnBMLayer, int>(kCudnnBM);
 #endif
 
   RegisterLayer<DropoutLayer, int>(kDropout);
@@ -119,6 +120,7 @@ void Driver::Init(int argc, char **argv) {
   RegisterLayer<STanhLayer, int>(kSTanh);
   RegisterLayer<SoftmaxLayer, int>(kSoftmax);
   RegisterLayer<GRULayer, int>(kGRU);
+  RegisterLayer<BMLayer, int>(kBM);
 
 #ifdef USE_LMDB
   RegisterLayer<LMDBDataLayer, int>(kLMDBData);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4b4ad057/src/neuralnet/neuron_layer/bm.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/bm.cc b/src/neuralnet/neuron_layer/bm.cc
new file mode 100644
index 0000000..5784595
--- /dev/null
+++ b/src/neuralnet/neuron_layer/bm.cc
@@ -0,0 +1,68 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include <glog/logging.h>
+#include "singa/neuralnet/neuron_layer.h"
+#include "singa/utils/singleton.h"
+
+namespace singa {
+
+using std::vector;
+
+void BMLayer::Setup(const LayerProto& conf,
+    const vector<Layer*>& srclayers) {
+  Layer::Setup(conf, srclayers);
+  data_.ReshapeLike(srclayers[0]->data(this));
+  grad_.ReshapeLike(srclayers[0]->grad(this));
+
+  const vector<int>& srcshape = srclayers[0]->data(this).shape();
+
+  batchsize_ = srcshape[0];
+  channels_ = srcshape[1];
+  height_ = srcshape[2];
+  width_ = srcshape[3];
+
+  bnScale_ = Param::Create(conf.param(0));
+  bnScale_->Setup(vector<int>{1, channels_, 1, 1});
+
+  bnBias_ = Param::Create(conf.param(1));
+  bnBias_->Setup(vector<int>{1, channels_, 1, 1});
+  bnScale_->InitValues(1);
+
+  /* float* p = bnScale_->data().mutable_cpu_data();
+
+  cout<<"inite param"<<endl;
+  for(int i=0;i<3;++i) {
+//    p[i]=1.0;
+    cout<<p[i]<<" ";
+  }
+  cout<<endl;*/
+}
+
+void BMLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
+  // Todo
+}
+
+void BMLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
+  // Todo
+}
+
+}  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4b4ad057/src/neuralnet/neuron_layer/cudnn_bm.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/cudnn_bm.cc b/src/neuralnet/neuron_layer/cudnn_bm.cc
new file mode 100644
index 0000000..fdc9ea9
--- /dev/null
+++ b/src/neuralnet/neuron_layer/cudnn_bm.cc
@@ -0,0 +1,150 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include "singa/neuralnet/neuron_layer.h"
+
+namespace singa {
+
+CudnnBMLayer::~CudnnBMLayer() {
+  if (has_init_cudnn_) {
+    CHECK_CUDNN(cudnnDestroyTensorDescriptor(bnScaleBiasMeanVar_desc_));
+    CHECK_CUDNN(cudnnDestroyTensorDescriptor(bnScaleBiasDiff_desc_));
+  }
+}
+
+void CudnnBMLayer::InitCudnn() {
+  CudnnBase::InitCudnn();
+
+  CHECK_CUDNN(cudnnCreateTensorDescriptor(&bnScaleBiasMeanVar_desc_));
+  CHECK_CUDNN(cudnnCreateTensorDescriptor(&bnScaleBiasDiff_desc_));
+
+  CHECK_CUDNN(cudnnSetTensor4dDescriptor(src_desc_,
+        CUDNN_TENSOR_NCHW,
+        CUDNN_DATA_FLOAT,
+        batchsize_,
+        channels_,
+        height_,
+        width_));
+  CHECK_CUDNN(cudnnSetTensor4dDescriptor(my_desc_,
+        CUDNN_TENSOR_NCHW,
+        CUDNN_DATA_FLOAT,
+        batchsize_,
+        channels_,
+        height_,
+        width_));
+  CHECK_CUDNN(cudnnSetTensor4dDescriptor(bnScaleBiasMeanVar_desc_,
+        CUDNN_TENSOR_NCHW,
+        CUDNN_DATA_FLOAT,
+        1,
+        channels_,
+        1,
+        1));
+  CHECK_CUDNN(cudnnSetTensor4dDescriptor(bnScaleBiasDiff_desc_,
+        CUDNN_TENSOR_NCHW,
+        CUDNN_DATA_FLOAT,
+        1,
+        channels_,
+        1,
+        1));
+
+  vector<int> shape{1, channels_, 1, 1};
+
+  resultSaveMean_.Reshape(shape);
+  resultSaveInvVariance_.Reshape(shape);
+  resultRunningMean_.Reshape(shape);
+  resultRunningInvVariance_.Reshape(shape);
+
+  mode_ = CUDNN_BATCHNORM_SPATIAL;
+}
+
+void CudnnBMLayer::ComputeFeature(int flag,
+    const vector<Layer*>& srclayers) {
+  if (!has_init_cudnn_)
+    InitCudnn();
+
+  const float alpha = 1.0f, beta = 0.0f;
+  double exponentialAverageFactor = 1.0;
+  double epsilon = CUDNN_BN_MIN_EPSILON;
+
+  // check training
+  if ((flag & kTrain) != kTrain) {
+    CHECK_CUDNN(cudnnBatchNormalizationForwardInference(handle_,
+          mode_,
+          &alpha,
+          &beta,
+          src_desc_,
+          srclayers.at(0)->data(this).gpu_data(),
+          my_desc_,
+          data_.mutable_gpu_data(),
+          bnScaleBiasMeanVar_desc_,
+          bnScale_->data().gpu_data(),
+          bnBias_->data().gpu_data(),
+          resultRunningMean_.gpu_data(),
+          resultRunningInvVariance_.gpu_data(),
+          epsilon));
+  } else {
+    CHECK_CUDNN(cudnnBatchNormalizationForwardTraining(handle_,
+          mode_,
+          &alpha,
+          &beta,
+          src_desc_,
+          srclayers.at(0)->data(this).gpu_data(),
+          my_desc_,
+          data_.mutable_gpu_data(),
+          bnScaleBiasMeanVar_desc_,
+          bnScale_->data().gpu_data(),
+          bnBias_->data().gpu_data(),
+          exponentialAverageFactor,
+          resultRunningMean_.mutable_gpu_data(),
+          resultRunningInvVariance_.mutable_gpu_data(),
+          epsilon,
+          resultSaveMean_.mutable_gpu_data(),
+          resultSaveInvVariance_.mutable_gpu_data()));
+  }
+}
+
+void CudnnBMLayer::ComputeGradient(int flag,
+    const vector<Layer*>& srclayers) {
+
+  const float alpha = 1.0f, beta = 0.0f, alphaDiff = 1.0f, betaDiff = 0.0f;
+  double epsilon = CUDNN_BN_MIN_EPSILON;
+
+  CHECK_CUDNN(cudnnBatchNormalizationBackward(handle_,
+        mode_,
+        &alpha,
+        &beta,
+        &alphaDiff,
+        &betaDiff,
+        src_desc_,
+        srclayers.at(0)->data(this).gpu_data(),
+        my_desc_,
+        grad_.gpu_data(),
+        src_desc_,
+        srclayers.at(0)->mutable_grad(this)->mutable_gpu_data(),
+        bnScaleBiasDiff_desc_,
+        bnScale_->data().gpu_data(),
+        bnScale_->grad().mutable_gpu_data(),
+        bnBias_->grad().mutable_gpu_data(),
+        epsilon,
+        resultSaveMean_.gpu_data(),
+        resultSaveInvVariance_.gpu_data()));
+}
+}  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4b4ad057/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
index 7bc0ea3..622248c 100644
--- a/src/proto/job.proto
+++ b/src/proto/job.proto
@@ -249,6 +249,7 @@ message LayerProto {
   optional SoftmaxProto softmax_conf = 214;
   optional GRUProto gru_conf = 215;
   optional EmbeddingProto embedding_conf = 216;
+  optional BMProto bm_conf = 217;
 
   // configuration for loss layers, id range [300, 400)
   optional SoftmaxLossProto softmaxloss_conf = 301;
@@ -393,6 +394,10 @@ message EmbeddingProto {
   optional int32 feature_dim = 2 [default = 100];
 }
 
+
+message BMProto {
+}
+
 message SoftmaxLossProto {
   // computing accuracy against topk results
   optional int32 topk = 1 [default = 1];
@@ -676,6 +681,7 @@ enum LayerType {
   kSoftmax = 214;
   kGRU = 215;
   kEmbedding = 216;
+  kBM = 217;
 
   // cudnn v3
   kCudnnConv = 250;
@@ -683,6 +689,7 @@
   kCudnnLRN = 252;
   kCudnnSoftmax = 253;
   kCudnnActivation = 254;
+  kCudnnBM = 255;
 
   /*
    * Loss layers
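
For quick reference, the new layer is used from a job configuration by declaring a layer of type kCudnnBM (kBM names the plain-CPU class, whose compute methods are still marked Todo in bm.cc) with two params holding the batch-norm scale and bias. The excerpt below simply restates the "bm1" block from examples/cifar10/cudnn_bm.conf above, with the init messages written inline:

  layer {
    name: "bm1"
    type: kCudnnBM
    param {
      name: "s11"
      init { type: kConstant value: 1 }   # scale, initialized to 1
    }
    param {
      name: "s12"
      init { type: kConstant value: 0 }   # bias, initialized to 0
    }
    srclayers: "pool1"
  }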
