SINGA-113 Model/Hybrid Partition Support

Update the memory copy in SliceLayer and ConcateLayer to handle both CPU (memcpy) and
GPU (cudaMemcpy) data.
Test hybrid partitioning on GPU (cuDNN) using cudnn_hybrid.conf.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/bf4cd3bc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/bf4cd3bc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/bf4cd3bc

Branch: refs/heads/master
Commit: bf4cd3bc9473d802d97327226c636ddc442cda0f
Parents: 82563f6
Author: Wei Wang <[email protected]>
Authored: Fri Dec 25 17:47:13 2015 +0800
Committer: Wei Wang <[email protected]>
Committed: Fri Dec 25 21:27:42 2015 +0800

----------------------------------------------------------------------
 examples/cifar10/cudnn_hybrid.conf         | 306 ++++++++++++++++++++++++
 include/singa/neuralnet/connection_layer.h |  10 +-
 src/neuralnet/connection_layer/concate.cc  |  67 ++++--
 src/neuralnet/connection_layer/slice.cc    |  76 ++++--
 src/neuralnet/connection_layer/split.cc    |  16 +-
 src/utils/graph.cc                         |   5 +-
 6 files changed, 422 insertions(+), 58 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bf4cd3bc/examples/cifar10/cudnn_hybrid.conf
----------------------------------------------------------------------
diff --git a/examples/cifar10/cudnn_hybrid.conf 
b/examples/cifar10/cudnn_hybrid.conf
new file mode 100644
index 0000000..a11145c
--- /dev/null
+++ b/examples/cifar10/cudnn_hybrid.conf
@@ -0,0 +1,306 @@
+name: "cifar10-convnet"
+train_steps: 10000
+test_steps: 0
+test_freq: 200
+#validate_steps: 100
+#validate_freq: 300
+disp_freq: 200
+gpu: 0
+gpu: 1
+#debug: true
+#checkpoint_path: "examples/cifar10/checkpoint/step1000-worker0"
+train_one_batch {
+  alg: kBP
+}
+updater{
+  type: kSGD
+  weight_decay:0.004
+  momentum:0.9
+  learning_rate {
+    type: kFixedStep
+    fixedstep_conf:{
+      step:0
+      step:60000
+      step:65000
+      step_lr:0.001
+      step_lr:0.0001
+      step_lr:0.00001
+    }
+  }
+}
+neuralnet {
+  layer{
+    name: "data"
+    type: kRecordInput
+    store_conf {
+      backend: "kvfile"
+      path: "examples/cifar10/train_data.bin"
+      mean_file: "examples/cifar10/image_mean.bin"
+      batchsize: 100
+      #random_skip: 5000
+      shape: 3
+      shape: 32
+      shape: 32
+    }
+    include: kTrain
+    partition_dim: 0
+  }
+#  layer{
+#    name: "data"
+#    type: kRecordInput
+#    store_conf {
+#      backend: "kvfile"
+#      path: "examples/cifar10/val_data.bin"
+#      mean_file: "examples/cifar10/image_mean.bin"
+#      batchsize: 64
+#      random_skip: 5000
+#      shape: 3
+#      shape: 32
+#      shape: 32
+#    }
+#    include: kVal
+#  }
+  layer{
+    name: "data"
+    type: kRecordInput
+    store_conf {
+      backend: "kvfile"
+      path: "examples/cifar10/test_data.bin"
+      mean_file: "examples/cifar10/image_mean.bin"
+      batchsize: 100
+      shape: 3
+      shape: 32
+      shape: 32
+    }
+    include: kTest
+    partition_dim: 0
+  }
+
+  layer {
+    partition_dim: 0
+    name: "conv1"
+    type: kCudnnConv
+    srclayers: "data"
+    convolution_conf {
+      num_filters: 32
+      kernel: 5
+      stride: 1
+      pad:2
+    }
+    param {
+      name: "w1"
+      init {
+        type:kGaussian
+        std:0.0001
+      }
+    }
+    param {
+      name: "b1"
+      lr_scale:2.0
+      init {
+        type: kConstant
+        value:0
+      }
+    }
+  }
+
+  layer {
+    partition_dim: 0
+    name: "pool1"
+    type: kCudnnPool
+    srclayers: "conv1"
+    pooling_conf {
+      pool: MAX
+      kernel: 3
+      stride: 2
+    }
+  }
+  layer {
+    partition_dim: 0
+    name: "relu1"
+    type: kCudnnActivation
+    activation_conf {
+      type: RELU
+    }
+    srclayers:"pool1"
+  }
+  layer {
+    partition_dim: 0
+    name: "norm1"
+    type: kCudnnLRN
+    lrn_conf {
+      local_size: 3
+      alpha: 5e-05
+      beta: 0.75
+    }
+    srclayers:"relu1"
+  }
+  layer {
+    partition_dim: 0
+    name: "conv2"
+    type: kCudnnConv
+    srclayers: "norm1"
+    convolution_conf {
+      num_filters: 32
+      kernel: 5
+      stride: 1
+      pad:2
+    }
+    param {
+      name: "w2"
+      init {
+        type:kGaussian
+        std:0.01
+      }
+    }
+    param {
+      name: "b2"
+      lr_scale:2.0
+      init {
+        type: kConstant
+        value:0
+      }
+    }
+  }
+  layer {
+    partition_dim: 0
+    name: "relu2"
+    type: kCudnnActivation
+    activation_conf {
+      type: RELU
+    }
+    srclayers:"conv2"
+  }
+  layer {
+    partition_dim: 0
+    name: "pool2"
+    type: kCudnnPool
+    srclayers: "relu2"
+    pooling_conf {
+      pool: AVG
+      kernel: 3
+      stride: 2
+    }
+  }
+  layer {
+    partition_dim: 0
+    name: "norm2"
+    type: kCudnnLRN
+    lrn_conf {
+      local_size: 3
+      alpha: 5e-05
+      beta: 0.75
+    }
+    srclayers:"pool2"
+  }
+  layer {
+    partition_dim: 0
+    name: "conv3"
+    type: kCudnnConv
+    srclayers: "norm2"
+    convolution_conf {
+      num_filters: 64
+      kernel: 5
+      stride: 1
+      pad:2
+    }
+    param {
+      name: "w3"
+      init {
+        type:kGaussian
+        std:0.01
+      }
+    }
+    param {
+      name: "b3"
+      init {
+        type: kConstant
+        value:0
+      }
+    }
+  }
+  layer {
+    partition_dim: 0
+    name: "relu3"
+    type: kCudnnActivation
+    activation_conf {
+      type: RELU
+    }
+    srclayers:"conv3"
+  }
+  layer {
+    partition_dim: 0
+    name: "pool3"
+    type: kCudnnPool
+    srclayers: "relu3"
+    pooling_conf {
+      pool: AVG
+      kernel: 3
+      stride: 2
+    }
+  }
+  layer {
+    partition_dim: 1
+    name: "ip1"
+    type: kInnerProduct
+    srclayers:"pool3"
+    innerproduct_conf {
+      num_output: 10
+    }
+    param {
+      name: "w4"
+      wd_scale:250
+      init {
+        type:kGaussian
+        std:0.01
+      }
+    }
+    param {
+      name: "b4"
+      lr_scale:2.0
+      wd_scale:0
+      init {
+        type: kConstant
+        value:0
+      }
+    }
+  }
+#  layer {
+#   name : "softmax"
+#   type: kSoftmax
+#   srclayers: "ip1"
+#  }
+#
+#  layer {
+#   name : "argsort"
+#   type: kArgSort
+#   srclayers: "softmax"
+#  }
+  layer{
+    partition_dim: 0
+    name: "loss"
+    type: kSoftmaxLoss
+    softmaxloss_conf{
+      topk:1
+    }
+    srclayers:"ip1"
+    srclayers: "data"
+  }
+# uncomment "softmax", "argsort", "output" layer and comment "loss" layer
+# to extract features from argsort
+#  layer {
+#    name : "output"
+#    type: kCSVOutput
+#    srclayers: "argsort"
+#    store_conf {
+#      path: "examples/cifar10/out.csv"
+#    }
+#  }
+}
+cluster {
+  nworker_groups: 1
+  nserver_groups: 1
+  nworkers_per_group: 2
+  nworkers_per_procs: 2
+  workspace: "examples/cifar10"
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bf4cd3bc/include/singa/neuralnet/connection_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/connection_layer.h 
b/include/singa/neuralnet/connection_layer.h
index 14e5092..a18f458 100644
--- a/include/singa/neuralnet/connection_layer.h
+++ b/include/singa/neuralnet/connection_layer.h
@@ -103,8 +103,8 @@ class ConcateLayer : public ConnectionLayer {
   void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
 
  private:
-  int num_concates = 0;
-  int concate_dim = 0;
+  int num_concates_ = 0;
+  int concate_dim_ = 0;
 };
 
 /**
@@ -126,8 +126,8 @@ class SliceLayer : public ConnectionLayer {
   Blob<float>* mutable_grad(const Layer* from) override;
 
  private:
-  int num_slices = 0;
-  int slice_dim = 0;
+  int num_slices_ = 0;
+  int slice_dim_ = 0;
   Layer2Index layer_idx_;
 };
 
@@ -149,7 +149,7 @@ class SplitLayer : public ConnectionLayer {
   Blob<float>* mutable_grad(const Layer* from) override;
 
  private:
-  int num_splits = 0;
+  int num_splits_ = 0;
   Layer2Index layer_idx_;
 };
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bf4cd3bc/src/neuralnet/connection_layer/concate.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/connection_layer/concate.cc 
b/src/neuralnet/connection_layer/concate.cc
index adf701e..0cdd812 100644
--- a/src/neuralnet/connection_layer/concate.cc
+++ b/src/neuralnet/connection_layer/concate.cc
@@ -20,6 +20,8 @@
 *************************************************************/
 
 #include "singa/neuralnet/connection_layer.h"
+#include "singa/utils/singleton.h"
+#include "singa/utils/context.h"
 
 namespace singa {
 
@@ -28,15 +30,15 @@ void ConcateLayer::Setup(const LayerProto& conf,
   CHECK_GT(srclayers.size(), 1);
   Layer::Setup(conf, srclayers);
   vector<int> shape = srclayers[0]->data(this).shape();
-  concate_dim = conf.concate_conf().concate_dim();
-  num_concates = conf.concate_conf().num_concates();
-  CHECK_GE(concate_dim, 0);
-  CHECK_LT(concate_dim, shape.size());
-  CHECK_EQ(num_concates, srclayers.size());
+  concate_dim_ = conf.concate_conf().concate_dim();
+  num_concates_ = conf.concate_conf().num_concates();
+  CHECK_GE(concate_dim_, 0);
+  CHECK_LT(concate_dim_, shape.size());
+  CHECK_EQ(num_concates_, srclayers.size());
   for (size_t i = 1; i < srclayers.size(); i++) {
     const vector<int>& src_shape = srclayers[i]->data(this).shape();
     for (size_t j = 0; j < shape.size(); j++)
-      if (static_cast<int>(j) == concate_dim)
+      if (static_cast<int>(j) == concate_dim_)
         shape[j] += src_shape[j];
       else
         CHECK_EQ(shape[j], src_shape[j]);
@@ -47,18 +49,32 @@ void ConcateLayer::Setup(const LayerProto& conf,
 
 void ConcateLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
   CHECK_GT(srclayers.size(), 1);
-  CHECK_EQ(num_concates, srclayers.size());
+  CHECK_EQ(num_concates_, srclayers.size());
   // calculate step for each memcpy
-  int step = srclayers[0]->data(this).shape()[concate_dim];
-  for (unsigned i = concate_dim + 1; i < data_.shape().size(); ++i)
+  int step = srclayers[0]->data(this).shape()[concate_dim_];
+  for (unsigned i = concate_dim_ + 1; i < data_.shape().size(); ++i)
     step *= data_.shape()[i];
   int srclayer_offset = 0;
   int concate_offset = 0;
+  auto context = Singleton<Context>::Instance();
+  int device = context->device_id(std::this_thread::get_id());
   while (concate_offset < data_.count()) {
     for (size_t i = 0; i < srclayers.size(); ++i) {
-      const float* src = srclayers[i]->data(this).cpu_data() + srclayer_offset;
-      float* dst = data_.mutable_cpu_data() + concate_offset;
-      memcpy(dst, src, step * sizeof(float));
+      if (device == -1) {
+        const float* src = srclayers[i]->data(this).cpu_data()
+          + srclayer_offset;
+        float* dst = data_.mutable_cpu_data() + concate_offset;
+        memcpy(dst, src, step * sizeof(float));
+      } else {
+#ifdef USE_GPU
+        const float* src = srclayers[i]->data(this).gpu_data()
+          + srclayer_offset;
+        float* dst = data_.mutable_gpu_data() + concate_offset;
+        cudaMemcpy(dst, src, step * sizeof(float), cudaMemcpyDefault);
+#else
+        LOG(FATAL) << "GPU is supported";
+#endif
+      }
       concate_offset += step;
     }
     srclayer_offset += step;
@@ -67,19 +83,32 @@ void ConcateLayer::ComputeFeature(int flag, const 
vector<Layer*>& srclayers) {
 
 void ConcateLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
   CHECK_GT(srclayers.size(), 1);
-  CHECK_EQ(num_concates, srclayers.size());
+  CHECK_EQ(num_concates_, srclayers.size());
   // calculate step for each memcpy
-  int step = srclayers[0]->grad(this).shape()[concate_dim];
-  for (unsigned i = concate_dim + 1; i < grad_.shape().size(); ++i)
+  int step = srclayers[0]->grad(this).shape()[concate_dim_];
+  for (unsigned i = concate_dim_ + 1; i < grad_.shape().size(); ++i)
     step *= grad_.shape()[i];
   int srclayer_offset = 0;
   int concate_offset = 0;
+  auto context = Singleton<Context>::Instance();
+  int device = context->device_id(std::this_thread::get_id());
   while (concate_offset < grad_.count()) {
     for (size_t i = 0; i < srclayers.size(); ++i) {
-      const float* src = grad_.cpu_data() + concate_offset;
-      float* dst = srclayers[i]->mutable_grad(this)->mutable_cpu_data()
-                   + srclayer_offset;
-      memcpy(dst, src, step * sizeof(float));
+      if (device == -1) {
+        const float* src = grad_.cpu_data() + concate_offset;
+        float* dst = srclayers[i]->mutable_grad(this)->mutable_cpu_data()
+          + srclayer_offset;
+        memcpy(dst, src, step * sizeof(float));
+      } else {
+#ifdef USE_GPU
+        const float* src = grad_.gpu_data() + concate_offset;
+        float* dst = srclayers[i]->mutable_grad(this)->mutable_gpu_data()
+          + srclayer_offset;
+        cudaMemcpy(dst, src, step * sizeof(float), cudaMemcpyDefault);
+#else
+        LOG(FATAL) << "GPU is supported";
+#endif
+      }
       concate_offset += step;
     }
     srclayer_offset += step;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bf4cd3bc/src/neuralnet/connection_layer/slice.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/connection_layer/slice.cc 
b/src/neuralnet/connection_layer/slice.cc
index b625c66..15fdc48 100644
--- a/src/neuralnet/connection_layer/slice.cc
+++ b/src/neuralnet/connection_layer/slice.cc
@@ -21,6 +21,8 @@
 
 #include "singa/neuralnet/connection_layer.h"
 #include "singa/utils/math_blob.h"
+#include "singa/utils/singleton.h"
+#include "singa/utils/context.h"
 
 namespace singa {
 
@@ -38,20 +40,20 @@ void SliceLayer::Setup(const LayerProto& conf,
   CHECK_EQ(srclayers.size(), 1);
   Layer::Setup(conf, srclayers);
   vector<int> shape = srclayers[0]->data(this).shape();
-  slice_dim = conf.slice_conf().slice_dim();
-  num_slices = conf.slice_conf().num_slices();
-  CHECK_GE(slice_dim, 0);
-  CHECK_LT(slice_dim, shape.size());
-  CHECK_GT(num_slices, 0);
+  slice_dim_ = conf.slice_conf().slice_dim();
+  num_slices_ = conf.slice_conf().num_slices();
+  CHECK_GE(slice_dim_, 0);
+  CHECK_LT(slice_dim_, shape.size());
+  CHECK_GT(num_slices_, 0);
   // add num_slices-1 more blobs
-  for (int i = 1; i < num_slices; ++i) {
+  for (int i = 1; i < num_slices_; ++i) {
     datavec_.push_back(new Blob<float>());
     gradvec_.push_back(new Blob<float>());
   }
   // TODO(wangsh): remove equal-size restrict later
-  CHECK_EQ(shape[slice_dim] % num_slices, 0);
-  shape[slice_dim] /= num_slices;
-  for (int i = 0; i < num_slices; ++i) {
+  CHECK_EQ(shape[slice_dim_] % num_slices_, 0);
+  shape[slice_dim_] /= num_slices_;
+  for (int i = 0; i < num_slices_; ++i) {
     // if (i == slice_num - 1) shape[slice_dim] += remain;
     datavec_[i]->Reshape(shape);
     gradvec_[i]->Reshape(shape);
@@ -62,16 +64,28 @@ void SliceLayer::ComputeFeature(int flag, const 
vector<Layer*>& srclayers) {
   CHECK_EQ(srclayers.size(), 1);
   const Blob<float>& blob = srclayers[0]->data(this);
   // calculate step for each memcpy
-  int step = datavec_[0]->shape()[slice_dim];
-  for (unsigned i = slice_dim + 1; i < datavec_[0]->shape().size(); ++i)
+  int step = datavec_[0]->shape()[slice_dim_];
+  for (unsigned i = slice_dim_ + 1; i < datavec_[0]->shape().size(); ++i)
     step *= datavec_[0]->shape()[i];
   int srclayer_offset = 0;
   int slice_offset = 0;
+  auto context = Singleton<Context>::Instance();
+  int device = context->device_id(std::this_thread::get_id());
   while (srclayer_offset < blob.count()) {
-    for (int i = 0; i < num_slices; ++i) {
-      const float* src = blob.cpu_data() + srclayer_offset;
-      float* dst = datavec_[i]->mutable_cpu_data() + slice_offset;
-      memcpy(dst, src, step * sizeof(float));
+    for (int i = 0; i < num_slices_; ++i) {
+      if (device == -1) {
+        const float* src = blob.cpu_data() + srclayer_offset;
+        float* dst = datavec_[i]->mutable_cpu_data() + slice_offset;
+        memcpy(dst, src, step * sizeof(float));
+      } else {
+#ifdef USE_GPU
+        const float* src = blob.gpu_data() + srclayer_offset;
+        float* dst = datavec_[i]->mutable_gpu_data() + slice_offset;
+        cudaMemcpy(dst, src, step * sizeof(float), cudaMemcpyDefault);
+#else
+        LOG(FATAL) << "GPU is supported";
+#endif
+      }
       srclayer_offset += step;
     }
     slice_offset += step;
@@ -82,16 +96,28 @@ void SliceLayer::ComputeGradient(int flag, const 
vector<Layer*>& srclayers) {
   CHECK_EQ(srclayers.size(), 1);
   Blob<float>* blob = srclayers[0]->mutable_grad(this);
   // calculate step for each memcpy
-  int step = gradvec_[0]->shape()[slice_dim];
-  for (size_t i = slice_dim + 1; i < gradvec_[0]->shape().size(); ++i)
+  int step = gradvec_[0]->shape()[slice_dim_];
+  for (size_t i = slice_dim_ + 1; i < gradvec_[0]->shape().size(); ++i)
     step *= gradvec_[0]->shape()[i];
   int srclayer_offset = 0;
   int slice_offset = 0;
+  auto context = Singleton<Context>::Instance();
+  int device = context->device_id(std::this_thread::get_id());
   while (srclayer_offset < blob->count()) {
-    for (int i = 0; i < num_slices; ++i) {
-      const float* src = gradvec_[i]->cpu_data() + slice_offset;
-      float* dst = blob->mutable_cpu_data() + srclayer_offset;
-      memcpy(dst, src, step * sizeof(float));
+    for (int i = 0; i < num_slices_; ++i) {
+      if (device == -1) {
+        const float* src = gradvec_[i]->cpu_data() + slice_offset;
+        float* dst = blob->mutable_cpu_data() + srclayer_offset;
+        memcpy(dst, src, step * sizeof(float));
+      } else {
+#ifdef USE_GPU
+        const float* src = gradvec_[i]->gpu_data() + slice_offset;
+        float* dst = blob->mutable_gpu_data() + srclayer_offset;
+        cudaMemcpy(dst, src, step * sizeof(float), cudaMemcpyDefault);
+#else
+        LOG(FATAL) << "GPU is supported";
+#endif
+      }
       srclayer_offset += step;
     }
     slice_offset += step;
@@ -100,27 +126,27 @@ void SliceLayer::ComputeGradient(int flag, const 
vector<Layer*>& srclayers) {
 
 const Blob<float>& SliceLayer::data(const Layer* from) {
   int idx = from ? layer_idx_.Get(from) : 0;
-  CHECK_LT(idx, num_slices);
+  CHECK_LT(idx, num_slices_);
   return *datavec_[idx];
 }
 
 const Blob<float>& SliceLayer::grad(const Layer* from) {
   int idx = from ? layer_idx_.Get(from) : 0;
-  CHECK_LT(idx, num_slices);
+  CHECK_LT(idx, num_slices_);
   return *gradvec_[idx];
 }
 
 Blob<float>* SliceLayer::mutable_data(const Layer* from) {
   CHECK(from);
   int idx = layer_idx_.Get(from);
-  CHECK_LT(idx, num_slices);
+  CHECK_LT(idx, num_slices_);
   return datavec_[idx];
 }
 
 Blob<float>* SliceLayer::mutable_grad(const Layer* from) {
   CHECK(from);
   int idx = layer_idx_.Get(from);
-  CHECK_LT(idx, num_slices);
+  CHECK_LT(idx, num_slices_);
   return gradvec_[idx];
 }
 const std::string SliceLayer::ToString(bool debug, int flag) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bf4cd3bc/src/neuralnet/connection_layer/split.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/connection_layer/split.cc 
b/src/neuralnet/connection_layer/split.cc
index a9270fb..e46b902 100644
--- a/src/neuralnet/connection_layer/split.cc
+++ b/src/neuralnet/connection_layer/split.cc
@@ -37,14 +37,14 @@ void SplitLayer::Setup(const LayerProto& conf,
   CHECK_EQ(srclayers.size(), 1);
   Layer::Setup(conf, srclayers);
   data_.Reshape(srclayers[0]->data(this).shape());
-  data_.ShareData(srclayers[0]->data(this), false);
-  int num_splits = conf.split_conf().num_splits();
-  CHECK_GT(num_splits, 0);
+  data_.ShareData(srclayers[0]->mutable_data(this), false);
+  num_splits_ = conf.split_conf().num_splits();
+  CHECK_GT(num_splits_, 0);
   // add num_splits-1 more grad blobs
-  for (int i = 1; i < num_splits; ++i) {
+  for (int i = 1; i < num_splits_; ++i) {
     gradvec_.push_back(new Blob<float>());
   }
-  for (int i = 0; i < num_splits; ++i)
+  for (int i = 0; i < num_splits_; ++i)
     gradvec_[i]->Reshape(srclayers[0]->data(this).shape());
 }
 
@@ -56,7 +56,7 @@ void SplitLayer::ComputeFeature(int flag, const 
vector<Layer*>& srclayers) {
 void SplitLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
   CHECK_EQ(srclayers.size(), 1);
   // aggregate all gradients to grad_[0]
-  for (int i = 1; i < num_splits; ++i)
+  for (int i = 1; i < num_splits_; ++i)
     AXPY<float>(1.0, *gradvec_[i], gradvec_[0]);
   // copy grad_[0] to srclayer's grad
   Copy(*gradvec_[0], srclayers[0]->mutable_grad(this));
@@ -65,14 +65,14 @@ void SplitLayer::ComputeGradient(int flag, const 
vector<Layer*>& srclayers) {
 const Blob<float>& SplitLayer::grad(const Layer* from) {
   CHECK(from);
   int idx = layer_idx_.Get(from);
-  CHECK_LT(idx, num_splits);
+  CHECK_LT(idx, num_splits_);
   return *gradvec_[idx];
 }
 
 Blob<float>* SplitLayer::mutable_grad(const Layer* from) {
   CHECK(from);
   int idx = layer_idx_.Get(from);
-  CHECK_LT(idx, num_splits);
+  CHECK_LT(idx, num_splits_);
   return gradvec_[idx];
 }
 const std::string SplitLayer::ToString(bool debug, int flag) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bf4cd3bc/src/utils/graph.cc
----------------------------------------------------------------------
diff --git a/src/utils/graph.cc b/src/utils/graph.cc
index 0211e5a..4f59635 100644
--- a/src/utils/graph.cc
+++ b/src/utils/graph.cc
@@ -97,7 +97,10 @@ Node* Graph::AddNode(const string& name, const string& 
origin, int id,
 Node* Graph::AddNode(const string& name,
                      const std::map<string, string>& attrs) {
   Node* node = new Node(name, attrs);
-  AddNode(node);
+  nodes_.push_back(node);
+  CHECK(name2node_.find(node->name) == name2node_.end())
+    << "node " << node->name << " already exists";
+  name2node_[node->name] = node;
   return node;
 }
 

Reply via email to