Repository: incubator-singa
Updated Branches:
  refs/heads/master f16b1be6f -> 7cdb22f68


SINGA-111 Add slice, concat and split layers

Add slice layer implementation and test case


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/28e48a6f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/28e48a6f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/28e48a6f

Branch: refs/heads/master
Commit: 28e48a6f5abddfa17dd473ef912ace9fefe7ddaf
Parents: f16b1be
Author: WANG Sheng <[email protected]>
Authored: Tue Dec 8 18:23:02 2015 +0800
Committer: WANG Sheng <[email protected]>
Committed: Thu Dec 10 16:22:43 2015 +0800

----------------------------------------------------------------------
 .../singa/neuralnet/connection_layer/slice.h    |   4 +
 src/neuralnet/connection_layer/slice.cc         | 108 ++++++++-----
 src/proto/job.proto                             |   1 +
 src/test/test_connection_layers.cc              | 156 +++++++++++++++++--
 4 files changed, 212 insertions(+), 57 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28e48a6f/include/singa/neuralnet/connection_layer/slice.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/connection_layer/slice.h b/include/singa/neuralnet/connection_layer/slice.h
index 615685b..a2f715c 100644
--- a/include/singa/neuralnet/connection_layer/slice.h
+++ b/include/singa/neuralnet/connection_layer/slice.h
@@ -37,6 +37,10 @@ class SliceLayer : public ConnectionLayer {
   void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
   void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
   void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
+  const Blob<float>& data(const Layer* from) const override;
+  const Blob<float>& grad(const Layer* from) const override;
+  Blob<float>* mutable_data(const Layer* from) override;
+  Blob<float>* mutable_grad(const Layer* from) override;
 
  private:
   std::vector<Blob<float>> datavec_;
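
The accessors added above replace the single shared data_/grad_ blob: each
destination layer now receives its own slice, selected by its partition_id
(see the .cc change below). A minimal sketch of the call pattern, assuming
the same setup as the tests further down (the helper function and the
partition id are illustrative, not part of the commit):

  #include "singa/neuralnet/connection_layer/slice.h"
  #include "singa/neuralnet/neuron_layer/dummy.h"

  // Assumes `slice` was Setup() with slice_num >= 3, as in the tests below.
  const singa::Blob<float>& SliceFor(singa::SliceLayer& slice,
                                     singa::DummyLayer& dst) {
    // dst's LayerProto carried set_partition_id(2), so this returns the
    // third piece, i.e. datavec_[2].
    return slice.data(&dst);
  }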

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28e48a6f/src/neuralnet/connection_layer/slice.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/connection_layer/slice.cc b/src/neuralnet/connection_layer/slice.cc
index 8acbf94..66d3578 100644
--- a/src/neuralnet/connection_layer/slice.cc
+++ b/src/neuralnet/connection_layer/slice.cc
@@ -26,61 +26,89 @@ namespace singa {
 using std::vector;
 
 void SliceLayer::Setup(const LayerProto& conf,
-    const vector<Layer*>& srclayers) {
-  /*
-  Layer::Setup(conf, npartitions);
+                       const vector<Layer*>& srclayers) {
+  CHECK_EQ(srclayers.size(), 1);
+  Layer::Setup(conf, srclayers);
   slice_dim_ = conf.slice_conf().slice_dim();
-  slice_num_ = npartitions;
+  slice_num_ = conf.slice_conf().slice_num();
+  vector<int> shape = srclayers[0]->data(this).shape();
   CHECK_GE(slice_dim_, 0);
-  CHECK_EQ(slice_num_, dstlayers_.size());
-  data_.Reshape(srclayers[0]->data(this).shape());
-  grad_.ReshapeLike(data_);
+  CHECK_LT(slice_dim_, shape.size());
+  CHECK_GT(slice_num_, 0);
   datavec_.resize(slice_num_);
   gradvec_.resize(slice_num_);
-  CHECK_EQ(data_.count() % slice_num_, 0);  // restrict equal slicing
-  // LOG(ERROR)<<"slice dim "<<slice_dim<<" slice num "<<slice_num;
-  for (int i = 0; i < slice_num_; i++) {
-    vector<int> newshape(data_.shape());
-    newshape[slice_dim_] = newshape[slice_dim_] / slice_num_ +
-      ((i == slice_num_ - 1) ? newshape[slice_dim_] % slice_num_ : 0);
-    datavec_[i].Reshape(newshape);
-    gradvec_[i].Reshape(newshape);
-    // LOG(ERROR)<<"slice "<<IntVecToString(newshape);
+  // TODO(wangsh): remove equal-size restrict later
+  CHECK_EQ(shape[slice_dim_] % slice_num_, 0);
+  shape[slice_dim_] /= slice_num_;
+  for (int i = 0; i < slice_num_; ++i) {
+    // if (i == slice_num - 1) shape[slice_dim_] += remain;
+    datavec_[i].Reshape(shape);
+    gradvec_[i].Reshape(shape);
   }
-  */
-  LOG(FATAL) << "Not implemented";
 }
 
 void SliceLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
-  /*
   CHECK_EQ(srclayers.size(), 1);
-  if (slice_dim_ == 0) {
-    const auto& blob = srclayers.at(0)->data(this);
-    int size = blob.count() / slice_num_;
-    for (int i = 0; i < slice_num_; i++) {
-      float* dst = datavec_[i].mutable_cpu_data();
-      const float* src = blob.cpu_data() + i * size;
-      memcpy(dst, src, size*sizeof(float));
+  const Blob<float>& blob = srclayers[0]->data(this);
+  // calculate step for each memcpy
+  int step = datavec_[0].shape()[slice_dim_];
+  for (unsigned i = slice_dim_ + 1; i < datavec_[0].shape().size(); ++i)
+    step *= datavec_[0].shape()[i];
+  int srclayer_offset = 0;
+  int slice_offset = 0;
+  while (srclayer_offset < blob.count()) {
+    for (int i = 0; i < slice_num_; ++i) {
+      const float* src = blob.cpu_data() + srclayer_offset;
+      float* dst = datavec_[i].mutable_cpu_data() + slice_offset;
+      memcpy(dst, src, step * sizeof(float));
+      srclayer_offset += step;
     }
+    slice_offset += step;
   }
-  */
-  LOG(FATAL) << "Not implemented";
 }
 
 void SliceLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
-  LOG(FATAL) << "Not implemented";
+  CHECK_EQ(srclayers.size(), 1);
+  Blob<float>* blob = srclayers[0]->mutable_grad(this);
+  // calculate step for each memcpy
+  int step = gradvec_[0].shape()[slice_dim_];
+  for (size_t i = slice_dim_ + 1; i < gradvec_[0].shape().size(); ++i)
+    step *= gradvec_[0].shape()[i];
+  int srclayer_offset = 0;
+  int slice_offset = 0;
+  while (srclayer_offset < blob->count()) {
+    for (int i = 0; i < slice_num_; ++i) {
+      const float* src = gradvec_[i].cpu_data() + slice_offset;
+      float* dst = blob->mutable_cpu_data() + srclayer_offset;
+      memcpy(dst, src, step * sizeof(float));
+      srclayer_offset += step;
+    }
+    slice_offset += step;
+  }
 }
 
-/*
-int SliceLayer::SliceID(const Layer* layer) const {
-  CHECK(layer != nullptr);
-  for (size_t i = 0; i < datavec_.size(); i++) {
-    // LOG(ERROR)<<"get slice "<<IntVecToString(shapes_[i]);
-    if (dstlayers_[i] == layer)
-      return i;
-  }
-  CHECK(false);
-  return -1;
-}*/
+const Blob<float>& SliceLayer::data(const Layer* from) const {
+  CHECK(from);
+  CHECK_LT(from->partition_id(), datavec_.size());
+  return datavec_[from->partition_id()];
+}
+
+const Blob<float>& SliceLayer::grad(const Layer* from) const {
+  CHECK(from);
+  CHECK_LT(from->partition_id(), gradvec_.size());
+  return gradvec_[from->partition_id()];
+}
+
+Blob<float>* SliceLayer::mutable_data(const Layer* from) {
+  CHECK(from);
+  CHECK_LT(from->partition_id(), datavec_.size());
+  return &datavec_[from->partition_id()];
+}
+
+Blob<float>* SliceLayer::mutable_grad(const Layer* from) {
+  CHECK(from);
+  CHECK_LT(from->partition_id(), gradvec_.size());
+  return &gradvec_[from->partition_id()];
+}
 
 }  // namespace singa
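
The copy loops above treat the source blob as repeated groups of slice_num_
chunks of `step` floats, where step is the per-slice product of the
dimensions from slice_dim_ onward. A self-contained sketch of the same
offset arithmetic on plain arrays (the shapes are illustrative, not taken
from the commit):

  #include <cassert>
  #include <cstring>
  #include <vector>

  // Mirrors SliceLayer::ComputeFeature's offsets for a 2-D blob of shape
  // (rows, cols) sliced along dim 1 into `num` equal pieces.
  int main() {
    const int rows = 2, cols = 6, num = 3;  // illustrative sizes
    const int step = cols / num;            // floats per memcpy
    std::vector<float> src(rows * cols);
    for (size_t i = 0; i < src.size(); ++i) src[i] = static_cast<float>(i);
    std::vector<std::vector<float>> slices(num,
        std::vector<float>(rows * step));
    int src_offset = 0, slice_offset = 0;
    while (src_offset < rows * cols) {
      for (int i = 0; i < num; ++i) {       // scatter one row into the slices
        std::memcpy(slices[i].data() + slice_offset,
                    src.data() + src_offset, step * sizeof(float));
        src_offset += step;
      }
      slice_offset += step;
    }
    // Slice 1 holds columns 2..3 of each row: {2, 3, 8, 9}.
    assert(slices[1][0] == 2 && slices[1][3] == 9);
    return 0;
  }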

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28e48a6f/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
index d3f51b2..ef8f23f 100644
--- a/src/proto/job.proto
+++ b/src/proto/job.proto
@@ -453,6 +453,7 @@ message PoolingProto {
 
 message SliceProto {
   required int32 slice_dim = 1;
+  required int32 slice_num = 2;
 }
 
 message ReLUProto {
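
Since both fields are now required, a configuration must set slice_dim and
slice_num together. Built programmatically, as the tests below do (the
values here are illustrative):

  #include "singa/proto/job.pb.h"

  // Illustrative: slice along dim 1 into 4 equal pieces.
  singa::LayerProto MakeSliceConf() {
    singa::LayerProto proto;
    proto.set_name("slice");
    proto.mutable_slice_conf()->set_slice_dim(1);
    proto.mutable_slice_conf()->set_slice_num(4);
    return proto;
  }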

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28e48a6f/src/test/test_connection_layers.cc
----------------------------------------------------------------------
diff --git a/src/test/test_connection_layers.cc b/src/test/test_connection_layers.cc
index 837a941..a2cb1d3 100644
--- a/src/test/test_connection_layers.cc
+++ b/src/test/test_connection_layers.cc
@@ -26,23 +26,28 @@
 #include "singa/comm/msg.h"
 #include "singa/comm/socket.h"
 #include "singa/neuralnet/connection_layer/bridge.h"
+#include "singa/neuralnet/connection_layer/slice.h"
 #include "singa/neuralnet/neuron_layer/dummy.h"
 #include "singa/proto/job.pb.h"
 
 using namespace singa;
 
+const int N = 10;  // size of dim 0
+const int M = 20;  // size of dim 1
+const int K = 5;  // number of slices (partitions)
+
 TEST(ConnectionLayerTest, DummyTest) {
   // use dummy as input layer
   vector<Layer*> src_in;
   LayerProto proto_in;
   proto_in.set_name("dummy_input");
   proto_in.mutable_dummy_conf()->set_input(true);
-  proto_in.mutable_dummy_conf()->add_shape(10);
-  proto_in.mutable_dummy_conf()->add_shape(20);
+  proto_in.mutable_dummy_conf()->add_shape(N);
+  proto_in.mutable_dummy_conf()->add_shape(M);
   DummyLayer in;
   in.Setup(proto_in, src_in);
-  ASSERT_EQ(in.data(nullptr).shape(0), 10);
-  ASSERT_EQ(in.data(nullptr).shape(1), 20);
+  ASSERT_EQ(in.data(nullptr).shape(0), N);
+  ASSERT_EQ(in.data(nullptr).shape(1), M);
   in.ComputeFeature(0, src_in);
 
   // use dummy as neuron layer
@@ -53,8 +58,8 @@ TEST(ConnectionLayerTest, DummyTest) {
   proto_neu.mutable_dummy_conf();
   DummyLayer neu;
   neu.Setup(proto_neu, src_neu);
-  ASSERT_EQ(neu.data(nullptr).shape(0), 10);
-  ASSERT_EQ(neu.data(nullptr).shape(1), 20);
+  ASSERT_EQ(neu.data(nullptr).shape(0), N);
+  ASSERT_EQ(neu.data(nullptr).shape(1), M);
   neu.ComputeFeature(0, src_neu);
   ASSERT_EQ(in.data(nullptr).count(), neu.data(nullptr).count());
   for (int i = 0; i < in.data(nullptr).count(); ++i)
@@ -68,8 +73,8 @@ TEST(ConnectionLayerTest, DummyTest) {
   proto_out.mutable_dummy_conf()->set_output(true);
   DummyLayer out;
   out.Setup(proto_out, src_out);
-  ASSERT_EQ(out.data(nullptr).shape(0), 10);
-  ASSERT_EQ(out.data(nullptr).shape(1), 20);
+  ASSERT_EQ(out.data(nullptr).shape(0), N);
+  ASSERT_EQ(out.data(nullptr).shape(1), M);
   out.ComputeFeature(0, src_out);
   ASSERT_EQ(in.data(nullptr).count(), out.data(nullptr).count());
   for (int i = 0; i < in.data(nullptr).count(); ++i)
@@ -83,15 +88,14 @@ TEST(ConnectionLayerTest, DummyTest) {
     ASSERT_EQ(in.grad(nullptr).cpu_data()[i], out.grad(nullptr).cpu_data()[i]);
 }
 
-
 TEST(ConnectionLayerTest, BridgeTest) {
   // use dummy as input layer
   vector<Layer*> src_in;
   LayerProto proto_in;
   proto_in.set_name("dummy_input");
   proto_in.mutable_dummy_conf()->set_input(true);
-  proto_in.mutable_dummy_conf()->add_shape(10);
-  proto_in.mutable_dummy_conf()->add_shape(20);
+  proto_in.mutable_dummy_conf()->add_shape(N);
+  proto_in.mutable_dummy_conf()->add_shape(M);
   DummyLayer in;
   in.Setup(proto_in, src_in);
 
@@ -99,11 +103,11 @@ TEST(ConnectionLayerTest, BridgeTest) {
   vector<Layer*> src_src;
   src_src.push_back(static_cast<Layer*>(&in));
   LayerProto proto_src;
-  proto_in.set_name("bridge_src");
+  proto_src.set_name("bridge_src");
   BridgeSrcLayer src;
   src.Setup(proto_src, src_src);
-  ASSERT_EQ(src.data(nullptr).shape(0), 10);
-  ASSERT_EQ(src.data(nullptr).shape(1), 20);
+  ASSERT_EQ(src.data(nullptr).shape(0), N);
+  ASSERT_EQ(src.data(nullptr).shape(1), M);
 
   // add dst bridge layer
   vector<Layer*> src_dst;
@@ -112,11 +116,11 @@ TEST(ConnectionLayerTest, BridgeTest) {
   proto_dst.set_name("bridge_dst");
   BridgeDstLayer dst;
   dst.Setup(proto_dst, src_dst);
-  ASSERT_EQ(dst.data(nullptr).shape(0), 10);
-  ASSERT_EQ(dst.data(nullptr).shape(1), 20);
+  ASSERT_EQ(dst.data(nullptr).shape(0), N);
+  ASSERT_EQ(dst.data(nullptr).shape(1), M);
 
   // bind bridges to socket
-  Router router(10);
+  Router router(N);
   router.Bind("inproc://router");
   Dealer dealer(0);
   dealer.Connect("inproc://router");
@@ -155,3 +159,121 @@ TEST(ConnectionLayerTest, BridgeTest) {
   for (int i = 0; i < in.grad(nullptr).count(); ++i)
     ASSERT_EQ(in.grad(nullptr).cpu_data()[i], out.grad(nullptr).cpu_data()[i]);
 }
+
+TEST(ConnectionLayerTest, DataSliceTest) {
+  // use dummy as input layer
+  vector<Layer*> src_in;
+  LayerProto proto_in;
+  proto_in.set_name("dummy_input");
+  proto_in.mutable_dummy_conf()->set_input(true);
+  proto_in.mutable_dummy_conf()->add_shape(N);
+  proto_in.mutable_dummy_conf()->add_shape(M);
+  DummyLayer in;
+  in.Setup(proto_in, src_in);
+
+  // add slice layer
+  vector<Layer*> src_slice;
+  src_slice.push_back(static_cast<Layer*>(&in));
+  LayerProto proto_slice;
+  proto_slice.set_name("slice");
+  proto_slice.mutable_slice_conf()->set_slice_dim(0);
+  proto_slice.mutable_slice_conf()->set_slice_num(K);
+  SliceLayer slice;
+  slice.Setup(proto_slice, src_slice);
+  ASSERT_EQ(slice.data(static_cast<Layer*>(&slice)).shape(0), N / K);
+  ASSERT_EQ(slice.data(static_cast<Layer*>(&slice)).shape(1), M);
+
+  // use dummy as output layers
+  LayerProto proto_out[K];
+  vector<Layer*> src_out[K];
+  DummyLayer out[K];
+  for (int i = 0; i < K; ++i) {
+    src_out[i].push_back(static_cast<Layer*>(&slice));
+    proto_out[i].set_name("dummy_output_"+std::to_string(i));
+    proto_out[i].set_partition_id(i);
+    proto_out[i].mutable_dummy_conf()->set_output(true);
+    out[i].Setup(proto_out[i], src_out[i]);
+  }
+
+  // test for computing feature
+  in.ComputeFeature(0, src_in);
+  slice.ComputeFeature(0, src_slice);
+  for (int i = 0; i < K; ++i)
+    out[i].ComputeFeature(0, src_out[i]);
+  int step = (N * M) / K;
+  for (int i = 0; i < in.data(nullptr).count(); ++i) {
+    ASSERT_EQ(in.data(nullptr).cpu_data()[i],
+              out[i / step].data(nullptr).cpu_data()[i % step]);
+  }
+
+  // test for computing gradient
+  for (int i = 0; i < K; ++i)
+    out[i].ComputeGradient(0, src_out[i]);
+  slice.ComputeGradient(0, src_slice);
+  in.ComputeGradient(0, src_in);
+  for (int i = 0; i < in.grad(nullptr).count(); ++i) {
+    ASSERT_EQ(in.grad(nullptr).cpu_data()[i],
+              out[i / step].grad(nullptr).cpu_data()[i % step]);
+  }
+}
+
+TEST(ConnectionLayerTest, ModelSliceTest) {
+  // use dummy as input layer
+  vector<Layer*> src_in;
+  LayerProto proto_in;
+  proto_in.set_name("dummy_input");
+  proto_in.mutable_dummy_conf()->set_input(true);
+  proto_in.mutable_dummy_conf()->add_shape(N);
+  proto_in.mutable_dummy_conf()->add_shape(M);
+  DummyLayer in;
+  in.Setup(proto_in, src_in);
+
+  // add slice layer
+  vector<Layer*> src_slice;
+  src_slice.push_back(static_cast<Layer*>(&in));
+  LayerProto proto_slice;
+  proto_slice.set_name("slice");
+  proto_slice.mutable_slice_conf()->set_slice_dim(1);
+  proto_slice.mutable_slice_conf()->set_slice_num(K);
+  SliceLayer slice;
+  slice.Setup(proto_slice, src_slice);
+  ASSERT_EQ(slice.data(static_cast<Layer*>(&slice)).shape(0), N);
+  ASSERT_EQ(slice.data(static_cast<Layer*>(&slice)).shape(1), M / K);
+
+  // use dummy as output layers
+  LayerProto proto_out[K];
+  vector<Layer*> src_out[K];
+  DummyLayer out[K];
+  for (int i = 0; i < K; ++i) {
+    src_out[i].push_back(static_cast<Layer*>(&slice));
+    proto_out[i].set_name("dummy_output_"+std::to_string(i));
+    proto_out[i].set_partition_id(i);
+    proto_out[i].mutable_dummy_conf()->set_output(true);
+    out[i].Setup(proto_out[i], src_out[i]);
+  }
+
+  // test for computing feature
+  in.ComputeFeature(0, src_in);
+  slice.ComputeFeature(0, src_slice);
+  for (int i = 0; i < K; ++i)
+    out[i].ComputeFeature(0, src_out[i]);
+  int step = M / K;
+  int offset = 0;
+  for (int i = 0; i < in.data(nullptr).count(); ++i) {
+    if (i && i % M == 0) offset += step;
+    ASSERT_EQ(in.data(nullptr).cpu_data()[i],
+              out[(i / step) % K].data(nullptr).cpu_data()[offset + i % step]);
+  }
+
+  // test for computing gradient
+  for (int i = 0; i < K; ++i)
+    out[i].ComputeGradient(0, src_out[i]);
+  slice.ComputeGradient(0, src_slice);
+  in.ComputeGradient(0, src_in);
+  offset = 0;
+  for (int i = 0; i < in.grad(nullptr).count(); ++i) {
+    if (i && i % M == 0) offset += step;
+    ASSERT_EQ(in.grad(nullptr).cpu_data()[i],
+              out[(i / step) % K].grad(nullptr).cpu_data()[offset + i % step]);
+  }
+}
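
The verification loops in ModelSliceTest rely on a small index identity:
for a row-major (N, M) blob cut along dim 1 into K pieces of width
step = M / K, flat element i lands in slice (i / step) % K at local index
(i / M) * step + i % step, which is what the running `offset` tracks. A
standalone check of that mapping (sizes illustrative):

  #include <cassert>

  // Checks ModelSliceTest's index mapping for a row-major (N, M) blob
  // sliced along dim 1 into K pieces of width step = M / K.
  int main() {
    const int N = 4, M = 6, K = 3, step = M / K;  // illustrative sizes
    for (int i = 0; i < N * M; ++i) {
      const int row = i / M, col = i % M;
      assert(col / step == (i / step) % K);  // which slice gets element i
      assert(row * step + col % step ==
             (i / M) * step + i % step);     // local index inside the slice
    }
    return 0;
  }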
