SINGA-111 Add slice, concate and split layers

Add split layer implementation and test case
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/7e414e5e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/7e414e5e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/7e414e5e

Branch: refs/heads/master
Commit: 7e414e5e3c468eb4c225e09448d1f03a345a3152
Parents: 3ea1eb6
Author: WANG Sheng <[email protected]>
Authored: Tue Dec 8 22:05:50 2015 +0800
Committer: WANG Sheng <[email protected]>
Committed: Thu Dec 10 16:33:50 2015 +0800

----------------------------------------------------------------------
 .../singa/neuralnet/connection_layer/split.h |  7 ++-
 src/neuralnet/connection_layer/split.cc      | 33 +++++++++--
 src/proto/job.proto                          |  2 +-
 src/test/test_connection_layers.cc           | 58 ++++++++++++++++++++
 4 files changed, 92 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7e414e5e/include/singa/neuralnet/connection_layer/split.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/connection_layer/split.h b/include/singa/neuralnet/connection_layer/split.h
index 7557766..f4de238 100644
--- a/include/singa/neuralnet/connection_layer/split.h
+++ b/include/singa/neuralnet/connection_layer/split.h
@@ -38,9 +38,12 @@ class SplitLayer : public ConnectionLayer {
   void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
   void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
   void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
+  const Blob<float>& grad(const Layer* from) const override;
+  Blob<float>* mutable_grad(const Layer* from) override;

- protected:
-  Blob<float> grads_;
+ private:
+  std::vector<Blob<float>> grads_;
+  int split_num_;
 };

 }  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7e414e5e/src/neuralnet/connection_layer/split.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/connection_layer/split.cc b/src/neuralnet/connection_layer/split.cc
index beb5619..a9fd291 100644
--- a/src/neuralnet/connection_layer/split.cc
+++ b/src/neuralnet/connection_layer/split.cc
@@ -26,19 +26,42 @@ namespace singa {
 using std::vector;

 void SplitLayer::Setup(const LayerProto& conf,
-    const vector<Layer*>& srclayers) {
-  Layer::Setup(conf, srclayers);
+                       const vector<Layer*>& srclayers) {
   CHECK_EQ(srclayers.size(), 1);
+  Layer::Setup(conf, srclayers);
+  split_num_ = conf.split_conf().split_num();
   data_.Reshape(srclayers[0]->data(this).shape());
-  grad_.Reshape(srclayers[0]->data(this).shape());
+  data_.ShareData(srclayers[0]->data(this));
+  grads_.resize(split_num_);
+  for (int i = 0; i < split_num_; ++i)
+    grads_[i].Reshape(srclayers[0]->data(this).shape());
 }

 void SplitLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
-  LOG(FATAL) << "Not implemented";
+  // data is shared from the source layer,
+  // so there is nothing to do in the feature computation phase
 }

 void SplitLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
-  LOG(FATAL) << "Not implemented";
+  CHECK_EQ(srclayers.size(), 1);
+  // aggregate all gradients into grads_[0]
+  for (int i = 1; i < split_num_; ++i)
+    for (int j = 0; j < grads_[0].count(); ++j)
+      grads_[0].mutable_cpu_data()[j] += grads_[i].cpu_data()[j];
+  // copy grads_[0] to the source layer's grad
+  srclayers[0]->mutable_grad(this)->CopyFrom(grads_[0]);
+}
+
+const Blob<float>& SplitLayer::grad(const Layer* from) const {
+  CHECK(from);
+  CHECK_LT(from->partition_id(), grads_.size());
+  return grads_[from->partition_id()];
+}
+
+Blob<float>* SplitLayer::mutable_grad(const Layer* from) {
+  CHECK(from);
+  CHECK_LT(from->partition_id(), grads_.size());
+  return &grads_[from->partition_id()];
 }

 }  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7e414e5e/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
index ef8f23f..7a30b73 100644
--- a/src/proto/job.proto
+++ b/src/proto/job.proto
@@ -320,7 +320,7 @@ message PrefetchProto {
 }

 message SplitProto {
-  optional int32 num_splits = 1 [default = 1];
+  optional int32 split_num = 1 [default = 1];
 }

 message StoreProto {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7e414e5e/src/test/test_connection_layers.cc
----------------------------------------------------------------------
diff --git a/src/test/test_connection_layers.cc b/src/test/test_connection_layers.cc
index e072114..4d8c984 100644
--- a/src/test/test_connection_layers.cc
+++ b/src/test/test_connection_layers.cc
@@ -28,6 +28,7 @@
 #include "singa/neuralnet/connection_layer/bridge.h"
 #include "singa/neuralnet/connection_layer/concate.h"
 #include "singa/neuralnet/connection_layer/slice.h"
+#include "singa/neuralnet/connection_layer/split.h"
 #include "singa/neuralnet/neuron_layer/dummy.h"
 #include "singa/proto/job.pb.h"

@@ -396,3 +397,60 @@ TEST(ConnectionLayerTest, ModelConcateTest) {
                  out.grad(nullptr).cpu_data()[i]);
   }
 }
+
+TEST(ConnectionLayerTest, SplitTest) {
+  // use dummy as input layer
+  vector<Layer*> src_in;
+  LayerProto proto_in;
+  proto_in.set_name("dummy_input");
+  proto_in.mutable_dummy_conf()->set_input(true);
+  proto_in.mutable_dummy_conf()->add_shape(N);
+  proto_in.mutable_dummy_conf()->add_shape(M);
+  DummyLayer in;
+  in.Setup(proto_in, src_in);
+
+  // add split layer
+  vector<Layer*> src_split;
+  src_split.push_back(static_cast<Layer*>(&in));
+  LayerProto proto_split;
+  proto_split.set_name("split");
+  proto_split.mutable_split_conf()->set_split_num(K);
+  SplitLayer split;
+  split.Setup(proto_split, src_split);
+  ASSERT_EQ(split.data(static_cast<Layer*>(&split)).shape(0), N);
+  ASSERT_EQ(split.data(static_cast<Layer*>(&split)).shape(1), M);
+
+  // use dummy as output layers
+  LayerProto proto_out[K];
+  vector<Layer*> src_out[K];
+  DummyLayer out[K];
+  for (int i = 0; i < K; ++i) {
+    src_out[i].push_back(static_cast<Layer*>(&split));
+    proto_out[i].set_name("dummy_output_" + std::to_string(i));
+    proto_out[i].set_partition_id(i);
+    proto_out[i].mutable_dummy_conf()->set_output(true);
+    out[i].Setup(proto_out[i], src_out[i]);
+  }
+
+  // test for computing feature
+  in.ComputeFeature(0, src_in);
+  split.ComputeFeature(0, src_split);
+  for (int i = 0; i < K; ++i)
+    out[i].ComputeFeature(0, src_out[i]);
+  for (int i = 0; i < in.data(nullptr).count(); ++i) {
+    for (int k = 0; k < K; ++k)
+      ASSERT_EQ(in.data(nullptr).cpu_data()[i],
+                out[k].data(nullptr).cpu_data()[i]);
+  }
+
+  // test for computing gradient
+  for (int i = 0; i < K; ++i)
+    out[i].ComputeGradient(0, src_out[i]);
+  split.ComputeGradient(0, src_split);
+  in.ComputeGradient(0, src_in);
+  for (int i = 0; i < in.grad(nullptr).count(); ++i) {
+    float grad = 0;
+    for (int k = 0; k < K; ++k) grad += out[k].grad(nullptr).cpu_data()[i];
+    ASSERT_EQ(in.grad(nullptr).cpu_data()[i], grad);
+  }
+}
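
For context, the pattern the new ComputeGradient implements is plain gradient
fan-in: the split layer shares its input blob with split_num_ destination
layers in the forward pass (via Blob::ShareData), keeps one gradient blob per
destination, and sums them in the backward pass. Below is a minimal
self-contained sketch of that aggregation; the Buffer type and
AggregateSplitGrads function are hypothetical stand-ins, not SINGA's actual
Blob/Layer API.

  #include <cassert>
  #include <vector>

  // Hypothetical stand-in for singa::Blob<float>: a flat float buffer.
  typedef std::vector<float> Buffer;

  // Sum the gradients from all consumers of a split node into the source
  // layer's gradient buffer (mirrors grads_[0] += grads_[i]; CopyFrom).
  void AggregateSplitGrads(const std::vector<Buffer>& grads, Buffer* src_grad) {
    assert(!grads.empty());
    src_grad->assign(grads[0].begin(), grads[0].end());
    for (size_t i = 1; i < grads.size(); ++i) {
      assert(grads[i].size() == src_grad->size());
      for (size_t j = 0; j < src_grad->size(); ++j)
        (*src_grad)[j] += grads[i][j];
    }
  }

  int main() {
    // K = 3 consumers, 4 elements each; consumer i sends gradient (i + 1).
    std::vector<Buffer> grads(3, Buffer(4));
    for (size_t i = 0; i < grads.size(); ++i)
      grads[i].assign(4, static_cast<float>(i + 1));
    Buffer src_grad;
    AggregateSplitGrads(grads, &src_grad);
    assert(src_grad[0] == 6.0f);  // 1 + 2 + 3
    return 0;
  }

This is also what the new test asserts at the end: each element of the input
layer's gradient equals the sum of the corresponding elements across the K
output layers' gradients.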

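One user-visible consequence of the job.proto change: configurations that
still use the old field name num_splits will no longer parse, since text-format
protobuf rejects unknown fields. A quick way to verify the rename against the
generated code is sketched below; it assumes job.pb.h is generated with
package singa, as the test's include of "singa/proto/job.pb.h" suggests.

  #include <google/protobuf/text_format.h>
  #include "singa/proto/job.pb.h"

  int main() {
    singa::SplitProto conf;
    // parses with the new field name ...
    bool ok = google::protobuf::TextFormat::ParseFromString("split_num: 2", &conf);
    // ... while the old name is now an unknown field and fails to parse
    bool old_ok =
        google::protobuf::TextFormat::ParseFromString("num_splits: 2", &conf);
    return (ok && conf.split_num() == 2 && !old_ok) ? 0 : 1;
  }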