SINGA-188 Add Dense layer

Minor change to format code and update IDs of DenseConf fields.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/64ea2065
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/64ea2065
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/64ea2065

Branch: refs/heads/dev
Commit: 64ea2065411ed29d6870d75c8577cbe086f4daa7
Parents: 73d4a34
Author: Wei Wang <[email protected]>
Authored: Thu Jun 2 12:02:16 2016 +0800
Committer: Wei Wang <[email protected]>
Committed: Thu Jun 2 12:02:16 2016 +0800

----------------------------------------------------------------------
 src/model/layer/dense.cc |  20 ++--
 src/model/layer/dense.h  |  15 ++--
 src/proto/model.proto    |   8 +-
 test/singa/test_dense.cc | 177 ++++++++++++++++++++++--------------------
 4 files changed, 115 insertions(+), 105 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/64ea2065/src/model/layer/dense.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.cc b/src/model/layer/dense.cc
index ebee62a..29ff8cb 100644
--- a/src/model/layer/dense.cc
+++ b/src/model/layer/dense.cc
@@ -29,7 +29,7 @@ Dense::~Dense() { }
 
 void Dense::Setup(const LayerConf &conf) {
   Layer::Setup(conf);
-  DenseConf dense_conf = conf.dense_conf();
+  auto dense_conf = conf.dense_conf();
   hdim_ = dense_conf.num_output();
   vdim_ = dense_conf.num_input();
   transpose_ = dense_conf.transpose();
@@ -45,7 +45,8 @@ void Dense::Setup(const LayerConf &conf) {
 /// \copydoc Layer::Forward(int flag, const Tensor&)
 const Tensor Dense::Forward(int flag, const Tensor &input) {
   Tensor output;
-  if (transpose_)
+
+  if (transpose_)  // use the transposed version of weight_ for computing
     output = Mult(input, weight_);
   else
     output = Mult(input, weight_.T());
@@ -55,8 +56,8 @@ const Tensor Dense::Forward(int flag, const Tensor &input) {
 }
 
 /// \copydoc Layer::Backward(int, const Tensor&, const Tensor&);
-const std::pair<Tensor, vector<Tensor>>
-Dense::Backward(int flag, const Tensor &grad) {
+const std::pair<Tensor, vector<Tensor>> Dense::Backward(int flag,
+                                                        const Tensor &grad) {
   vector<Tensor> param_grad;
   Tensor src_data = buf_.top();
   buf_.pop();
@@ -65,11 +66,10 @@ Dense::Backward(int flag, const Tensor &grad) {
   dw.ResetLike(weight_);
   dx.ResetLike(src_data);
   SumRows(grad, &db);
-  if (transpose_){
-    dx = Mult(grad, weight_.T());
+  if (transpose_) {
+    dx = Mult(grad, weight_.T());
     dw = Mult(src_data.T(), grad);
-  }
-  else{
+  } else {
     dx = Mult(grad, weight_);
     dw = Mult(grad.T(), src_data);
   }
@@ -78,8 +78,8 @@ Dense::Backward(int flag, const Tensor &grad) {
   return std::make_pair(dx, param_grad);
 }
 
-void Dense::ToDevice(Device *device) { 
+void Dense::ToDevice(Device *device) {
   weight_.ToDevice(device);
-  bias_.ToDevice(device); 
+  bias_.ToDevice(device);
 }
 } // namespace singa


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/64ea2065/src/model/layer/dense.h
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.h b/src/model/layer/dense.h
index d686a01..a5a6f66 100644
--- a/src/model/layer/dense.h
+++ b/src/model/layer/dense.h
@@ -33,7 +33,6 @@ class Dense : public Layer {
 
   /// \copydoc Layer::Setup(const LayerConf&);
   void Setup(const LayerConf& conf) override;
-  void SetupParam(const Tensor& input);
 
   /// \copydoc Layer::Forward(int flag, const Tensor&)
   const Tensor Forward(int flag, const Tensor& input) override;
@@ -42,12 +41,12 @@ class Dense : public Layer {
                                                   const Tensor& grad) override;
 
   void ToDevice(Device* device) override;
-
+
   size_t num_output() const { return hdim_; }
   size_t num_input() const { return vdim_; }
   bool transpose() const { return transpose_; }
-  const Tensor &weight() const { return weight_; }
-  const Tensor &bias() const { return bias_; }
+  const Tensor& weight() const { return weight_; }
+  const Tensor& bias() const { return bias_; }
 
   void set_weight(Tensor w) {
     weight_.ResetLike(w);
@@ -58,9 +57,11 @@ class Dense : public Layer {
     bias_.CopyData(b);
   }
 
-protected:
-  size_t batchsize_, vdim_, hdim_;
-  bool transpose_;
+ protected:
+  /// Used in auto-encoder, where the decoder would share its weight matrix from
+  /// the encoder's transposed weight matrix.
+  bool transpose_ = false;
+  size_t vdim_, hdim_;
   Tensor weight_, bias_;
   // Tensor data_, grad_;
   std::stack<Tensor> buf_;


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/64ea2065/src/proto/model.proto
----------------------------------------------------------------------
diff --git a/src/proto/model.proto b/src/proto/model.proto
index 75e2be7..16ba62f 100644
--- a/src/proto/model.proto
+++ b/src/proto/model.proto
@@ -208,11 +208,8 @@ message LayerConf {
   // optional ImageDataConf image_data_conf = 115;
   optional InfogainLossConf infogain_loss_conf = 116;
   optional InnerProductConf inner_product_conf = 117;
-  optional DenseConf dense_conf = 150;
   optional LogConf log_conf = 134;
   optional LRNConf lrn_conf = 118;
-  // Used in SINGA
-  optional MetricConf metric_conf = 200;
   // optional MemoryDataConf memory_data_conf = 119;
   optional MVNConf mvn_conf = 120;
   optional PoolingConf pooling_conf = 121;
@@ -230,6 +227,10 @@ message LayerConf {
   optional ThresholdConf threshold_conf = 128;
   optional TileConf tile_conf = 138;
   //optional WindowDataConf window_data_conf = 129;
+
+  // Used in SINGA
+  optional DenseConf dense_conf = 201;
+  optional MetricConf metric_conf = 200;
 }
 
 // Message that stores hyper-parameters used to apply transformation
@@ -584,6 +585,7 @@ message DenseConf {
   // all preceding axes are retained in the output.
   // May be negative to index from the end (e.g., -1 for the last axis).
   optional int32 axis = 5 [default = 1];
+  optional uint32 num_input = 20; // The number of inputs for the layer
   optional bool transpose = 21 [default = false]; // whether transpose or not
 }


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/64ea2065/test/singa/test_dense.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_dense.cc b/test/singa/test_dense.cc
index 6d136af..606f819 100644
--- a/test/singa/test_dense.cc
+++ b/test/singa/test_dense.cc
@@ -19,8 +19,8 @@
  *
  *************************************************************/
 #include "../src/model/layer/dense.h"
-
 #include "gtest/gtest.h"
+#include "singa_config.h"
 using singa::Dense;
 
 TEST(Dense, Setup) {
@@ -34,8 +34,8 @@ TEST(Dense, Setup) {
   denseconf->set_transpose(false);
   dense.Setup(conf);
 
-  EXPECT_EQ(3, dense.num_output());
-  EXPECT_EQ(2, dense.num_input());
+  EXPECT_EQ(3u, dense.num_output());
+  EXPECT_EQ(2u, dense.num_input());
 }
 
 TEST(Dense, ForwardCpp) {
@@ -47,82 +47,40 @@ TEST(Dense, ForwardCpp) {
   denseconf->set_num_output(3);
   denseconf->set_transpose(false);
   dense.Setup(conf);
-
   const size_t batchsize = 3, vdim = 2, hdim = 3;
-  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
-                                     6.0f};
+  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
   singa::Tensor in(singa::Shape{batchsize, vdim});
   in.CopyDataFromHostPtr(x, batchsize * vdim);
-
+
   // set weight
-  const float we[hdim * vdim] = {
-      1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+  const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
   singa::Tensor weight(singa::Shape{hdim, vdim});
   weight.CopyDataFromHostPtr(we, hdim * vdim);
 
-  const float bia[hdim] = {
-      1.0f, 1.0f, 1.0f};
+  const float bia[hdim] = {1.0f, 1.0f, 1.0f};
   singa::Tensor bias(singa::Shape{hdim});
   bias.CopyDataFromHostPtr(bia, hdim);
-
+
   dense.set_weight(weight);
   dense.set_bias(bias);
-
+
   singa::Tensor out1 = dense.Forward(singa::kTrain, in);
   singa::CppCPU host(0, 1);
   const float *outptr1 = out1.data<const float *>();
-  EXPECT_EQ(9, out1.Size());
+  EXPECT_EQ(9u, out1.Size());
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 3; j++)
-      EXPECT_FLOAT_EQ((x[i * 2 + 0] * we[j * 2 + 0] + x[i * 2 + 1] * we[j * 2 + 1] + bia[j]), outptr1[i * 3 + j]);
+      EXPECT_FLOAT_EQ((x[i * 2 + 0] * we[j * 2 + 0] +
+                       x[i * 2 + 1] * we[j * 2 + 1] + bia[j]),
+                      outptr1[i * 3 + j]);
 }
 
-TEST(Dense, ForwardCuda) {
-  Dense dense;
-  singa::LayerConf conf;
-  singa::DenseConf *denseconf = conf.mutable_dense_conf();
-  denseconf->set_num_input(2);
-  denseconf->set_num_output(3);
-  denseconf->set_transpose(false);
-  dense.Setup(conf);
-
-
-  const size_t batchsize = 3, vdim = 2, hdim = 3;
-  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
-                                     6.0f};
-  singa::CudaGPU cuda(0, 1);
-  singa::Tensor in(singa::Shape{batchsize, vdim}, &cuda);
-  in.CopyDataFromHostPtr(x, batchsize * vdim);
-
-  // set weight
-  const float we[hdim * vdim] = {
-      1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
-  singa::Tensor weight(singa::Shape{hdim, vdim}, &cuda);
-  weight.CopyDataFromHostPtr(we, hdim * vdim);
-
-  const float bia[hdim] = {
-      1.0f, 1.0f, 1.0f};
-  singa::Tensor bias(singa::Shape{hdim}, &cuda);
-  bias.CopyDataFromHostPtr(bia, hdim);
-
-  dense.set_weight(weight);
-  dense.set_bias(bias);
-
-  singa::Tensor out1 = dense.Forward(singa::kTrain, in);
-  singa::CppCPU host(0, 1);
-  out1.ToDevice(&host);
-  const float *outptr1 = out1.data<const float *>();
-  EXPECT_EQ(9, out1.Size());
-  for (int i = 0; i < 3; i++)
-    for (int j = 0; j < 3; j++)
-      EXPECT_FLOAT_EQ((x[i * 2 + 0] * we[j * 2 + 0] + x[i * 2 + 1] * we[j * 2 + 1] + bia[j]), outptr1[i * 3 + j]);
-}
 
 TEST(Dense, BackwardCpp) {
   Dense dense;
-
+
   singa::LayerConf conf;
   singa::DenseConf *denseconf = conf.mutable_dense_conf();
   denseconf->set_num_input(2);
@@ -131,30 +89,28 @@ TEST(Dense, BackwardCpp) {
   dense.Setup(conf);
 
   const size_t batchsize = 3, vdim = 2, hdim = 3;
-  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
-                                     6.0f};
+  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
   singa::CudaGPU cuda(0, 1);
   singa::Tensor in(singa::Shape{batchsize, vdim});
   in.CopyDataFromHostPtr(x, batchsize * vdim);
 
   // set weight
-  const float we[hdim * vdim] = {
-      1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+  const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
   singa::Tensor weight(singa::Shape{hdim, vdim});
   weight.CopyDataFromHostPtr(we, hdim * vdim);
-
-  const float bia[hdim] = {
-      1.0f, 1.0f, 1.0f};
+
+  const float bia[hdim] = {1.0f, 1.0f, 1.0f};
   singa::Tensor bias(singa::Shape{hdim});
   bias.CopyDataFromHostPtr(bia, hdim);
-
+
   dense.set_weight(weight);
   dense.set_bias(bias);
 
   singa::Tensor out1 = dense.Forward(singa::kTrain, in);
   // grad
-  const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 3.0f, 3.0f};
+  const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f,
+                                      2.0f, 3.0f, 3.0f, 3.0f};
   singa::Tensor grad(singa::Shape{batchsize, hdim});
   grad.CopyDataFromHostPtr(dy, batchsize * hdim);
@@ -164,24 +120,70 @@
   singa::Tensor dweight = ret.second.at(0);
   singa::Tensor dbias = ret.second.at(1);
   const float *dx = in_grad.data<const float *>();
-  EXPECT_EQ(6, in_grad.Size());
+  EXPECT_EQ(6u, in_grad.Size());
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 2; j++)
-      EXPECT_FLOAT_EQ((dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 + j] + dy[i * 3 + 2] * we[2 * 2 + j]), dx[i * 2 + j]);
+      EXPECT_FLOAT_EQ(
+          (dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 + j] +
+           dy[i * 3 + 2] * we[2 * 2 + j]),
+          dx[i * 2 + j]);
   const float *dweightx = dweight.data<const float *>();
-  EXPECT_EQ(6, dweight.Size());
+  EXPECT_EQ(6u, dweight.Size());
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 2; j++)
-      EXPECT_FLOAT_EQ((dy[0 * 3 + i] * x[0 *2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] + dy[2 * 3 + i] * x[2 * 2 + j]), dweightx[i * 2 + j]);
+      EXPECT_FLOAT_EQ(
+          (dy[0 * 3 + i] * x[0 * 2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] +
+           dy[2 * 3 + i] * x[2 * 2 + j]),
+          dweightx[i * 2 + j]);
   const float *dbiasx = dbias.data<const float *>();
-  EXPECT_EQ(3, dbias.Size());
+  EXPECT_EQ(3u, dbias.Size());
   for (int i = 0; i < 3; i++)
     EXPECT_FLOAT_EQ((dy[0 * 3 + i] + dy[1 * 3 + i] + dy[2 * 3 + i]), dbiasx[i]);
 }
 
+#ifdef USE_CUDA
+TEST(Dense, ForwardCuda) {
+  Dense dense;
+
+  singa::LayerConf conf;
+  singa::DenseConf *denseconf = conf.mutable_dense_conf();
+  denseconf->set_num_input(2);
+  denseconf->set_num_output(3);
+  denseconf->set_transpose(false);
+  dense.Setup(conf);
+
+  const size_t batchsize = 3, vdim = 2, hdim = 3;
+  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  singa::CudaGPU cuda(0, 1);
+  singa::Tensor in(singa::Shape{batchsize, vdim}, &cuda);
+  in.CopyDataFromHostPtr(x, batchsize * vdim);
+
+  // set weight
+  const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+  singa::Tensor weight(singa::Shape{hdim, vdim}, &cuda);
+  weight.CopyDataFromHostPtr(we, hdim * vdim);
+
+  const float bia[hdim] = {1.0f, 1.0f, 1.0f};
+  singa::Tensor bias(singa::Shape{hdim}, &cuda);
+  bias.CopyDataFromHostPtr(bia, hdim);
+
+  dense.set_weight(weight);
+  dense.set_bias(bias);
+
+  singa::Tensor out1 = dense.Forward(singa::kTrain, in);
+  singa::CppCPU host(0, 1);
+  out1.ToDevice(&host);
+  const float *outptr1 = out1.data<const float *>();
+  EXPECT_EQ(9u, out1.Size());
+  for (int i = 0; i < 3; i++)
+    for (int j = 0; j < 3; j++)
+      EXPECT_FLOAT_EQ((x[i * 2 + 0] * we[j * 2 + 0] +
+                       x[i * 2 + 1] * we[j * 2 + 1] + bia[j]),
+                      outptr1[i * 3 + j]);
+}
 TEST(Dense, BackwardCuda) {
   Dense dense;
-
+
   singa::LayerConf conf;
   singa::DenseConf *denseconf = conf.mutable_dense_conf();
   denseconf->set_num_input(2);
@@ -190,30 +192,28 @@ TEST(Dense, BackwardCuda) {
   dense.Setup(conf);
 
   const size_t batchsize = 3, vdim = 2, hdim = 3;
-  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
-                                     6.0f};
+  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
   singa::CudaGPU cuda(0, 1);
   singa::Tensor in(singa::Shape{batchsize, vdim}, &cuda);
   in.CopyDataFromHostPtr(x, batchsize * vdim);
 
   // set weight
-  const float we[hdim * vdim] = {
-      1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+  const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
   singa::Tensor weight(singa::Shape{hdim, vdim}, &cuda);
   weight.CopyDataFromHostPtr(we, hdim * vdim);
-
-  const float bia[hdim] = {
-      1.0f, 1.0f, 1.0f};
+
+  const float bia[hdim] = {1.0f, 1.0f, 1.0f};
   singa::Tensor bias(singa::Shape{hdim}, &cuda);
   bias.CopyDataFromHostPtr(bia, hdim);
-
+
   dense.set_weight(weight);
   dense.set_bias(bias);
 
   singa::Tensor out1 = dense.Forward(singa::kTrain, in);
   // grad
-  const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 3.0f, 3.0f};
+  const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f,
+                                      2.0f, 3.0f, 3.0f, 3.0f};
   singa::Tensor grad(singa::Shape{batchsize, hdim}, &cuda);
   grad.CopyDataFromHostPtr(dy, batchsize * hdim);
@@ -224,19 +224,26 @@
   singa::Tensor dbias = ret.second.at(1);
   in_grad.ToDevice(&host);
   const float *dx = in_grad.data<const float *>();
-  EXPECT_EQ(6, in_grad.Size());
+  EXPECT_EQ(6u, in_grad.Size());
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 2; j++)
-      EXPECT_FLOAT_EQ((dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 + j] + dy[i * 3 + 2] * we[2 * 2 + j]), dx[i * 2 + j]);
+      EXPECT_FLOAT_EQ(
+          (dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 + j] +
+           dy[i * 3 + 2] * we[2 * 2 + j]),
+          dx[i * 2 + j]);
   dweight.ToDevice(&host);
   const float *dweightx = dweight.data<const float *>();
-  EXPECT_EQ(6, dweight.Size());
+  EXPECT_EQ(6u, dweight.Size());
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 2; j++)
-      EXPECT_FLOAT_EQ((dy[0 * 3 + i] * x[0 *2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] + dy[2 * 3 + i] * x[2 * 2 + j]), dweightx[i * 2 + j]);
+      EXPECT_FLOAT_EQ(
+          (dy[0 * 3 + i] * x[0 * 2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] +
+           dy[2 * 3 + i] * x[2 * 2 + j]),
+          dweightx[i * 2 + j]);
   dbias.ToDevice(&host);
   const float *dbiasx = dbias.data<const float *>();
-  EXPECT_EQ(3, dbias.Size());
+  EXPECT_EQ(3u, dbias.Size());
   for (int i = 0; i < 3; i++)
     EXPECT_FLOAT_EQ((dy[0 * 3 + i] + dy[1 * 3 + i] + dy[2 * 3 + i]), dbiasx[i]);
 }
+#endif
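For reference, with transpose = false the layer stores the weight W with shape (hdim, vdim) and takes input x of shape (batchsize, vdim); the expectations in the tests above correspond to the following forward and backward math (a summary sketch of what dense.cc computes, with L denoting the loss):

\[
y = x W^{\top} + b, \qquad
\frac{\partial L}{\partial x} = \frac{\partial L}{\partial y}\, W, \qquad
\frac{\partial L}{\partial W} = \Bigl(\frac{\partial L}{\partial y}\Bigr)^{\top} x, \qquad
\frac{\partial L}{\partial b} = \sum_{i} \Bigl(\frac{\partial L}{\partial y}\Bigr)_{i,:}
\]

When transpose = true, W is stored as (vdim, hdim) and the layer instead computes y = x W, dL/dx = (dL/dy) W^T, and dL/dW = x^T (dL/dy), matching the Mult(input, weight_), Mult(grad, weight_.T()), and Mult(src_data.T(), grad) branches in dense.cc.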
