SINGA-21 Code review 4 Update layers for RBM. The CD algorithm follows Hinton's Science paper to do sampling (only the hidden layer is sampled). May add configuration fields to control the sampling of each layer. Note: the first Gibbs iteration samples the positive data of the hidden layer (not the negative data, which is uninitialized).
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/ae203036 Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/ae203036 Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/ae203036 Branch: refs/heads/master Commit: ae20303620a952033be67cc005765dd985fe5fe2 Parents: 63adbda Author: zhaojing <[email protected]> Authored: Thu Sep 3 16:10:21 2015 +0800 Committer: wangwei <[email protected]> Committed: Fri Sep 4 15:32:04 2015 +0800 ---------------------------------------------------------------------- README.md | 15 ++--- examples/rbm/rbm0.conf | 31 ++++++---- examples/rbm/rbm1.conf | 32 ++++++---- examples/rbm/rbm2.conf | 31 ++++++---- examples/rbm/rbm3.conf | 30 +++++---- include/neuralnet/neuron_layer.h | 10 +-- src/neuralnet/neuron_layer.cc | 111 ++++++++++++++++++---------------- src/proto/job.proto | 6 +- 8 files changed, 148 insertions(+), 118 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ae203036/README.md ---------------------------------------------------------------------- diff --git a/README.md b/README.md index 449e624..f1f490b 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,8 @@ -##Apache SINGA - -Distributed deep learning system - -[Project Website](http://singa.incubator.apache.org) - -All the details can be found in project website. + +##Apache SINGA + +Distributed deep learning system + +[Project Website](http://singa.incubator.apache.org) + +All the details can be found in project website. 
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ae203036/examples/rbm/rbm0.conf ---------------------------------------------------------------------- diff --git a/examples/rbm/rbm0.conf b/examples/rbm/rbm0.conf index a7e503b..dba4f37 100644 --- a/examples/rbm/rbm0.conf +++ b/examples/rbm/rbm0.conf @@ -1,14 +1,14 @@ name: "rbm0" train_steps: 6000 test_steps:100 -test_freq:100 +test_freq:500 disp_freq: 100 train_one_batch{ alg: kCD } updater{ type: kSGD - momentum: 0.9 + momentum: 0.8 weight_decay: 0.0002 learning_rate{ base_lr: 0.1 @@ -54,12 +54,21 @@ layer{ type: kRBMVis srclayers:"mnist" srclayers:"RBMHid" + rbm_conf{ + hdim: 1000 + } param{ - name: "w1_" - share_from: "w1" + name: "w1" + init{ + type: kGaussian + mean: 0.0 + std: 0.1 + } } + param{ name: "rb11" + wd_scale: 0 init{ type: kConstant value: 0.0 @@ -71,19 +80,17 @@ layer{ name: "RBMHid" type: kRBMHid srclayers:"RBMVis" - rbmhid_conf{ - hid_dim: 1000 + rbm_conf{ + hdim: 1000 } param{ - name: "w1" - init{ - type: kGaussian - mean: 0.0 - std: 0.1 - } + name: "w1_" + share_from: "w1" } + param{ name: "rb12" + wd_scale: 0 init{ type: kConstant value: 0.0 http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ae203036/examples/rbm/rbm1.conf ---------------------------------------------------------------------- diff --git a/examples/rbm/rbm1.conf b/examples/rbm/rbm1.conf index db27d3a..ac6c059 100644 --- a/examples/rbm/rbm1.conf +++ b/examples/rbm/rbm1.conf @@ -1,7 +1,7 @@ name: "rbm1" train_steps: 6000 test_steps:100 -test_freq:1000 +test_freq:500 disp_freq: 100 train_one_batch{ alg: kCD @@ -9,7 +9,7 @@ train_one_batch{ checkpoint_path: "examples/rbm/rbm0/checkpoint/step6000-worker0.bin" updater{ type: kSGD - momentum: 0.9 + momentum: 0.8 weight_decay: 0.0002 learning_rate{ base_lr: 0.1 @@ -76,12 +76,21 @@ layer{ type: kRBMVis srclayers:"sigmoid1" srclayers:"RBMHid" + rbm_conf{ + hdim: 500 + } param{ - name: "w2_" - share_from: "w2" + name: "w2" + init{ + type: kGaussian + mean: 0.0 + std: 
0.1 + } } + param{ name: "rb21" + wd_scale: 0 init{ type: kConstant value: 0.0 @@ -93,19 +102,16 @@ layer{ name: "RBMHid" type: kRBMHid srclayers:"RBMVis" - rbmhid_conf{ - hid_dim: 500 + rbm_conf{ + hdim: 500 } param{ - name: "w2" - init{ - type: kGaussian - mean: 0.0 - std: 0.1 - } + name: "w2_" + share_from: "w2" } - param{ + param{ name: "rb22" + wd_scale: 0 init{ type: kConstant value: 0.0 http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ae203036/examples/rbm/rbm2.conf ---------------------------------------------------------------------- diff --git a/examples/rbm/rbm2.conf b/examples/rbm/rbm2.conf index fd08907..96841ff 100644 --- a/examples/rbm/rbm2.conf +++ b/examples/rbm/rbm2.conf @@ -1,7 +1,7 @@ name: "rbm2" train_steps: 6000 test_steps:100 -test_freq:1000 +test_freq:500 disp_freq: 100 train_one_batch{ alg: kCD @@ -10,7 +10,7 @@ checkpoint_path: "examples/rbm/rbm1/checkpoint/step6000-worker0.bin" updater{ type: kSGD - momentum: 0.9 + momentum: 0.8 weight_decay: 0.0002 learning_rate{ base_lr: 0.1 @@ -98,12 +98,21 @@ layer{ type: kRBMVis srclayers:"sigmoid2" srclayers:"RBMHid" + rbm_conf{ + hdim: 250 + } param{ - name: "w3_" - share_from: "w3" + name: "w3" + init{ + type: kGaussian + mean: 0.0 + std: 0.1 + } } + param{ name: "rb31" + wd_scale: 0 init{ type: kConstant value: 0.0 @@ -115,19 +124,17 @@ layer{ name: "RBMHid" type: kRBMHid srclayers:"RBMVis" - rbmhid_conf{ - hid_dim: 250 + rbm_conf{ + hdim: 250 } param{ - name: "w3" - init{ - type: kGaussian - mean: 0.0 - std: 0.1 - } + name: "w3_" + share_from: "w3" } + param{ name: "rb32" + wd_scale: 0 init{ type: kConstant value: 0.0 http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ae203036/examples/rbm/rbm3.conf ---------------------------------------------------------------------- diff --git a/examples/rbm/rbm3.conf b/examples/rbm/rbm3.conf index fe7cc1f..fa60114 100644 --- a/examples/rbm/rbm3.conf +++ b/examples/rbm/rbm3.conf @@ -1,7 +1,7 @@ name: "rbm3" train_steps: 6000 test_steps: 
100 -test_freq: 1000 +test_freq: 500 disp_freq: 100 train_one_batch{ alg: kCD @@ -9,7 +9,7 @@ train_one_batch{ checkpoint_path: "examples/rbm/rbm2/checkpoint/step6000-worker0.bin" updater{ type: kSGD - momentum: 0.9 + momentum: 0.8 weight_decay: 0.0002 learning_rate{ base_lr: 0.001 @@ -118,13 +118,20 @@ layer{ type: kRBMVis srclayers:"sigmoid3" srclayers:"RBMHid" + rbm_conf{ + hdim: 30 + } param{ - name: "w4_" - share_from: "w4" - + name: "w4" + init{ + type: kGaussian + mean: 0.0 + std: 0.1 + } } param{ name: "rb41" + wd_scale: 0 init{ type: kConstant value: 0.0 @@ -136,20 +143,17 @@ layer{ name: "RBMHid" type: kRBMHid srclayers:"RBMVis" - rbmhid_conf{ - hid_dim: 30 + rbm_conf{ + hdim: 30 gaussian: true } param{ - name: "w4" - init{ - type: kGaussian - mean: 0.0 - std: 0.1 - } + name: "w4_" + share_from: "w4" } param{ name: "rb42" + wd_scale: 0 init{ type: kConstant value: 0.0 http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ae203036/include/neuralnet/neuron_layer.h ---------------------------------------------------------------------- diff --git a/include/neuralnet/neuron_layer.h b/include/neuralnet/neuron_layer.h index ce88ced..e5663d8 100644 --- a/include/neuralnet/neuron_layer.h +++ b/include/neuralnet/neuron_layer.h @@ -143,6 +143,7 @@ class SigmoidLayer: public Layer { class RBMLayer: public Layer { public: virtual ~RBMLayer() {} + void Setup(const LayerProto& proto, int npartitions) override; const Blob<float>& neg_data(const Layer* layer) { return neg_data_; } @@ -153,14 +154,17 @@ class RBMLayer: public Layer { std::vector<Param*> params{weight_, bias_}; return params; } - virtual Blob<float>* Sample(int flat) = 0; + virtual Blob<float>* Sample(int flat); protected: + //! if ture, sampling according to guassian distribution + bool gaussian_; //! dimension of the hidden layer int hdim_; //! 
dimension of the visible layer int vdim_; int batchsize_; + bool first_gibbs_; Param* weight_, *bias_; Blob<float> neg_data_; @@ -177,7 +181,6 @@ class RBMVisLayer: public RBMLayer { void Setup(const LayerProto& proto, int npartitions) override; void ComputeFeature(int flag, Metric* perf) override; void ComputeGradient(int flag, Metric* perf) override; - Blob<float>* Sample(int flat) override; private: RBMLayer* hid_layer_; @@ -192,11 +195,8 @@ class RBMHidLayer: public RBMLayer { void Setup(const LayerProto& proto, int npartitions) override; void ComputeFeature(int flag, Metric* perf) override; void ComputeGradient(int flag, Metric* perf) override; - Blob<float>* Sample(int flat) override; private: - // whether use gaussian sampling - bool gaussian_; RBMLayer *vis_layer_; }; http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ae203036/src/neuralnet/neuron_layer.cc ---------------------------------------------------------------------- diff --git a/src/neuralnet/neuron_layer.cc b/src/neuralnet/neuron_layer.cc index 5feb14b..b86d7da 100644 --- a/src/neuralnet/neuron_layer.cc +++ b/src/neuralnet/neuron_layer.cc @@ -164,6 +164,35 @@ void DropoutLayer::ComputeGradient(int flag, Metric* perf) { auto gsrc = Tensor1(srclayers_[0]->mutable_grad(this)); gsrc = grad * mask; } + + +/**************** Implementation for RBMLayer********************/ +Blob<float>* RBMLayer::Sample(int flag) { + Tensor<cpu, 2> sample, data; + if ((flag & kPositive) == kPositive || first_gibbs_) { + data = Tensor2(&data_); + sample = Tensor2(&sample_); + } else { + data = Tensor2(&neg_data_); + sample = Tensor2(&neg_sample_); + } + auto random = TSingleton<Random<cpu>>::Instance(); + if (gaussian_) { + random->SampleGaussian(sample, 0.0f, 1.0f); + sample += data; + } else { + random->SampleBinary(sample, data); + } + return (flag & kPositive) == kPositive || first_gibbs_ ? 
+ &sample_ : &neg_sample_; +} +void RBMLayer::Setup(const LayerProto& proto, int npartitions) { + CHECK_EQ(npartitions, 1); // TODO test for npartitions > 1 + Layer::Setup(proto, npartitions); + hdim_ = proto.rbm_conf().hdim(); + gaussian_ = proto.rbm_conf().gaussian(); + first_gibbs_ = true; +} /**************** Implementation for RBMVisLayer********************/ RBMVisLayer::~RBMVisLayer() { delete weight_; @@ -171,7 +200,7 @@ RBMVisLayer::~RBMVisLayer() { } void RBMVisLayer::Setup(const LayerProto& proto, int npartitions) { - Layer::Setup(proto, npartitions); + RBMLayer::Setup(proto, npartitions); CHECK_EQ(srclayers_.size(), 2); hid_layer_ = nullptr; for (auto src : srclayers_) { @@ -185,31 +214,23 @@ void RBMVisLayer::Setup(const LayerProto& proto, int npartitions) { input_layer_ = srclayers_[0] != hid_layer_ ? srclayers_[0]: srclayers_[1]; const auto& src = input_layer_->data(this); batchsize_ = src.shape()[0]; - data_.ReshapeLike(src); // this is visible dimension + data_.ReshapeLike(src); neg_data_.ReshapeLike(data_); neg_sample_.ReshapeLike(data_); + vdim_ = src.count() / batchsize_; weight_ = Param::Create(proto.param(0)); + weight_ ->Setup(vector<int>{hdim_, vdim_}); bias_ = Param::Create(proto.param(1)); - bias_->Setup(vector<int>{src.count() / batchsize_}); -} -Blob<float>* RBMVisLayer::Sample(int flag) { - Tensor<cpu, 2> sample, data; - if ((flag & kPositive) == kPositive) { - LOG(FATAL) << "RBMVisLayer can not be sampled for positive flag"; - } else { - data = Tensor2(&neg_data_); - sample = Tensor2(&neg_sample_); - } - auto random = TSingleton<Random<cpu>>::Instance(); - random->SampleBinary(sample, data); - return &neg_sample_; + bias_->Setup(vector<int>{vdim_}); } + void RBMVisLayer::ComputeFeature(int flag, Metric* perf) { - if ((flag & kPositive) == kPositive) { /*positive flag*/ + if ((flag & kPositive) == kPositive) { data_.CopyFrom(input_layer_->data(this), true); - } else if ((flag & kNegative) == kNegative) { /*negative flag*/ - auto 
hid_sample = Tensor2(hid_layer_->Sample(flag)); + first_gibbs_ = true; + } else if ((flag & kNegative) == kNegative) { // fetch sampling results from hidden layer + auto hid_sample = Tensor2(hid_layer_->Sample(flag)); auto data = Tensor2(&neg_data_); auto weight = Tensor2(weight_->mutable_data()); auto bias = Tensor1(bias_->mutable_data()); @@ -224,15 +245,25 @@ void RBMVisLayer::ComputeFeature(int flag, Metric* perf) { } perf->Add("Squared Error", err / batchsize_); } + first_gibbs_ = false; } } void RBMVisLayer::ComputeGradient(int flag, Metric* perf) { auto vis_pos = Tensor2(&data_); auto vis_neg = Tensor2(&neg_data_); - auto gbias = Tensor1(bias_->mutable_grad()); + auto hid_pos = Tensor2(hid_layer_->mutable_data(this)); + auto hid_neg = Tensor2(hid_layer_->mutable_neg_data(this)); + + auto gbias = Tensor1(bias_->mutable_grad()); gbias = expr::sum_rows(vis_neg); gbias -= expr::sum_rows(vis_pos); + gbias /= batchsize_; + + auto gweight = Tensor2(weight_->mutable_grad()); + gweight = dot(hid_neg.T(), vis_neg); + gweight -= dot(hid_pos.T(), vis_pos); + gweight /= batchsize_; } /**************** Implementation for RBMHidLayer********************/ RBMHidLayer::~RBMHidLayer() { @@ -242,54 +273,36 @@ RBMHidLayer::~RBMHidLayer() { void RBMHidLayer::Setup(const LayerProto& proto, int npartitions) { - Layer::Setup(proto, npartitions); + RBMLayer::Setup(proto, npartitions); CHECK_EQ(srclayers_.size(), 1); const auto& src_data = srclayers_[0]->data(this); batchsize_ = src_data.shape()[0]; - vdim_ = src_data.count()/batchsize_; - hdim_ = proto.rbmhid_conf().hid_dim(); - gaussian_ = proto.rbmhid_conf().gaussian(); + vdim_ = src_data.count() / batchsize_; data_.Reshape(vector<int>{batchsize_, hdim_}); neg_data_.ReshapeLike(data_); sample_.ReshapeLike(data_); neg_sample_.ReshapeLike(data_); weight_ = Param::Create(proto.param(0)); + weight_->Setup(vector<int>{hdim_, vdim_}); bias_ = Param::Create(proto.param(1)); bias_->Setup(vector<int>{hdim_}); - 
weight_->Setup(vector<int>{hdim_, vdim_}); vis_layer_ = static_cast<RBMVisLayer*> (srclayers_[0]); } -Blob<float>* RBMHidLayer::Sample(int flag) { - Tensor<cpu, 2> sample, data; - if ((flag & kPositive) == kPositive) { - data = Tensor2(&data_); - sample = Tensor2(&sample_); - } else { - data = Tensor2(&neg_data_); - sample = Tensor2(&neg_sample_); - } - auto random = TSingleton<Random<cpu>>::Instance(); - if (gaussian_) { // first gibbs - random->SampleGaussian(sample, 0.0f, 1.0f); - sample += data; - } else { - random->SampleBinary(sample, data); - } - return (flag & kPositive) == kPositive ? &sample_ : &neg_sample_; -} - void RBMHidLayer::ComputeFeature(int flag, Metric* perf) { auto weight = Tensor2(weight_->mutable_data()); auto bias = Tensor1(bias_->mutable_data()); Tensor<cpu, 2> data, src; - if ((flag & kPositive) == kPositive) { /*postive flag*/ + if ((flag & kPositive) == kPositive) { data = Tensor2(&data_); src = Tensor2(vis_layer_->mutable_data(this)); + first_gibbs_ = true; } else { data = Tensor2(&neg_data_); - src = Tensor2(vis_layer_->Sample(flag)); + // hinton's science paper does not sample the vis layer + src = Tensor2(vis_layer_->mutable_neg_data(this)); + first_gibbs_ = false; } data = dot(src, weight.T()); data += expr::repmat(bias, batchsize_); @@ -301,18 +314,10 @@ void RBMHidLayer::ComputeFeature(int flag, Metric* perf) { void RBMHidLayer::ComputeGradient(int flag, Metric* perf) { auto hid_pos = Tensor2(&data_); auto hid_neg = Tensor2(&neg_data_); - auto vis_pos = Tensor2(vis_layer_->mutable_data(this)); - auto vis_neg = Tensor2(vis_layer_->mutable_data(this)); - auto gbias = Tensor1(bias_->mutable_grad()); gbias = expr::sum_rows(hid_neg); gbias -= expr::sum_rows(hid_pos); gbias /= batchsize_; - - auto gweight = Tensor2(weight_->mutable_grad()); - gweight = dot(hid_neg.T(), vis_neg); - gweight -= dot(hid_pos.T(), vis_pos); - gweight /= batchsize_; } /*********** Implementation for InnerProductLayer**********/ 
InnerProductLayer::~InnerProductLayer() { http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ae203036/src/proto/job.proto ---------------------------------------------------------------------- diff --git a/src/proto/job.proto b/src/proto/job.proto index 6d45963..dcee10b 100644 --- a/src/proto/job.proto +++ b/src/proto/job.proto @@ -185,7 +185,7 @@ message LayerProto { // configuration for prefetch layer optional PrefetchProto prefetch_conf = 44; // configuration for rbmhid layer - optional RBMHidProto rbmhid_conf = 49; + optional RBMProto rbm_conf = 49; // configuration for rectified linear unit layer optional ReLUProto relu_conf = 38; // configuration for rgb image parser layer @@ -357,8 +357,8 @@ message DropoutProto { optional float dropout_ratio = 30 [default = 0.5]; } -message RBMHidProto { - optional int32 hid_dim = 1; // The number of outputs for the layer +message RBMProto { + required int32 hdim = 1; // The number of outputs for the layer optional bool bias_term = 2 [default = true]; // whether to have bias terms optional bool gaussian = 3 [default = false]; // use gaussian sampling or not }
