Repository: incubator-singa Updated Branches: refs/heads/master 6afa895b8 -> fbbcaafdb
SINGA-9 Add Support for Restricted Boltzmann Machine (RBM) model This is to implement RBM in SINGA. To train RBM models, the Contrastive Divergence (CD) algorithm is implemented. A BPWorker has already been implemented to run the Back-Propagation algorithm; following the same approach, the CD algorithm is implemented by a CDWorker whose RunOneBatch function controls the logic of the CD algorithm, including the positive phase, the negative phase, and the gradient-computation phase. (A sketch of how these phases fit together is given after the diff below.) RBM layers are different from the layers of feed-forward neural networks, hence new layers for RBM models are added. Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/ef4de796 Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/ef4de796 Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/ef4de796 Branch: refs/heads/master Commit: ef4de796303550b1f3e31fd2fddd9eb831db2b06 Parents: 6afa895 Author: zhaojing <[email protected]> Authored: Sun Aug 16 15:42:20 2015 +0800 Committer: zhaojing <[email protected]> Committed: Thu Aug 20 16:56:03 2015 +0800 ---------------------------------------------------------------------- examples/rbm/autoencoder.conf | 299 +++++++++++++++++++++++++++++++++++++ examples/rbm/rbm0.conf | 103 +++++++++++++ examples/rbm/rbm1.conf | 135 +++++++++++++++++ examples/rbm/rbm2.conf | 167 +++++++++++++++++++++ examples/rbm/rbm3.conf | 198 ++++++++++++++++++++++++ include/neuralnet/layer.h | 57 ++++++- include/trainer/worker.h | 6 +- src/driver.cc | 6 +- src/neuralnet/layer.cc | 191 +++++++++++++++++------ src/proto/job.proto | 21 ++- src/trainer/worker.cc | 22 ++- 11 files changed, 1133 insertions(+), 72 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/examples/rbm/autoencoder.conf ---------------------------------------------------------------------- diff --git a/examples/rbm/autoencoder.conf b/examples/rbm/autoencoder.conf new file mode 100644 index 0000000..9575323 --- /dev/null +++ b/examples/rbm/autoencoder.conf @@ -0,0 +1,299 @@ +name: "deep-big-simple-mlp" +train_steps: 12200 +test_steps:100 +test_freq:100 +disp_freq:20 +checkpoint_after: 1000 +checkpoint_freq: 1000 +checkpoint_path: "examples/rbm/checkpoint/rbm0/checkpoint/step6000-worker0.bin" +checkpoint_path: "examples/rbm/checkpoint/rbm1/checkpoint/step6000-worker0.bin" +checkpoint_path: "examples/rbm/checkpoint/rbm2/checkpoint/step6000-worker0.bin" +checkpoint_path: "examples/rbm/checkpoint/rbm3/checkpoint/step6000-worker0.bin" +alg: kBP +updater{ + type: kAdaGrad + learning_rate{ + base_lr: 0.01 + type: kFixed + } +} + +neuralnet { + layer { + name: "data" + type: kShardData + sharddata_conf { + path: "examples/rbm/mnist_train_shard" + batchsize: 1000 + } + exclude: kTest + } + + layer { + name: "data" + type: kShardData + sharddata_conf { + path: "examples/rbm/mnist_test_shard" + batchsize: 1000 + } + exclude: kTrain + } + + layer{ + name:"mnist" + type: kMnist + srclayers: "data" + mnist_conf { + norm_a: 255 + norm_b: 0 + } + } + + layer{ + name: "label" + type: kLabel + srclayers: "data" + } + + layer{ + name: "fc1" + type: kInnerProduct + srclayers:"mnist" + innerproduct_conf{ + num_output: 1000 + } + param{ + name: "w1" + init{ + type: kUniform + low:-0.05 + high:0.05 + } + } + param{ + name: "rb12" + init{ + type: kUniform + low: -0.05 + high:0.05 + } + } + } + + layer{ + name: "sigmoid1" + type: kSigmoid + srclayers:"fc1" + } + 
layer{ + name: "fc2" + type: kInnerProduct + srclayers:"sigmoid1" + innerproduct_conf{ + num_output: 500 + } + param{ + name: "w2" + init{ + type: kUniform + low:-0.05 + high:0.05 + } + } + param{ + name: "rb22" + init{ + type: kUniform + low: -0.05 + high:0.05 + } + } + } + + layer{ + name: "sigmoid2" + type: kSigmoid + srclayers:"fc2" + } + + layer{ + name: "fc3" + type: kInnerProduct + srclayers:"sigmoid2" + innerproduct_conf{ + num_output: 250 + } + param{ + name: "w3" + init{ + type: kUniform + low:-0.05 + high:0.05 + } + } + param{ + name: "rb32" + init{ + type: kUniform + low: -0.05 + high:0.05 + } + } + } + + layer{ + name: "sigmoid3" + type: kSigmoid + srclayers:"fc3" + } + + layer{ + name: "fc4" + type: kInnerProduct + srclayers:"sigmoid3" + innerproduct_conf{ + num_output: 30 + } + param{ + name: "w4" + init{ + type: kUniform + low:-0.05 + high:0.05 + } + } + param{ + name: "rb42" + init{ + type: kUniform + low: -0.05 + high:0.05 + } + } + } + + layer{ + name: "fc5" + type: kInnerProduct + #srclayers:"sigmoid4" + srclayers:"fc4" + innerproduct_conf{ + num_output: 250 + transpose: true + } + param{ + name: "w5" + share_from: "w4" + } + param{ + name: "rb41" + init{ + type: kUniform + low: -0.05 + high:0.05 + } + } + } + + layer{ + name: "sigmoid5" + type: kSigmoid + srclayers:"fc5" + } + layer{ + name: "fc6" + type: kInnerProduct + srclayers:"sigmoid5" + innerproduct_conf{ + num_output: 500 + transpose: true + } + param{ + name: "w6" + share_from: "w3" + } + param{ + name: "rb31" + init{ + type: kUniform + low: -0.05 + high:0.05 + } + } + + } + + layer{ + name: "sigmoid6" + type: kSigmoid + srclayers:"fc6" + } + layer{ + name: "fc7" + type: kInnerProduct + srclayers:"sigmoid6" + innerproduct_conf{ + num_output: 1000 + transpose: true + } + param{ + name: "w7" + share_from: "w2" + } + param{ + name: "rb21" + init{ + type: kUniform + low: -0.05 + high:0.05 + } + } + + } + + layer{ + name: "sigmoid7" + type: kSigmoid + srclayers:"fc7" + } + layer{ + name: "fc8" + type: kInnerProduct + srclayers:"sigmoid7" + innerproduct_conf{ + num_output: 784 + transpose: true + } + param{ + name: "w8" + share_from: "w1" + } + param{ + name: "rb11" + init{ + type: kUniform + low: -0.05 + high:0.05 + } + } + + } + + layer{ + name: "sigmoid8" + type: kSigmoid + srclayers:"fc8" + } + + layer{ + name: "loss" + type:kEuclideanLoss + srclayers:"sigmoid8" + srclayers:"mnist" + } +} +cluster { + nworker_groups: 1 + nserver_groups: 1 + workspace: "examples/rbm/checkpoint/autoencoder/" +} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/examples/rbm/rbm0.conf ---------------------------------------------------------------------- diff --git a/examples/rbm/rbm0.conf b/examples/rbm/rbm0.conf new file mode 100644 index 0000000..ef8653f --- /dev/null +++ b/examples/rbm/rbm0.conf @@ -0,0 +1,103 @@ +name: "deep-big-simple-dbm" +train_steps: 6000 +test_steps:100 +test_freq:100 +disp_freq: 100 +alg: kCD +checkpoint_after: 500 +checkpoint_freq: 1000 +updater{ + type: kSGD + momentum: 0.9 + weight_decay: 0.0002 + learning_rate{ + base_lr: 0.1 + type: kFixed + } +} + +neuralnet { +layer { + name: "data" + type: kShardData + sharddata_conf { + path: "examples/rbm/mnist_train_shard" + batchsize: 100 + } + exclude: kTest +} + + +layer { + name: "data" + type: kShardData + sharddata_conf { + path: "examples/rbm/mnist_test_shard" + batchsize: 100 + } + exclude: kTrain +} + + +layer{ + name:"mnist" + type: kMnist + srclayers: "data" + mnist_conf { + norm_a: 255 + norm_b: 0 + } +} + +layer{ + name: "RBMVis" 
+ type: kRBMVis + srclayers:"mnist" + srclayers:"RBMHid" + rbmvis_conf{ + num_output: 1000 + } + param{ + name: "w1" + init{ + type: kGaussian + mean: 0.0 + std: 0.1 + } + } + param{ + name: "rb11" + init{ + type: kConstant + value: 0.0 + } + } +} + +layer{ + name: "RBMHid" + type: kRBMHid + srclayers:"RBMVis" + rbmhid_conf{ + hid_dim: 1000 + } + param{ + name: "w1_1" + share_from: "w1" + } + param{ + name: "rb12" + init{ + type: kConstant + value: 0.0 + } + } +} +} +cluster { + nworker_groups: 1 + nserver_groups: 1 + nservers_per_group: 1 + nworkers_per_group: 1 + workspace: "examples/rbm/checkpoint/rbm0/" +} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/examples/rbm/rbm1.conf ---------------------------------------------------------------------- diff --git a/examples/rbm/rbm1.conf b/examples/rbm/rbm1.conf new file mode 100644 index 0000000..f9b4974 --- /dev/null +++ b/examples/rbm/rbm1.conf @@ -0,0 +1,135 @@ +name: "deep-big-simple-dbm" +train_steps: 6000 +test_steps:100 +test_freq:500 +disp_freq: 100 +alg: kCD +checkpoint_after: 500 +checkpoint_freq: 1000 +checkpoint_path: "examples/rbm/checkpoint/rbm0/checkpoint/step6000-worker0.bin" +updater{ + type: kSGD + momentum: 0.9 + weight_decay: 0.0002 + learning_rate{ + base_lr: 0.1 + type: kFixed + } +} + +neuralnet { +layer { + name: "data" + type: kShardData + sharddata_conf { + path: "examples/rbm/mnist_train_shard" + batchsize: 100 + } + exclude: kTest +} + + +layer { + name: "data" + type: kShardData + sharddata_conf { + path: "examples/rbm/mnist_test_shard" + batchsize: 100 + } + exclude: kTrain +} + + +layer{ + name:"mnist" + type: kMnist + srclayers: "data" + mnist_conf { + norm_a: 255 + norm_b: 0 + } +} + +layer{ + name: "fc1" + type: kInnerProduct + srclayers:"mnist" + innerproduct_conf{ + num_output: 1000 + } + param{ + name: "w1" + init{ + type: kUniform + low:-0.05 + high:0.05 + } + } + param{ + name: "rb12" + init{ + type: kUniform + low: -0.05 + high:0.05 + } + } + } + + layer{ + name: "sigmoid1" + type: kSigmoid + srclayers:"fc1" + } + +layer{ + name: "RBMVis" + type: kRBMVis + srclayers:"sigmoid1" + srclayers:"RBMHid" + rbmvis_conf{ + num_output: 500 + } + param{ + name: "w2" + init{ + type: kGaussian + mean: 0.0 + std: 0.1 + } + } + param{ + name: "rb21" + init{ + type: kConstant + value: 0.0 + } + } +} + +layer{ + name: "RBMHid" + type: kRBMHid + srclayers:"RBMVis" + rbmhid_conf{ + hid_dim: 500 + } + param{ + name: "w2_1" + share_from: "w2" + } + param{ + name: "rb22" + init{ + type: kConstant + value: 0.0 + } + } +} +} +cluster { + nworker_groups: 1 + nserver_groups: 1 + nservers_per_group: 1 + nworkers_per_group: 1 + workspace: "examples/rbm/checkpoint/rbm1/" +} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/examples/rbm/rbm2.conf ---------------------------------------------------------------------- diff --git a/examples/rbm/rbm2.conf b/examples/rbm/rbm2.conf new file mode 100644 index 0000000..6629481 --- /dev/null +++ b/examples/rbm/rbm2.conf @@ -0,0 +1,167 @@ +name: "deep-big-simple-dbm" +train_steps: 6000 +test_steps:100 +test_freq:100 +disp_freq: 100 +alg: kCD +checkpoint_after: 500 +checkpoint_freq: 1000 +checkpoint_path: "examples/rbm/checkpoint/rbm1/checkpoint/step6000-worker0.bin" + +updater{ + type: kSGD + momentum: 0.9 + weight_decay: 0.0002 + learning_rate{ + base_lr: 0.1 + type: kFixed + } +} + + +neuralnet { +layer { + name: "data" + type: kShardData + sharddata_conf { + path: "examples/rbm/mnist_train_shard" + batchsize: 100 + } + exclude: kTest +} + + +layer { 
+ name: "data" + type: kShardData + sharddata_conf { + path: "examples/rbm/mnist_test_shard" + batchsize: 100 + } + exclude: kTrain +} + + +layer{ + name:"mnist" + type: kMnist + srclayers: "data" + mnist_conf { + norm_a: 255 + norm_b: 0 + } +} + +layer{ + name: "fc1" + type: kInnerProduct + srclayers:"mnist" + innerproduct_conf{ + num_output: 1000 + } + param{ + name: "w1" + init { + type: kUniform + low:-0.05 + high:0.05 + } + } + param{ + name: "rb12" + init{ + type: kUniform + low: -0.05 + high:0.05 + } + } + } + + layer{ + name: "sigmoid1" + type: kSigmoid + srclayers:"fc1" + } + +layer{ + name: "fc2" + type: kInnerProduct + srclayers:"sigmoid1" + innerproduct_conf{ + num_output: 500 + } + param{ + name: "w2" + init{ + type: kUniform + low:-0.05 + high:0.05 + } + } + param{ + name: "rb22" + init{ + type: kUniform + low: -0.05 + high:0.05 + } + } + } + + layer{ + name: "sigmoid2" + type: kSigmoid + srclayers:"fc2" + } +layer{ + name: "RBMVis" + type: kRBMVis + srclayers:"sigmoid2" + srclayers:"RBMHid" + rbmvis_conf{ + num_output: 250 + } + param{ + name: "w3" + init{ + type: kGaussian + mean: 0.0 + std: 0.1 + } + } + param{ + name: "rb31" + init{ + type: kConstant + value: 0.0 + } + } +} + +layer{ + name: "RBMHid" + type: kRBMHid + srclayers:"RBMVis" + rbmhid_conf{ + hid_dim: 250 + } + param{ + name: "w3_1" + share_from: "w3" + } + param{ + name: "rb32" + init{ + type: kConstant + value: 0.0 + } + } +} +} +cluster { + nworker_groups: 1 + nserver_groups: 1 + nservers_per_group: 1 + nworkers_per_group: 1 + workspace: "examples/rbm/checkpoint/rbm2/" +} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/examples/rbm/rbm3.conf ---------------------------------------------------------------------- diff --git a/examples/rbm/rbm3.conf b/examples/rbm/rbm3.conf new file mode 100644 index 0000000..482c5e7 --- /dev/null +++ b/examples/rbm/rbm3.conf @@ -0,0 +1,198 @@ +name: "deep-big-simple-dbm" +train_steps: 6000 +test_steps: 100 +test_freq: 100 +disp_freq: 100 +alg: kCD +checkpoint_after: 500 +checkpoint_freq: 1000 +checkpoint_path: "examples/rbm/checkpoint/rbm2/checkpoint/step6000-worker0.bin" +updater{ + type: kSGD + momentum: 0.9 + weight_decay: 0.0002 + learning_rate{ + base_lr: 0.001 + type: kFixed + } +} + +neuralnet { +layer { + name: "data" + type: kShardData + sharddata_conf { + path: "examples/rbm/mnist_train_shard" + batchsize: 100 + } + exclude: kTest +} + + +layer { + name: "data" + type: kShardData + sharddata_conf { + path: "examples/rbm/mnist_test_shard" + batchsize: 100 + } + exclude: kTrain +} + + +layer{ + name:"mnist" + type: kMnist + srclayers: "data" + mnist_conf { + norm_a: 255 + norm_b: 0 + } +} + +layer{ + name: "fc1" + type: kInnerProduct + srclayers:"mnist" + innerproduct_conf{ + num_output: 1000 + } + param{ + name: "w1" + init{ + type: kUniform + low:-0.05 + high:0.05 + } + } + param{ + name: "rb12" + init{ + type: kUniform + low: -0.05 + high:0.05 + } + } + } + + layer{ + name: "sigmoid1" + type: kSigmoid + srclayers:"fc1" + } + +layer{ + name: "fc2" + type: kInnerProduct + srclayers:"sigmoid1" + innerproduct_conf{ + num_output: 500 + } + param{ + name: "w2" + init{ + type: kUniform + low:-0.05 + high:0.05 + } + } + param{ + name: "rb22" + init{ + type: kUniform + low: -0.05 + high:0.05 + } + } + } + + layer{ + name: "sigmoid2" + type: kSigmoid + srclayers:"fc2" + } + +layer{ + name: "fc3" + type: kInnerProduct + srclayers:"sigmoid2" + innerproduct_conf{ + num_output: 250 + } + param{ + name: "w3" + init{ + type: kUniform + low:-0.05 + high:0.05 + } 
+ } + param{ + name: "rb32" + init{ + type: kUniform + low: -0.05 + high:0.05 + } + } + } + + layer{ + name: "sigmoid3" + type: kSigmoid + srclayers:"fc3" + } + +layer{ + name: "RBMVis" + type: kRBMVis + srclayers:"sigmoid3" + srclayers:"RBMHid" + rbmvis_conf{ + num_output: 30 + } + param{ + name: "w4" + init{ + type: kGaussian + mean: 0.0 + std: 0.1 + } + } + param{ + name: "rb41" + init{ + type: kConstant + value: 0.0 + } + } +} + +layer{ + name: "RBMHid" + type: kRBMHid + srclayers:"RBMVis" + rbmhid_conf{ + hid_dim: 30 + gaussian: true + } + param{ + name: "w4_1" + share_from: "w4" + } + param{ + name: "rb42" + init{ + type: kConstant + value: 0.0 + } + } +} +} +cluster { + nworker_groups: 1 + nserver_groups: 1 + nservers_per_group: 1 + nworkers_per_group: 1 + workspace: "examples/rbm/checkpoint/rbm3/" +} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/include/neuralnet/layer.h ---------------------------------------------------------------------- diff --git a/include/neuralnet/layer.h b/include/neuralnet/layer.h index 118da56..b1fbbb0 100644 --- a/include/neuralnet/layer.h +++ b/include/neuralnet/layer.h @@ -69,8 +69,8 @@ class DropoutLayer: public Layer { Blob<float> mask_; }; /** - * RBM visible layer - */ + * RBM visible layer + */ class RBMVisLayer: public Layer { public: using Layer::ComputeFeature; @@ -108,6 +108,7 @@ class RBMVisLayer: public Layer { ~RBMVisLayer(); + private: //! dimension of the hidden layer int hdim_; @@ -127,8 +128,8 @@ class RBMVisLayer: public Layer { // in order to implement Persistent Contrastive Divergence, }; /** - * RBM hidden layer - */ + * RBM hidden layer + */ class RBMHidLayer: public Layer { public: using Layer::ComputeFeature; @@ -154,7 +155,7 @@ class RBMHidLayer: public Layer { return data_; else return hid_sample_; - } + } const vector<Param*> GetParams() const override { vector<Param*> params{weight_, bias_}; return params; @@ -169,6 +170,8 @@ class RBMHidLayer: public Layer { // batchsize of negative phase int neg_batchsize_; float scale_; + // whether use gaussian sampling + bool gaussian_; Blob<float> hid_sample_; Param* weight_, *bias_; }; @@ -184,7 +187,6 @@ class InnerProductLayer: public Layer { void ComputeFeature(Phase phase, Metric *perf) override; void ComputeGradient(Phase phase) override; - ConnectionType src_neuron_connection(int k) const override { // CHECK_LT(k, srclayers_.size()); return kOneToAll; @@ -201,6 +203,7 @@ class InnerProductLayer: public Layer { //! 
dimension of the visible layer int vdim_; int batchsize_; + bool transpose_; Param* weight_, *bias_; }; @@ -246,7 +249,9 @@ class MnistLayer: public ParserLayer { void Setup(const LayerProto& proto, int npartitions) override; void ParseRecords(Phase phase, const vector<Record>& records, Blob<float>* blob) override; - + ConnectionType dst_layer_connection() const override { + return kOneToMany; + } protected: // height and width of the image after deformation // kernel size for elastic distortion @@ -283,6 +288,29 @@ class ReLULayer: public Layer { void ComputeGradient(Phase phase) override; }; +class EuclideanLossLayer: public LossLayer { + public: + using Layer::ComputeFeature; + using Layer::ComputeGradient; + + void Setup(const LayerProto& proto, int npartitions) override; + void ComputeFeature(Phase phase, Metric *perf) override; + void ComputeGradient(Phase phase) override; + + + int partition_dim() const override { + CHECK_LE(layer_proto_.partition_dim(), 1); + return layer_proto_.partition_dim(); + } + ConnectionType src_neuron_connection(int k) const override { + // CHECK_LT(k, srclayers_.size()); + return kOneToAll; + } + + private: + int batchsize_; + int dim_; +}; class SoftmaxLossLayer: public LossLayer { /* @@ -344,6 +372,21 @@ class ShardDataLayer: public DataLayer{ }; /** + * This layer apply Sigmoid function to neuron activations. + * f(x)=1/(1+exp(-x)) + * f'(x)=f(x)*(1-f(x)) + */ +class SigmoidLayer: public Layer { + public: + using Layer::ComputeFeature; + using Layer::ComputeGradient; + + void Setup(const LayerProto& proto, int npartitions) override; + void ComputeFeature(Phase phase, Metric *perf) override; + void ComputeGradient(Phase phase) override; +}; + +/** * This layer apply Tan function to neuron activations. * f(x)=A tanh(Bx) * f'(x)=B/A (A*A-f(x)*f(x)) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/include/trainer/worker.h ---------------------------------------------------------------------- diff --git a/include/trainer/worker.h b/include/trainer/worker.h index cc5a745..86b1c90 100644 --- a/include/trainer/worker.h +++ b/include/trainer/worker.h @@ -193,10 +193,10 @@ class BPWorker: public Worker{ class CDWorker: public Worker{ public: ~CDWorker() {} - void Init(int thread_id, int group_id, int worker_id) override; + void Init(int thread_id, int grp_id, int id) override; void TrainOneBatch(int step, Metric* perf) override; - void TestOneBatch(int step, Phase phase, - shared_ptr<NeuralNet> net, Metric* perf) override; + void TestOneBatch(int step, Phase phase, shared_ptr<NeuralNet> net, + Metric* perf) override; void PositivePhase(int step, shared_ptr<NeuralNet> net, Metric* perf); void NegativePhase(int step, shared_ptr<NeuralNet> net, Metric* perf); void GradientPhase(int step, shared_ptr<NeuralNet> net); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/src/driver.cc ---------------------------------------------------------------------- diff --git a/src/driver.cc b/src/driver.cc index 1bc712d..e5045a3 100644 --- a/src/driver.cc +++ b/src/driver.cc @@ -31,21 +31,23 @@ void Driver::Init(int argc, char **argv) { RegisterLayer<ConvolutionLayer, int>(kConvolution); RegisterLayer<ConcateLayer, int>(kConcate); RegisterLayer<DropoutLayer, int>(kDropout); + RegisterLayer<EuclideanLossLayer, int>(kEuclideanLoss); RegisterLayer<InnerProductLayer, int>(kInnerProduct); RegisterLayer<LabelLayer, int>(kLabel); RegisterLayer<LRNLayer, int>(kLRN); RegisterLayer<MnistLayer, int>(kMnist); RegisterLayer<PrefetchLayer, int>(kPrefetch); 
RegisterLayer<PoolingLayer, int>(kPooling); + RegisterLayer<RBMHidLayer, int>(kRBMHid); + RegisterLayer<RBMVisLayer, int>(kRBMVis); RegisterLayer<RGBImageLayer, int>(kRGBImage); RegisterLayer<ReLULayer, int>(kReLU); RegisterLayer<ShardDataLayer, int>(kShardData); + RegisterLayer<SigmoidLayer, int>(kSigmoid); RegisterLayer<SliceLayer, int>(kSlice); RegisterLayer<SoftmaxLossLayer, int>(kSoftmaxLoss); RegisterLayer<SplitLayer, int>(kSplit); RegisterLayer<TanhLayer, int>(kTanh); - RegisterLayer<RBMVisLayer, int>(kRBMVis); - RegisterLayer<RBMHidLayer, int>(kRBMHid); #ifdef USE_LMDB RegisterLayer<LMDBDataLayer, int>(kLMDBData); #endif http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/src/neuralnet/layer.cc ---------------------------------------------------------------------- diff --git a/src/neuralnet/layer.cc b/src/neuralnet/layer.cc index ae45ae8..b5c986e 100644 --- a/src/neuralnet/layer.cc +++ b/src/neuralnet/layer.cc @@ -163,6 +163,7 @@ RBMVisLayer::~RBMVisLayer() { delete weight_; delete bias_; } + void RBMVisLayer::Setup(const LayerProto& proto, int npartitions) { Layer::Setup(proto, npartitions); @@ -188,7 +189,7 @@ void RBMVisLayer::Setup(const LayerProto& proto, vis_sample_.Reshape(vector<int>{neg_batchsize_, vdim_}); weight_ = Param::Create(proto.param(0)); bias_ = Param::Create(proto.param(1)); - weight_->Setup(proto.param(0), vector<int>{vdim_, hdim_}); + weight_->Setup(proto.param(0), vector<int>{hdim_, vdim_}); bias_->Setup(proto.param(1), vector<int>{vdim_}); } @@ -199,24 +200,15 @@ void RBMVisLayer::ComputeFeature(Phase phase, Metric* perf) { auto src = Tensor2(srclayers_[data_idx_]->mutable_data(this)); Copy(data, src); } else if (phase == kNegative) { /*negative phase*/ - if (is_first_iteration_vis_) { - CHECK_EQ(srclayers_[data_idx_]->data(this).count(), batchsize_*vdim_); - auto src = Tensor2(srclayers_[data_idx_]->mutable_data(this)); - auto vis_sample = Tensor2(&vis_sample_); - Copy(vis_sample, src); - is_first_iteration_vis_ = false; - } else { - auto hid_sample = - Tensor2(srclayers_[hid_idx_]->mutable_data(this, kNegative)); - // fetch sampling results from hidden layer - auto vis_sample = Tensor2(&vis_sample_); - auto weight = Tensor2(weight_->mutable_data()); - auto bias = Tensor1(bias_->mutable_data()); - vis_sample = dot(hid_sample, weight.T()); - vis_sample+=repmat(bias, neg_batchsize_); - vis_sample = F<op::sigmoid>(vis_sample); - TSingleton<Random<cpu>>::Instance()->SampleBinary(vis_sample); - } + auto hid_sample = + Tensor2(srclayers_[hid_idx_]->mutable_data(this, kNegative)); + // fetch sampling results from hidden layer + auto vis_sample = Tensor2(&vis_sample_); + auto weight = Tensor2(weight_->mutable_data()); + auto bias = Tensor1(bias_->mutable_data()); + vis_sample = dot(hid_sample, weight); + vis_sample+=repmat(bias, neg_batchsize_); + vis_sample = F<op::sigmoid>(vis_sample); } } @@ -231,14 +223,14 @@ void RBMVisLayer::ComputeGradient(Phase phase) { auto gbias = Tensor1(bias_->mutable_grad()); gbias = sum_rows(vis_sample); gbias -= sum_rows(data); - gweight = dot(vis_sample.T(), hid_sample); - gweight -= dot(data.T(), hid_data); + gweight = dot(hid_sample.T(), vis_sample); + gweight -= dot(hid_data.T(), data); gbias*=(1.0f)/(1.0f*batchsize_); gweight*=(1.0f)/(1.0f*batchsize_); } void RBMVisLayer::ComputeLoss(Metric* perf) { - float loss = (0.0f); + float loss_sqr = (0.0f); CHECK_EQ(srclayers_[data_idx_]->data(this).count(), batchsize_*vdim_); auto src = Tensor2(srclayers_[data_idx_]->mutable_data(this)); auto hid_data = 
Tensor2(srclayers_[hid_idx_]->mutable_data(this, kPositive)); @@ -247,24 +239,26 @@ void RBMVisLayer::ComputeLoss(Metric* perf) { auto bias = Tensor1(bias_->mutable_data()); Tensor<cpu, 2> reconstruct(Shape2(batchsize_, vdim_)); /*reconstruct error*/ AllocSpace(reconstruct); - reconstruct = dot(hid_data, weight.T()); + reconstruct = dot(hid_data, weight); reconstruct+=repmat(bias, batchsize_); reconstruct = F<op::sigmoid>(reconstruct); float *src_dptr = src.dptr; - float *reconstruct_dptr = reconstruct.dptr; - for (int i = 0; i < vdim_*batchsize_; i++) - loss += -(src_dptr[i]*log(reconstruct_dptr[i]) - +(1-src_dptr[i])*log(1-reconstruct_dptr[i])); - loss/=batchsize_; + for (int i = 0; i < vdim_*batchsize_; i++) { + int recon_row = i / vdim_; + int recon_col = i - recon_row * vdim_; + loss_sqr += (src_dptr[i] - reconstruct[recon_row][recon_col]) * + (src_dptr[i] - reconstruct[recon_row][recon_col]); + } FreeSpace(reconstruct); perf->Reset(); - perf->Add("reconstruct_error", loss); + perf->Add("sqr_reconstruct_error", loss_sqr); } /**************** Implementation for RBMHidLayer********************/ RBMHidLayer::~RBMHidLayer() { delete weight_; delete bias_; } + void RBMHidLayer::Setup(const LayerProto& proto, int npartitions) { Layer::Setup(proto, npartitions); @@ -276,24 +270,44 @@ void RBMHidLayer::Setup(const LayerProto& proto, neg_batchsize_ = src_sample.shape()[0]; vdim_ = src_data.count()/batchsize_; hdim_ = proto.rbmhid_conf().hid_dim(); + gaussian_ = proto.rbmhid_conf().gaussian(); data_.Reshape(vector<int>{batchsize_, hdim_}); hid_sample_.Reshape(vector<int>{neg_batchsize_, hdim_}); weight_ = Param::Create(proto.param(0)); bias_ = Param::Create(proto.param(1)); - weight_->Setup(proto.param(0), vector<int>{vdim_, hdim_}); bias_->Setup(proto.param(1), vector<int>{hdim_}); + weight_->Setup(proto.param(0), vector<int>{hdim_, vdim_}); } void RBMHidLayer::ComputeFeature(Phase phase, Metric* perf) { if (phase == kPositive) { /*postive phase*/ auto data = Tensor2(&data_); + + auto hid_sample = Tensor2(&hid_sample_); + CHECK_EQ(srclayers_[0]->data(this, kPositive).count(), batchsize_*vdim_); auto src = Tensor2(srclayers_[0]->mutable_data(this, kPositive)); auto weight = Tensor2(weight_->mutable_data()); auto bias = Tensor1(bias_->mutable_data()); - data = dot(src, weight); + data = dot(src, weight.T()); data += repmat(bias, batchsize_); - data = F<op::sigmoid>(data); + + if (!gaussian_) + data = F<op::sigmoid>(data); + + Copy(hid_sample, data); + + if (gaussian_) { // first gibbs + Tensor<cpu, 2> gaussian_sample(Shape2(batchsize_, hdim_)); + AllocSpace(gaussian_sample); + auto random = TSingleton<Random<cpu>>::Instance(); + random->SampleGaussian(gaussian_sample, 0.0f, 1.0f); + hid_sample += gaussian_sample; + FreeSpace(gaussian_sample); + } else { + TSingleton<Random<cpu>>::Instance()->SampleBinary(hid_sample); + } + } else if (phase == kNegative) { /*negative phase*/ CHECK_EQ(srclayers_[0]->data(this, kNegative).count(), neg_batchsize_*vdim_); @@ -301,15 +315,25 @@ void RBMHidLayer::ComputeFeature(Phase phase, Metric* perf) { auto hid_sample = Tensor2(&hid_sample_); auto bias = Tensor1(bias_->mutable_data()); auto weight = Tensor2(weight_->mutable_data()); - hid_sample = dot(src_sample, weight); + hid_sample = dot(src_sample, weight.T()); hid_sample += repmat(bias, neg_batchsize_); - hid_sample = F<op::sigmoid>(hid_sample); - TSingleton<Random<cpu>>::Instance()->SampleBinary(hid_sample); + if (!gaussian_) + hid_sample = F<op::sigmoid>(hid_sample); } else if (phase == kLoss) { /*test 
phase*/ - auto data = Tensor2(&data_); // data: sigmoid(Wv+b) - TSingleton<Random<cpu>>::Instance()->SampleBinary(data); + auto data = Tensor2(&data_); // data: sigmoid(Wv+b) + if (gaussian_) { + Tensor<cpu, 2> gaussian_sample(Shape2(batchsize_, hdim_)); + AllocSpace(gaussian_sample); + auto random = TSingleton<Random<cpu>>::Instance(); + random->SampleGaussian(gaussian_sample, 0.0f, 1.0f); + data += gaussian_sample; + FreeSpace(gaussian_sample); + } + else + TSingleton<Random<cpu>>::Instance()->SampleBinary(data); } } + void RBMHidLayer::ComputeGradient(Phase phase) { auto data = Tensor2(&data_); auto hid_sample = Tensor2(&hid_sample_); @@ -326,17 +350,21 @@ InnerProductLayer::~InnerProductLayer() { void InnerProductLayer::Setup(const LayerProto& proto, int npartitions) { Layer::Setup(proto, npartitions); CHECK_EQ(srclayers_.size(), 1); - const auto& src=srclayers_[0]->data(this); - batchsize_=src.shape()[0]; - vdim_=src.count()/batchsize_; - hdim_=proto.innerproduct_conf().num_output(); - if(partition_dim()>0) + const auto& src = srclayers_[0]->data(this); + batchsize_ = src.shape()[0]; + vdim_ = src.count()/batchsize_; + hdim_ = proto.innerproduct_conf().num_output(); + transpose_ = proto.innerproduct_conf().transpose(); + if (partition_dim() > 0) hdim_ /= npartitions; data_.Reshape(vector<int>{batchsize_, hdim_}); grad_.ReshapeLike(data_); weight_ = Param::Create(proto.param(0)); bias_ = Param::Create(proto.param(1)); - weight_->Setup(proto.param(0), vector<int>{hdim_, vdim_}); + if (transpose_) + weight_->Setup(proto.param(0), vector<int>{vdim_, hdim_}); + else + weight_->Setup(proto.param(0), vector<int>{hdim_, vdim_}); bias_->Setup(proto.param(1), vector<int>{hdim_}); } @@ -345,7 +373,10 @@ void InnerProductLayer::ComputeFeature(Phase phase, Metric* perf) { auto src = Tensor2(srclayers_[0]->mutable_data(this)); auto weight = Tensor2(weight_->mutable_data()); auto bias = Tensor1(bias_->mutable_data()); - data=dot(src, weight.T()); + if (transpose_) + data = dot(src, weight); + else + data = dot(src, weight.T()); // repmat: repeat bias vector into batchsize rows data+=repmat(bias, batchsize_); } @@ -357,11 +388,17 @@ void InnerProductLayer::ComputeGradient(Phase phas) { auto gweight = Tensor2(weight_->mutable_grad()); auto gbias = Tensor1(bias_->mutable_grad()); - gbias=sum_rows(grad); - gweight=dot(grad.T(), src); - if(srclayers_[0]->mutable_grad(this)!=nullptr){ + gbias = sum_rows(grad); + if (transpose_) + gweight = dot(src.T(), grad); + else + gweight = dot(grad.T(), src); + if (srclayers_[0]->mutable_grad(this) != nullptr) { auto gsrc = Tensor2(srclayers_[0]->mutable_grad(this)); - gsrc=dot(grad, weight); + if (transpose_) + gsrc = dot(grad, weight.T()); + else + gsrc = dot(grad, weight); } } /***************************************************************************** @@ -703,6 +740,25 @@ ShardDataLayer::~ShardDataLayer() { delete shard_; shard_ = nullptr; } +/*******************Implementation of SigmoidLayer***************************/ +void SigmoidLayer::Setup(const LayerProto& proto, int npartitions) { + Layer::Setup(proto, npartitions); + data_.ReshapeLike(srclayers_[0]->data(this)); + grad_.ReshapeLike(srclayers_[0]->grad(this)); +} + +void SigmoidLayer::ComputeFeature(Phase phase, Metric* perf) { + auto data = Tensor1(&data_); + auto src = Tensor1(srclayers_[0]->mutable_data(this)); + data = F<op::sigmoid>(src); +} + +void SigmoidLayer::ComputeGradient(Phase phase) { + auto data = Tensor1(&data_); + auto grad = Tensor1(&grad_); + auto gsrc = 
Tensor1(srclayers_[0]->mutable_grad(this)); + gsrc = F<op::sigmoid_grad>(data)*grad; +} /*******************Implementation of TanLayer***************************/ void TanhLayer::Setup(const LayerProto& proto, int npartitions){ Layer::Setup(proto, npartitions); @@ -722,6 +778,45 @@ void TanhLayer::ComputeGradient(Phase phase) { auto gsrc = Tensor1(srclayers_[0]->mutable_grad(this)); gsrc=F<op::stanh_grad>(data)*grad; } +/********** * Implementation for EuclideanLossLayer*************************/ +void EuclideanLossLayer::Setup(const LayerProto& proto, int npartitions) { + LossLayer::Setup(proto, npartitions); + CHECK_EQ(srclayers_.size(), 2); + data_.Reshape(srclayers_[0]->data(this).shape()); + batchsize_ = data_.shape()[0]; + dim_ = data_.count()/batchsize_; + metric_.Reshape(vector<int>{1}); +} +void EuclideanLossLayer::ComputeFeature(Phase phase, Metric* perf) { + const float* reconstruct_dptr = srclayers_[0]->data(this).cpu_data(); + const float* input_dptr = srclayers_[1]->data(this).cpu_data(); + float loss = 0; + for (int n = 0; n < batchsize_; n++) { + for (int j = 0; j < dim_; ++j) { + loss += (input_dptr[j] - reconstruct_dptr[j]) * + (input_dptr[j] - reconstruct_dptr[j]); + } + reconstruct_dptr+=dim_; + input_dptr+=dim_; + } + CHECK_EQ(reconstruct_dptr, + srclayers_[0]->data(this).cpu_data() + (batchsize_*dim_)); + CHECK_EQ(input_dptr, + srclayers_[1]->data(this).cpu_data() + (batchsize_*dim_)); + perf->Add("loss", loss/(1.0f*batchsize_)); +} +void EuclideanLossLayer::ComputeGradient(Phase phase) { + const float* reconstruct_dptr = srclayers_[0]->data(this).cpu_data(); + const float* input_dptr = srclayers_[1]->data(this).cpu_data(); + Blob<float>* gsrcblob = srclayers_[0]->mutable_grad(this); + float* gsrcptr = gsrcblob->mutable_cpu_data(); + for (int n = 0; n < batchsize_; n++) { + for (int j = 0; j < dim_; j++) + gsrcptr[n*dim_+j]= 2 * (reconstruct_dptr[n*dim_+j]-input_dptr[n*dim_+j]); + } + Tensor<cpu, 1> gsrc(gsrcptr, Shape1(gsrcblob->count())); + gsrc*=1.0f/(1.0f*batchsize_); +} /********** * Implementation for SoftmaxLossLayer*************************/ void SoftmaxLossLayer::Setup(const LayerProto& proto, int npartitions) { LossLayer::Setup(proto, npartitions); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/src/proto/job.proto ---------------------------------------------------------------------- diff --git a/src/proto/job.proto b/src/proto/job.proto index b4abe68..7f030ac 100644 --- a/src/proto/job.proto +++ b/src/proto/job.proto @@ -166,6 +166,8 @@ message LayerProto { optional ConcateProto concate_conf = 31; // configuration for dropout layer optional DropoutProto dropout_conf = 33; + // configuration for euclideanloss layer + optional EuclideanLossProto euclideanloss_conf = 50; // configuration for inner product layer optional InnerProductProto innerproduct_conf = 34; // configuration for local response normalization layer @@ -178,6 +180,10 @@ message LayerProto { optional PoolingProto pooling_conf = 37; // configuration for prefetch layer optional PrefetchProto prefetch_conf = 44; + // configuration for rbmhid layer + optional RBMHidProto rbmhid_conf = 49; + // configuration for rbmvis layer + optional RBMVisProto rbmvis_conf = 48; // configuration for rectified linear unit layer optional ReLUProto relu_conf = 38; // configuration for rgb image parser layer @@ -192,10 +198,7 @@ message LayerProto { optional SplitProto split_conf = 42; // configuration for tanh layer optional TanhProto tanh_conf = 43; - // configuration for rbmvis layer - 
optional RBMVisProto rbmvis_conf = 48; - // configuration for rbmhid layer - optional RBMHidProto rbmhid_conf = 49; + // overrides the partition dimension for neural net optional int32 partition_dim = 60 [default = -1]; @@ -299,6 +302,9 @@ message TanhProto { optional float inner_scale = 2 [default = 1.0]; } +message EuclideanLossProto { +} + message SoftmaxLossProto { // computing accuracy against topk results optional int32 topk = 1 [default = 1]; @@ -367,6 +373,7 @@ message RBMVisProto { message RBMHidProto { optional int32 hid_dim = 1; // The number of outputs for the layer optional bool bias_term = 2 [default = true]; // whether to have bias terms + optional bool gaussian = 3 [default = false]; // use gaussian sampling or not } // Message that stores parameters used by InnerProductLayer @@ -375,6 +382,8 @@ message InnerProductProto { required int32 num_output = 1; // use bias vector or not optional bool bias_term = 30 [default = true]; + // transpose or not + optional bool transpose = 31 [default = false]; } message LRNProto { @@ -524,12 +533,14 @@ enum LayerType { kLRN = 6; kPooling = 8; kReLU = 9; - kRBMHid = 24; kRBMVis = 23; + kRBMHid = 24; + kSigmoid = 26; kTanh = 14; // Loss layers // - Compute objective loss kSoftmaxLoss = 11; + kEuclideanLoss = 25; // Other layers // - Connect layers when neural net is partitioned kBridgeDst = 16; http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ef4de796/src/trainer/worker.cc ---------------------------------------------------------------------- diff --git a/src/trainer/worker.cc b/src/trainer/worker.cc index e047367..f112b17 100644 --- a/src/trainer/worker.cc +++ b/src/trainer/worker.cc @@ -380,7 +380,6 @@ void BPWorker::TestOneBatch(int step, Phase phase, shared_ptr<NeuralNet> net, Metric* perf) { Forward(step, phase, net, perf); } - /****************************CDWorker**********************************/ void CDWorker::Init(int thread_id, int group_id, int worker_id) { Worker::Init(thread_id, group_id, worker_id); @@ -389,8 +388,11 @@ void CDWorker::Init(int thread_id, int group_id, int worker_id) { void CDWorker::PositivePhase(int step, shared_ptr<NeuralNet> net, Metric* perf) { auto& layers = net->layers(); + // LOG(ERROR)<<"Positive Phase"; for (auto& layer : layers) { - // clock_t s=clock(); + for (Param* p : layer->GetParams()) { // wait until param is updated + Collect(p, step); + } layer->ComputeFeature(kPositive, perf); } } @@ -399,33 +401,39 @@ void CDWorker::NegativePhase(int step, shared_ptr<NeuralNet> net, Metric* perf) { // for negative phase, gibbs sampling only concerns RBM bottom and top layer auto& layers = net->layers(); - for (int i = 0; i < job_conf_.cd_conf().pcd_k(); i++) { + // LOG(ERROR)<<"Negative Phase"; for (auto& layer : layers) { - if (layer->is_vislayer() || layer->is_hidlayer()) + if (layer->is_vislayer() || layer->is_hidlayer()) { layer->ComputeFeature(kNegative, perf); + } } - } } void CDWorker::GradientPhase(int step, shared_ptr<NeuralNet> net) { auto& layers = net->layers(); + // LOG(ERROR)<<"Gradient Phase"; for (auto& layer : layers) { + if (layer->is_vislayer() || layer->is_hidlayer()) { layer->ComputeGradient(kTrain); for (Param* p : layer->GetParams()) { Update(p, step); } + } } } void CDWorker::LossPhase(int step, shared_ptr<NeuralNet> net, Metric* perf) { auto& layers = net->layers(); + // LOG(ERROR)<<"Loss Phase"; for (auto& layer : layers) { - if (layer->is_hidlayer()) + if (layer->is_hidlayer()) { layer->ComputeFeature(kLoss, perf); + } } for (auto& layer : layers) { - if 
(layer->is_vislayer()) + if (layer->is_vislayer()) { layer->ComputeLoss(perf); + } } }
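The phase functions shown above (PositivePhase, NegativePhase, GradientPhase and LossPhase) are the building blocks that the commit message says RunOneBatch/TrainOneBatch chains together. Below is a minimal sketch of how CDWorker::TrainOneBatch could chain them; the sketch is not part of this commit, and the train_net_ member and DisplayNow() helper are assumptions used only for illustration.

void CDWorker::TrainOneBatch(int step, Metric* perf) {
  // positive phase: clamp the training data and compute hidden activations
  PositivePhase(step, train_net_, perf);
  // negative phase: one Gibbs step over the RBM visible/hidden layers only
  NegativePhase(step, train_net_, perf);
  // gradient phase: CD gradient for the RBM layers, then Update() on each Param
  GradientPhase(step, train_net_);
  // periodically report the reconstruction error
  if (DisplayNow(step))
    LossPhase(step, train_net_, perf);
}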

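For reference, the weight and bias gradients computed in RBMVisLayer::ComputeGradient above correspond to the usual CD-1 estimator; this is a reading of the diff, not text from the commit. With V_d, H_d the positive-phase (data) visible/hidden activations, V_s, H_s the negative-phase (reconstruction) values, and B the batch size,

  g_W = \frac{1}{B}\left(H_s^{\top} V_s - H_d^{\top} V_d\right), \qquad
  g_b = \frac{1}{B}\sum_{n=1}^{B}\left(v_s^{(n)} - v_d^{(n)}\right)

The kSGD updater then subtracts the learning-rate-scaled gradient from the parameters, so the applied update is the standard positive-minus-negative CD rule.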