http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/examples/cifar10/model-lmdb.conf ---------------------------------------------------------------------- diff --git a/examples/cifar10/model-lmdb.conf b/examples/cifar10/model-lmdb.conf deleted file mode 100644 index ea22ccd..0000000 --- a/examples/cifar10/model-lmdb.conf +++ /dev/null @@ -1,218 +0,0 @@ -name: "cifar10-convnet" -train_steps: 70000 -test_steps:100 -test_frequency:1000 -display_frequency:50 -updater{ - momentum:0.9 - weight_decay:0.004 - learning_rate_change_method:kFixedStep - step:0 - step:60000 - step:65000 - step_lr:0.001 - step_lr:0.0001 - step_lr:0.00001 -} -neuralnet { -layer { - name: "data" - type: "kLMDBData" - data_param { - path: "examples/cifar10/cifar10_train_lmdb" - batchsize: 100 - } - exclude: kTest -} - -layer { - name: "data" - type: "kLMDBData" - data_param { - path: "examples/cifar10/cifar10_test_lmdb" - batchsize: 100 - } - exclude: kTrain -} - -layer{ - name:"rgb" - type: "kRGBImage" - srclayers: "data" - rgbimage_param { - meanfile: "examples/cifar10/mean.binaryproto" - } -} - -layer{ - name: "label" - type: "kLabel" - srclayers: "data" -} -layer { - name: "conv1" - type: "kConvolution" - srclayers: "rgb" - convolution_param { - num_filters: 32 - kernel: 5 - stride: 1 - pad:2 - } - param{ - name: "weight" - init_method:kGaussian - std:0.0001 - learning_rate_multiplier:1.0 - } - param{ - name: "bias" - init_method: kConstant - learning_rate_multiplier:2.0 - value:0 - } -} -layer { - name: "pool1" - type: "kPooling" - srclayers: "conv1" - pooling_param { - pool: MAX - kernel: 3 - stride: 2 - } -} -layer { - name: "relu1" - type: "kReLU" - srclayers:"pool1" -} -layer { - name: "norm1" - type: "kLRN" - lrn_param { - norm_region: WITHIN_CHANNEL - local_size: 3 - alpha: 5e-05 - beta: 0.75 - } - srclayers:"relu1" -} -layer { - name: "conv2" - type: "kConvolution" - srclayers: "norm1" - convolution_param { - num_filters: 32 - kernel: 5 - stride: 1 - pad:2 - } - param{ 
- name: "weight" - init_method:kGaussian - std:0.01 - learning_rate_multiplier:1.0 - } - param{ - name: "bias" - init_method: kConstant - learning_rate_multiplier:2.0 - value:0 - } -} -layer { - name: "relu2" - type: "kReLU" - srclayers:"conv2" -} -layer { - name: "pool2" - type: "kPooling" - srclayers: "relu2" - pooling_param { - pool: MAX - kernel: 3 - stride: 2 - } -} -layer { - name: "norm2" - type: "kLRN" - lrn_param { - norm_region: WITHIN_CHANNEL - local_size: 3 - alpha: 5e-05 - beta: 0.75 - } - srclayers:"pool2" -} -layer { - name: "conv3" - type: "kConvolution" - srclayers: "norm2" - convolution_param { - num_filters: 64 - kernel: 5 - stride: 1 - pad:2 - } - param{ - name: "weight" - init_method:kGaussian - std:0.01 - } - param{ - name: "bias" - init_method: kConstant - value:0 - } -} -layer { - name: "relu3" - type: "kReLU" - srclayers:"conv3" -} -layer { - name: "pool3" - type: "kPooling" - srclayers: "relu3" - pooling_param { - pool: AVE - kernel: 3 - stride: 2 - } -} -layer { - name: "ip1" - type: "kInnerProduct" - srclayers:"pool3" - inner_product_param { - num_output: 10 - } - param{ - name: "weight" - init_method:kGaussian - std:0.01 - learning_rate_multiplier:1.0 - weight_decay_multiplier:250 - } - param{ - name: "bias" - init_method: kConstant - learning_rate_multiplier:2.0 - weight_decay_multiplier:0 - value:0 - } -} - -layer{ - name: "loss" - type:"kSoftmaxLoss" - softmaxloss_param{ - topk:1 - } - srclayers:"ip1" - srclayers:"label" -} -}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/examples/cifar10/model-prefetch.conf ---------------------------------------------------------------------- diff --git a/examples/cifar10/model-prefetch.conf b/examples/cifar10/model-prefetch.conf deleted file mode 100644 index 220a4b9..0000000 --- a/examples/cifar10/model-prefetch.conf +++ /dev/null @@ -1,241 +0,0 @@ -name: "cifar10-convnet" -train_steps: 70000 -test_steps:100 -test_frequency:1000 -display_frequency:50 -updater{ - momentum:0.9 - weight_decay:0.004 - learning_rate_change_method:kFixedStep - step:0 - step:60000 - step:65000 - step_lr:0.001 - step_lr:0.0001 - step_lr:0.00001 -} -neuralnet { -layer{ - name: "prefetch" - type: "kPrefetch" - sublayers { - name: "data" - type: "kShardData" - data_param { - path: "examples/cifar10/cifar10_train_shard" - batchsize: 100 - } - } - sublayers{ - name:"rgb" - type: "kRGBImage" - srclayers: "data" - rgbimage_param { - meanfile: "examples/cifar10/image_mean.bin" - } - } - sublayers{ - name: "label" - type: "kLabel" - srclayers: "data" - } - exclude: kTest -} - -layer{ - name: "prefetch" - type: "kPrefetch" - sublayers { - name: "data" - type: "kShardData" - data_param { - path: "examples/cifar10/cifar10_test_shard" - batchsize: 100 - } - } - sublayers{ - name:"rgb" - type: "kRGBImage" - srclayers: "data" - rgbimage_param { - meanfile: "examples/cifar10/image_mean.bin" - } - } - sublayers{ - name: "label" - type: "kLabel" - srclayers: "data" - } - exclude: kTrain -} - -layer { - name: "conv1" - type: "kConvolution" - srclayers: "prefetch" - datablob: "rgb" - convolution_param { - num_filters: 32 - kernel: 5 - stride: 1 - pad:2 - } - param{ - name: "weight" - init_method:kGaussian - std:0.0001 - learning_rate_multiplier:1.0 - } - param{ - name: "bias" - init_method: kConstant - learning_rate_multiplier:2.0 - value:0 - } -} - -layer { - name: "pool1" - type: "kPooling" - srclayers: "conv1" - pooling_param { - pool: MAX - kernel: 3 - stride: 2 - } 
-} -layer { - name: "relu1" - type: "kReLU" - srclayers:"pool1" -} -layer { - name: "norm1" - type: "kLRN" - lrn_param { - norm_region: WITHIN_CHANNEL - local_size: 3 - alpha: 5e-05 - beta: 0.75 - } - srclayers:"relu1" -} -layer { - name: "conv2" - type: "kConvolution" - srclayers: "norm1" - convolution_param { - num_filters: 32 - kernel: 5 - stride: 1 - pad:2 - } - param{ - name: "weight" - init_method:kGaussian - std:0.01 - learning_rate_multiplier:1.0 - } - param{ - name: "bias" - init_method: kConstant - learning_rate_multiplier:2.0 - value:0 - } -} -layer { - name: "relu2" - type: "kReLU" - srclayers:"conv2" -} -layer { - name: "pool2" - type: "kPooling" - srclayers: "relu2" - pooling_param { - pool: MAX - kernel: 3 - stride: 2 - } -} -layer { - name: "norm2" - type: "kLRN" - lrn_param { - norm_region: WITHIN_CHANNEL - local_size: 3 - alpha: 5e-05 - beta: 0.75 - } - srclayers:"pool2" -} -layer { - name: "conv3" - type: "kConvolution" - srclayers: "norm2" - convolution_param { - num_filters: 64 - kernel: 5 - stride: 1 - pad:2 - } - param{ - name: "weight" - init_method:kGaussian - std:0.01 - } - param{ - name: "bias" - init_method: kConstant - value:0 - } -} -layer { - name: "relu3" - type: "kReLU" - srclayers:"conv3" -} -layer { - name: "pool3" - type: "kPooling" - srclayers: "relu3" - pooling_param { - pool: AVE - kernel: 3 - stride: 2 - } -} -layer { - name: "ip1" - type: "kInnerProduct" - srclayers:"pool3" - inner_product_param { - num_output: 10 - } - param{ - name: "weight" - init_method:kGaussian - std:0.01 - learning_rate_multiplier:1.0 - weight_decay_multiplier:250 - } - param{ - name: "bias" - init_method: kConstant - learning_rate_multiplier:2.0 - weight_decay_multiplier:0 - value:0 - } -} - -layer{ - name: "loss" - type:"kSoftmaxLoss" - softmaxloss_param{ - topk:1 - } - srclayers:"ip1" - srclayers:"prefetch" - datablob: "label" -} -} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/examples/cifar10/model.conf 
---------------------------------------------------------------------- diff --git a/examples/cifar10/model.conf b/examples/cifar10/model.conf index bfd7683..0d6e4fd 100644 --- a/examples/cifar10/model.conf +++ b/examples/cifar10/model.conf @@ -5,22 +5,24 @@ test_frequency:300 display_frequency:30 alg: kBackPropagation updater{ - momentum:0.0 weight_decay:0.004 - learning_rate_change_method:kFixedStep - step:0 - step:60000 - step:65000 - step_lr:0.001 - step_lr:0.0001 - step_lr:0.00001 + lr_change: kFixedStep + type: kSGD + fixedstep_conf:{ + step:0 + step:60000 + step:65000 + step_lr:0.001 + step_lr:0.0001 + step_lr:0.00001 + } } neuralnet { partition_type: kDataPartition layer{ name: "data" - type: "kShardData" - data_param { + type: kShardData + sharddata_conf { path: "examples/cifar10/cifar10_train_shard" batchsize: 16 } @@ -28,8 +30,8 @@ layer{ } layer{ name: "data" - type: "kShardData" - data_param { + type: kShardData + sharddata_conf { path: "examples/cifar10/cifar10_test_shard" batchsize: 100 } @@ -37,23 +39,23 @@ layer{ } layer{ name:"rgb" - type: "kRGBImage" + type: kRGBImage srclayers: "data" - rgbimage_param { + rgbimage_conf { meanfile: "examples/cifar10/image_mean.bin" } } layer{ name: "label" - type: "kLabel" + type: kLabel srclayers: "data" } layer { name: "conv1" - type: "kConvolution" + type: kConvolution srclayers: "rgb" - convolution_param { + convolution_conf { num_filters: 32 kernel: 5 stride: 1 @@ -75,9 +77,9 @@ layer { layer { name: "pool1" - type: "kPooling" + type: kPooling srclayers: "conv1" - pooling_param { + pooling_conf { pool: MAX kernel: 3 stride: 2 @@ -85,13 +87,13 @@ layer { } layer { name: "relu1" - type: "kReLU" + type: kReLU srclayers:"pool1" } layer { name: "norm1" - type: "kLRN" - lrn_param { + type: kLRN + lrn_conf { norm_region: WITHIN_CHANNEL local_size: 3 alpha: 5e-05 @@ -101,9 +103,9 @@ layer { } layer { name: "conv2" - type: "kConvolution" + type: kConvolution srclayers: "norm1" - convolution_param { + convolution_conf 
{ num_filters: 32 kernel: 5 stride: 1 @@ -124,14 +126,14 @@ layer { } layer { name: "relu2" - type: "kReLU" + type: kReLU srclayers:"conv2" } layer { name: "pool2" - type: "kPooling" + type: kPooling srclayers: "relu2" - pooling_param { + pooling_conf { pool: MAX kernel: 3 stride: 2 @@ -139,8 +141,8 @@ layer { } layer { name: "norm2" - type: "kLRN" - lrn_param { + type: kLRN + lrn_conf { norm_region: WITHIN_CHANNEL local_size: 3 alpha: 5e-05 @@ -150,9 +152,9 @@ layer { } layer { name: "conv3" - type: "kConvolution" + type: kConvolution srclayers: "norm2" - convolution_param { + convolution_conf { num_filters: 64 kernel: 5 stride: 1 @@ -171,14 +173,14 @@ layer { } layer { name: "relu3" - type: "kReLU" + type: kReLU srclayers:"conv3" } layer { name: "pool3" - type: "kPooling" + type: kPooling srclayers: "relu3" - pooling_param { + pooling_conf { pool: AVE kernel: 3 stride: 2 @@ -186,9 +188,9 @@ layer { } layer { name: "ip1" - type: "kInnerProduct" + type: kInnerProduct srclayers:"pool3" - inner_product_param { + innerproduct_conf { num_output: 10 } param{ @@ -209,8 +211,8 @@ layer { layer{ name: "loss" - type:"kSoftmaxLoss" - softmaxloss_param{ + type: kSoftmaxLoss + softmaxloss_conf{ topk:1 } srclayers:"ip1" http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/examples/mnist/create_shard.cc ---------------------------------------------------------------------- diff --git a/examples/mnist/create_shard.cc b/examples/mnist/create_shard.cc index f545b80..09229ac 100644 --- a/examples/mnist/create_shard.cc +++ b/examples/mnist/create_shard.cc @@ -17,7 +17,7 @@ #include "utils/data_shard.h" #include "utils/common.h" -#include "proto/model.pb.h" +#include "proto/common.pb.h" using singa::DataShard; using singa::WriteProtoToBinaryFile; @@ -85,21 +85,6 @@ void create_shard(const char* image_filename, const char* label_filename, } int main(int argc, char** argv) { -/* -#ifndef GFLAGS_GFLAGS_H_ - namespace gflags = google; -#endif - 
gflags::SetUsageMessage("This program create a DataShard for a MNIST dataset\n" - "Usage:\n" - " create_shard.bin input_image_file input_label_file output_db_file\n" - "The MNIST dataset could be downloaded at\n" - " http://yann.lecun.com/exdb/mnist/\n" - "You should gunzip them after downloading."); - gflags::ParseCommandLineFlags(&argc, &argv, true); - gflags::ShowUsageWithFlagsRestrict(argv[0], - "examples/mnist/create_shard.bin"); -*/ - if (argc != 4) { std::cout<<"This program create a DataShard for a MNIST dataset\n" "Usage:\n" http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/examples/mnist/mlp-lmdb.conf ---------------------------------------------------------------------- diff --git a/examples/mnist/mlp-lmdb.conf b/examples/mnist/mlp-lmdb.conf deleted file mode 100644 index d0ed08f..0000000 --- a/examples/mnist/mlp-lmdb.conf +++ /dev/null @@ -1,223 +0,0 @@ -name: "deep-big-simple-mlp" -train_steps: 10000 -test_steps:10 -test_frequency:60 -display_frequency:30 -checkpoint_frequency:120 -updater{ - base_learning_rate: 0.001 - learning_rate_change_method: kStep - learning_rate_change_frequency: 60 - gamma: 0.997 - param_type: "Param" -} - -neuralnet { -layer { - name: "data" - type: "kLMDBData" - data_param { - path: "/home/wangwei/program/singa/examples/mnist/mnist_train_lmdb" - batchsize: 1000 - random_skip: 10000 - } - exclude: kTest -} - -layer { - name: "data" - type: "kLMDBData" - data_param { - path: "/home/wangwei/program/singa/examples/mnist/mnist_test_lmdb" - batchsize: 1000 - } - exclude: kTrain -} - -layer{ - name:"mnist" - type: "kMnistImage" - srclayers: "data" - mnist_param { -# sigma: 6 -# alpha: 38 -# gamma: 15 -# kernel: 21 -# elastic_freq:100 -# beta:15 -# resize: 29 - norm_a: 127.5 - norm_b: 1 - } -} - - -layer{ - name: "label" - type: "kLabel" - srclayers: "data" -} - -layer{ - name: "fc1" - type: "kInnerProduct" - srclayers:"mnist" - inner_product_param{ - num_output: 2500 - } - param{ - name: "weight" - init_method: 
kUniform - low:-0.05 - high:0.05 - } - param{ - name: "bias" - init_method: kUniform - low: -0.05 - high:0.05 - } -} - -layer{ - name: "tanh1" - type:"kTanh" - srclayers:"fc1" -} -layer{ - name: "fc2" - type: "kInnerProduct" - srclayers:"tanh1" - inner_product_param{ - num_output: 2000 - } - param{ - name: "weight" - init_method: kUniform - low:-0.05 - high:0.05 - } - param{ - name: "bias" - init_method: kUniform - low: -0.05 - high:0.05 - } -} - -layer{ - name: "tanh2" - type:"kTanh" - srclayers:"fc2" -} -layer{ - name: "fc3" - type: "kInnerProduct" - srclayers:"tanh2" - inner_product_param{ - num_output: 1500 - } - param{ - name: "weight" - init_method: kUniform - low:-0.05 - high:0.05 - } - param{ - name: "bias" - init_method: kUniform - low: -0.05 - high:0.05 - } - -} - -layer{ - name: "tanh3" - type:"kTanh" - srclayers:"fc3" -} -layer{ - name: "fc4" - type: "kInnerProduct" - srclayers:"tanh3" - inner_product_param{ - num_output: 1000 - } - param{ - name: "weight" - init_method: kUniform - low:-0.05 - high:0.05 - } - param{ - name: "bias" - init_method: kUniform - low: -0.05 - high:0.05 - } - -} - -layer{ - name: "tanh4" - type:"kTanh" - srclayers:"fc4" -} -layer{ - name: "fc5" - type: "kInnerProduct" - srclayers:"tanh4" - inner_product_param{ - num_output: 500 - } - param{ - name: "weight" - init_method: kUniform - low:-0.05 - high:0.05 - } - param{ - name: "bias" - init_method: kUniform - low: -0.05 - high:0.05 - } - -} - -layer{ - name: "tanh5" - type:"kTanh" - srclayers:"fc5" -} -layer{ - name: "fc6" - type: "kInnerProduct" - srclayers:"tanh5" - inner_product_param{ - num_output: 10 - } - param{ - name: "weight" - init_method: kUniform - low:-0.05 - high:0.05 - } - param{ - name: "bias" - init_method: kUniform - low: -0.05 - high:0.05 - } -} -layer{ - name: "loss" - type:"kSoftmaxLoss" - softmaxloss_param{ - topk:1 - } - srclayers:"fc6" - srclayers:"label" -} -} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/examples/mnist/mlp.conf 
---------------------------------------------------------------------- diff --git a/examples/mnist/mlp.conf b/examples/mnist/mlp.conf index 9eeb1c6..3786c4f 100644 --- a/examples/mnist/mlp.conf +++ b/examples/mnist/mlp.conf @@ -3,19 +3,22 @@ train_steps: 10000 test_steps:10 test_frequency:60 display_frequency:30 +alg: kBackPropagation updater{ - base_learning_rate: 0.001 - learning_rate_change_method: kStep - learning_rate_change_frequency: 60 - gamma: 0.997 - param_type: "Param" + base_lr: 0.001 + lr_change: kStep + type: kSGD + step_conf{ + change_freq: 60 + gamma: 0.997 + } } neuralnet { layer { name: "data" - type: "kShardData" - data_param { + type: kShardData + sharddata_conf { path: "examples/mnist/mnist_train_shard" batchsize: 1000 } @@ -24,8 +27,8 @@ layer { layer { name: "data" - type: "kShardData" - data_param { + type: kShardData + sharddata_conf { path: "examples/mnist/mnist_test_shard" batchsize: 1000 } @@ -34,9 +37,9 @@ layer { layer{ name:"mnist" - type: "kMnistImage" + type: kMnist srclayers: "data" - mnist_param { + mnist_conf { # sigma: 6 # alpha: 38 # gamma: 15 @@ -52,15 +55,15 @@ layer{ layer{ name: "label" - type: "kLabel" + type: kLabel srclayers: "data" } layer{ name: "fc1" - type: "kInnerProduct" + type: kInnerProduct srclayers:"mnist" - inner_product_param{ + innerproduct_conf{ num_output: 2500 } param{ @@ -79,14 +82,14 @@ layer{ layer{ name: "tanh1" - type:"kTanh" + type: kTanh srclayers:"fc1" } layer{ name: "fc2" - type: "kInnerProduct" + type: kInnerProduct srclayers:"tanh1" - inner_product_param{ + innerproduct_conf{ num_output: 2000 } param{ @@ -105,14 +108,14 @@ layer{ layer{ name: "tanh2" - type:"kTanh" + type: kTanh srclayers:"fc2" } layer{ name: "fc3" - type: "kInnerProduct" + type: kInnerProduct srclayers:"tanh2" - inner_product_param{ + innerproduct_conf{ num_output: 1500 } param{ @@ -132,14 +135,14 @@ layer{ layer{ name: "tanh3" - type:"kTanh" + type: kTanh srclayers:"fc3" } layer{ name: "fc4" - type: "kInnerProduct" + type: 
kInnerProduct srclayers:"tanh3" - inner_product_param{ + innerproduct_conf{ num_output: 1000 } param{ @@ -159,14 +162,14 @@ layer{ layer{ name: "tanh4" - type:"kTanh" + type: kTanh srclayers:"fc4" } layer{ name: "fc5" - type: "kInnerProduct" + type: kInnerProduct srclayers:"tanh4" - inner_product_param{ + innerproduct_conf{ num_output: 500 } param{ @@ -186,14 +189,14 @@ layer{ layer{ name: "tanh5" - type:"kTanh" + type: kTanh srclayers:"fc5" } layer{ name: "fc6" - type: "kInnerProduct" + type: kInnerProduct srclayers:"tanh5" - inner_product_param{ + innerproduct_conf{ num_output: 10 } param{ @@ -211,8 +214,8 @@ layer{ } layer{ name: "loss" - type:"kSoftmaxLoss" - softmaxloss_param{ + type:kSoftmaxLoss + softmaxloss_conf{ topk:1 } srclayers:"fc6" http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/include/neuralnet/base_layer.h ---------------------------------------------------------------------- diff --git a/include/neuralnet/base_layer.h b/include/neuralnet/base_layer.h index d7c4c3a..8b5b1bf 100644 --- a/include/neuralnet/base_layer.h +++ b/include/neuralnet/base_layer.h @@ -12,6 +12,7 @@ #include <thread> #include "proto/model.pb.h" +#include "proto/common.pb.h" #include "utils/param.h" #include "utils/common.h" #include "utils/blob.h" @@ -173,7 +174,7 @@ class Layer { name_=name; layer_proto_.set_name(name); } - virtual const string type() const { + virtual int type() const { return layer_proto_.type(); } /** @@ -404,9 +405,7 @@ class DataLayer: public Layer{ return kNone; } - virtual int batchsize() const { - return layer_proto_.data_param().batchsize(); - } + virtual int batchsize() const=0; virtual const Record& sample() const { return sample_; } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/include/neuralnet/layer.h ---------------------------------------------------------------------- diff --git a/include/neuralnet/layer.h b/include/neuralnet/layer.h index bfbee8f..48cffa2 100644 --- a/include/neuralnet/layer.h +++ 
b/include/neuralnet/layer.h @@ -168,7 +168,7 @@ class LRNLayer: public Layer { Blob<float> norm_; }; -class MnistImageLayer: public ParserLayer { +class MnistLayer: public ParserLayer { public: using Layer::Setup; @@ -290,6 +290,9 @@ class ShardDataLayer: public DataLayer{ virtual void ComputeFeature(Phase phase, const vector<shared_ptr<Layer>>& srclayers); virtual void ComputeGradient(const vector<shared_ptr<Layer>>& srclayers){}; virtual void Setup(const LayerProto& proto, const vector<SLayer>& srclayers); + virtual int batchsize() const { + return layer_proto_.sharddata_conf().batchsize(); + } private: shared_ptr<DataShard> shard_; }; @@ -304,7 +307,9 @@ class LMDBDataLayer: public DataLayer{ virtual void Setup(const LayerProto& proto, const vector<SLayer>& srclayers); void ConvertDatumToSingleLableImageRecord(const Datum& datum, SingleLabelImageRecord* record); - + virtual int batchsize() const { + return layer_proto_.lmdbdata_conf().batchsize(); + } private: MDB_env* mdb_env_; MDB_dbi mdb_dbi_; http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/include/utils/blob.h ---------------------------------------------------------------------- diff --git a/include/utils/blob.h b/include/utils/blob.h index 8234b28..97b4ee7 100644 --- a/include/utils/blob.h +++ b/include/utils/blob.h @@ -43,7 +43,7 @@ #include <memory> #include <vector> #include <glog/logging.h> -#include "proto/model.pb.h" +#include "proto/common.pb.h" using std::shared_ptr; using std::vector; http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/include/utils/factory.h ---------------------------------------------------------------------- diff --git a/include/utils/factory.h b/include/utils/factory.h index 3201853..1e3069c 100644 --- a/include/utils/factory.h +++ b/include/utils/factory.h @@ -26,7 +26,7 @@ class Factory { /** * Register functions to create user defined classes. * This function is called by the REGISTER_FACTORY macro. 
- * + * * @param id Identifier of the creating function/class * @param func a function that creates a layer instance */ @@ -37,19 +37,43 @@ class Factory { str2func_[id] = func; } /** - * create a layer instance by providing its type - * - * @param id The identifier of the layer to be created + * Register functions to create user defined classes. + * This function is called by the REGISTER_FACTORY macro. + * + * @param id Identifier of the creating function/class + * @param func a function that creates a layer instance + */ + inline void Register(int id, + const std::function<T*(void)>& func) { + CHECK(id2func_.find(id) == id2func_.end()) + << "The id has been registered by another function"; + id2func_[id] = func; + } + + /** + * create an instance by providing its id + * + * @param id */ inline T* Create(const std::string& id) { CHECK(str2func_.find(id) != str2func_.end()) << "The creation function for " << id << " has not been registered"; return str2func_[id](); } - + /** + * create an instance by providing its id + * + * @param id + */ + inline T* Create(int id) { + CHECK(id2func_.find(id) != id2func_.end()) + << "The creation function for " << id << " has not been registered"; + return id2func_[id](); + } private: // Map that stores the registered creation functions std::map<std::string, std::function<T*(void)>> str2func_; + std::map<int, std::function<T*(void)>> id2func_; }; #endif // SINGA_UTILS_FACTORY_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/src/neuralnet/base_layer.cc ---------------------------------------------------------------------- diff --git a/src/neuralnet/base_layer.cc b/src/neuralnet/base_layer.cc index 63ac7a0..95628cb 100644 --- a/src/neuralnet/base_layer.cc +++ b/src/neuralnet/base_layer.cc @@ -73,7 +73,7 @@ void BridgeDstLayer::SetupAfterPartition(){ /************* Implementation for ConcateLayer ***********/ void ConcateLayer::Setup(const LayerProto& proto, const vector<SLayer>& srclayers){ - size_t 
concate_dim=proto.concate_param().concate_dimension(); + size_t concate_dim=proto.concate_conf().concate_dimension(); CHECK_GE(concate_dim,0); CHECK_GT(srclayers.size(),1); vector<int> shape=srclayers[0]->data(this).shape(); @@ -131,9 +131,10 @@ void PrefetchLayer::ComputeFeature(Phase phase, void PrefetchLayer::Setup(const LayerProto& proto, const vector<SLayer>& srclayers){ Factory<Layer>* factory=Singleton<Factory<Layer>>::Instance(); - CHECK_GE(proto.sublayers_size(), 1); + const auto& sublayers=proto.prefetch_conf().sublayers(); + CHECK_GE(sublayers.size(), 1); map<string, SLayer> layers; - for(auto const &p:proto.sublayers()){ + for(auto const &p:sublayers){ auto layer=shared_ptr<Layer>(factory->Create(p.type())); layer->Init(p); sublayers_.push_back(layer); @@ -141,7 +142,7 @@ void PrefetchLayer::Setup(const LayerProto& proto, } // TODO topology sort layers auto layer=sublayers_.begin(); - for(auto const &p:proto.sublayers()){ + for(auto const &p:sublayers){ std::vector<SLayer> src; for(auto const &srcname: p.srclayers()){ src.push_back(layers[srcname]); @@ -180,8 +181,8 @@ PrefetchLayer::~PrefetchLayer(){ /************* Implementation for SliceLayer****************/ void SliceLayer::Setup(const LayerProto& proto, const vector<SLayer>& srclayers){ - slice_dim_=proto.slice_param().slice_dimension(); - slice_num_=proto.slice_param().slice_num(); + slice_dim_=proto.slice_conf().slice_dimension(); + slice_num_=proto.slice_conf().slice_num(); CHECK_GE(slice_dim_,0); CHECK_EQ(slice_num_, dstlayers_.size()); data_.Reshape(srclayers[0]->data(this).shape()); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/src/neuralnet/layer.cc ---------------------------------------------------------------------- diff --git a/src/neuralnet/layer.cc b/src/neuralnet/layer.cc index 04ce72a..db13824 100644 --- a/src/neuralnet/layer.cc +++ b/src/neuralnet/layer.cc @@ -18,12 +18,12 @@ namespace singa { void ConvolutionLayer::Setup(const LayerProto& proto, const 
vector<SLayer>& srclayers){ CHECK_EQ(srclayers.size(),1); - ConvolutionProto conv_param=proto.convolution_param(); - kernel_=conv_param.kernel(); + ConvolutionProto conv_conf=proto.convolution_conf(); + kernel_=conv_conf.kernel(); CHECK_GT(kernel_, 0) << "Filter size cannot be zero."; - pad_=conv_param.pad(); - stride_=conv_param.stride(); - num_filters_=conv_param.num_filters(); + pad_=conv_conf.pad(); + stride_=conv_conf.stride(); + num_filters_=conv_conf.num_filters(); const vector<int>& srcshape=srclayers[0]->data(this).shape(); int dim=srcshape.size(); CHECK_GT(dim, 2); @@ -55,8 +55,8 @@ void ConvolutionLayer::SetupAfterPartition(const LayerProto& proto, const vector<int> &shape, const vector<SLayer>& srclayers){ LayerProto newproto(proto); - ConvolutionProto *conv_param=newproto.mutable_convolution_param(); - conv_param->set_num_filters(shape[1]); + ConvolutionProto *conv_conf=newproto.mutable_convolution_conf(); + conv_conf->set_num_filters(shape[1]); Setup(newproto, srclayers); } @@ -128,7 +128,7 @@ void DropoutLayer::Setup(const LayerProto& proto, data_.ReshapeLike(srclayers[0]->data(this)); grad_.ReshapeLike(*srclayers[0]->mutable_grad(this)); mask_.Reshape(srclayers[0]->data(this).shape()); - pdrop_=proto.dropout_param().dropout_ratio(); + pdrop_=proto.dropout_conf().dropout_ratio(); } void DropoutLayer::SetupAfterPartition(const LayerProto& proto, @@ -167,7 +167,7 @@ void InnerProductLayer::Setup(const LayerProto& proto, const auto& src=srclayers[0]->data(this); batchsize_=src.shape()[0]; vdim_=src.count()/batchsize_; - hdim_=proto.inner_product_param().num_output(); + hdim_=proto.innerproduct_conf().num_output(); data_.Reshape(vector<int>{batchsize_, hdim_}); grad_.ReshapeLike(data_); Factory<Param>* factory=Singleton<Factory<Param>>::Instance(); @@ -180,7 +180,7 @@ void InnerProductLayer::SetupAfterPartition(const LayerProto& proto, const vector<int> &shape, const vector<SLayer>& srclayers){ LayerProto newproto(proto); - InnerProductProto * 
innerproto=newproto.mutable_inner_product_param(); + InnerProductProto * innerproto=newproto.mutable_innerproduct_conf(); innerproto->set_num_output(shape[1]); Setup(newproto, srclayers); } @@ -301,16 +301,16 @@ void LMDBDataLayer::Setup(const LayerProto& proto, CHECK_EQ(mdb_env_create(&mdb_env_), MDB_SUCCESS) << "mdb_env_create failed"; CHECK_EQ(mdb_env_set_mapsize(mdb_env_, 1099511627776), MDB_SUCCESS); // 1TB CHECK_EQ(mdb_env_open(mdb_env_, - proto.data_param().path().c_str(), + proto.lmdbdata_conf().path().c_str(), MDB_RDONLY, 0664), MDB_SUCCESS) << "cannot open lmdb " - << proto.data_param().path(); + << proto.lmdbdata_conf().path(); CHECK_EQ(mdb_txn_begin(mdb_env_, NULL, MDB_RDONLY, &mdb_txn_), MDB_SUCCESS) << "mdb_txn_begin failed"; CHECK_EQ(mdb_open(mdb_txn_, NULL, 0, &mdb_dbi_), MDB_SUCCESS) << "mdb_open failed"; CHECK_EQ(mdb_cursor_open(mdb_txn_, mdb_dbi_, &mdb_cursor_), MDB_SUCCESS) << "mdb_cursor_open failed"; - LOG(INFO) << "Opening lmdb " << proto.data_param().path(); + LOG(INFO) << "Opening lmdb " << proto.lmdbdata_conf().path(); CHECK_EQ(mdb_cursor_get(mdb_cursor_, &mdb_key_, &mdb_value_, MDB_FIRST), MDB_SUCCESS) << "mdb_cursor_get failed"; @@ -324,20 +324,20 @@ void LMDBDataLayer::Setup(const LayerProto& proto, SingleLabelImageRecord* record=sample_.mutable_image(); ConvertDatumToSingleLableImageRecord(datum, record); - batchsize_=proto.data_param().batchsize(); + batchsize_=batchsize(); records_.resize(batchsize_); - random_skip_=proto.data_param().random_skip(); + random_skip_=proto.lmdbdata_conf().random_skip(); } /***************** Implementation for LRNLayer *************************/ void LRNLayer::Setup(const LayerProto& proto, const vector<SLayer>& srclayers){ CHECK_EQ(srclayers.size(),1); - lsize_ = proto.lrn_param().local_size(); + lsize_ = proto.lrn_conf().local_size(); CHECK_EQ(lsize_ % 2, 1) << "LRN only supports odd values for Localvol"; - knorm_=proto.lrn_param().knorm(); - alpha_ = proto.lrn_param().alpha(); - beta_ = 
proto.lrn_param().beta(); + knorm_=proto.lrn_conf().knorm(); + alpha_ = proto.lrn_conf().alpha(); + beta_ = proto.lrn_conf().beta(); const vector<int>& s=srclayers[0]->data(this).shape(); data_.Reshape(s); @@ -381,7 +381,7 @@ void LRNLayer::ComputeGradient(const vector<SLayer>& srclayers) { /**************** Implementation for MnistImageLayer******************/ -void MnistImageLayer::ParseRecords(Phase phase, +void MnistLayer::ParseRecords(Phase phase, const vector<Record>& records, Blob<float>* blob){ LOG_IF(ERROR, records.size()==0)<<"Empty records to parse"; int ndim=records.at(0).image().shape_size(); @@ -448,20 +448,20 @@ void MnistImageLayer::ParseRecords(Phase phase, } CHECK_EQ(dptr, blob->mutable_cpu_data()+blob->count()); } -void MnistImageLayer::Setup(const LayerProto& proto, +void MnistLayer::Setup(const LayerProto& proto, const vector<SLayer>& srclayers){ CHECK_EQ(srclayers.size(),1); int batchsize=static_cast<DataLayer*>(srclayers[0].get())->batchsize(); Record sample=static_cast<DataLayer*>(srclayers[0].get())->sample(); - kernel_=proto.mnist_param().kernel(); - sigma_=proto.mnist_param().sigma(); - alpha_=proto.mnist_param().alpha(); - beta_=proto.mnist_param().beta(); - gamma_=proto.mnist_param().gamma(); - resize_=proto.mnist_param().resize(); - norm_a_=proto.mnist_param().norm_a(); - norm_b_=proto.mnist_param().norm_b(); - elastic_freq_=proto.mnist_param().elastic_freq(); + kernel_=proto.mnist_conf().kernel(); + sigma_=proto.mnist_conf().sigma(); + alpha_=proto.mnist_conf().alpha(); + beta_=proto.mnist_conf().beta(); + gamma_=proto.mnist_conf().gamma(); + resize_=proto.mnist_conf().resize(); + norm_a_=proto.mnist_conf().norm_a(); + norm_b_=proto.mnist_conf().norm_b(); + elastic_freq_=proto.mnist_conf().elastic_freq(); int ndim=sample.image().shape_size(); CHECK_GE(ndim,2); @@ -478,11 +478,11 @@ void MnistImageLayer::Setup(const LayerProto& proto, void PoolingLayer::Setup(const LayerProto& proto, const vector<SLayer>& srclayers){ 
CHECK_EQ(srclayers.size(),1); - PoolingProto pool_param = proto.pooling_param(); - kernel_=pool_param.kernel(); - stride_=pool_param.stride(); + PoolingProto pool_conf = proto.pooling_conf(); + kernel_=pool_conf.kernel(); + stride_=pool_conf.stride(); CHECK_LT(pad_, kernel_); - pool_=proto.pooling_param().pool(); + pool_=proto.pooling_conf().pool(); CHECK(pool_ == PoolingProto_PoolMethod_AVE || pool_ == PoolingProto_PoolMethod_MAX) << "Padding implemented only for average and max pooling."; @@ -628,9 +628,9 @@ void RGBImageLayer::ParseRecords(Phase phase, void RGBImageLayer::Setup(const LayerProto& proto, const vector<SLayer>& srclayers){ CHECK_EQ(srclayers.size(),1); - scale_=proto.rgbimage_param().scale(); - cropsize_=proto.rgbimage_param().cropsize(); - mirror_=proto.rgbimage_param().mirror(); + scale_=proto.rgbimage_conf().scale(); + cropsize_=proto.rgbimage_conf().cropsize(); + mirror_=proto.rgbimage_conf().mirror(); int batchsize=static_cast<DataLayer*>(srclayers[0].get())->batchsize(); Record sample=static_cast<DataLayer*>(srclayers[0].get())->sample(); vector<int> shape; @@ -645,15 +645,15 @@ void RGBImageLayer::Setup(const LayerProto& proto, } data_.Reshape(shape); mean_.Reshape({shape[1],shape[2],shape[3]}); - if(proto.rgbimage_param().has_meanfile()){ - if(proto.rgbimage_param().meanfile().find("binaryproto")!=string::npos){ + if(proto.rgbimage_conf().has_meanfile()){ + if(proto.rgbimage_conf().meanfile().find("binaryproto")!=string::npos){ BlobProto tmp; - ReadProtoFromBinaryFile(proto.rgbimage_param().meanfile().c_str(), &tmp); + ReadProtoFromBinaryFile(proto.rgbimage_conf().meanfile().c_str(), &tmp); CHECK_EQ(mean_.count(), tmp.data_size()); memcpy(mean_.mutable_cpu_data(), tmp.data().data(), sizeof(float)*tmp.data_size()); }else{ SingleLabelImageRecord tmp; - ReadProtoFromBinaryFile(proto.rgbimage_param().meanfile().c_str(), &tmp); + ReadProtoFromBinaryFile(proto.rgbimage_conf().meanfile().c_str(), &tmp); CHECK_EQ(mean_.count(), tmp.data_size()); 
memcpy(mean_.mutable_cpu_data(), tmp.data().data(), sizeof(float)*tmp.data_size()); } @@ -685,14 +685,14 @@ void ShardDataLayer::ComputeFeature(Phase phase, const vector<SLayer>& srclayers void ShardDataLayer::Setup(const LayerProto& proto, const vector<SLayer>& srclayers){ - shard_= std::make_shared<DataShard>(proto.data_param().path(), + shard_= std::make_shared<DataShard>(proto.sharddata_conf().path(), DataShard::kRead); string key; shard_->Next(&key, &sample_); - batchsize_=proto.data_param().batchsize(); + batchsize_=proto.sharddata_conf().batchsize(); records_.resize(batchsize_); - random_skip_=proto.data_param().random_skip(); + random_skip_=proto.sharddata_conf().random_skip(); } /*******************Implementation of TanLayer***************************/ void TanhLayer::Setup(const LayerProto& proto, @@ -729,9 +729,9 @@ void SoftmaxLossLayer::Setup(const LayerProto& proto, data_.Reshape(srclayers[0]->data(this).shape()); batchsize_=data_.shape()[0]; dim_=data_.count()/batchsize_; - topk_=proto.softmaxloss_param().topk(); + topk_=proto.softmaxloss_conf().topk(); metric_.Reshape(vector<int>{2}); - scale_=proto.softmaxloss_param().scale(); + scale_=proto.softmaxloss_conf().scale(); } void SoftmaxLossLayer::SetupAfterPartition(const LayerProto& proto, const vector<int> &shape, http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/src/neuralnet/neuralnet.cc ---------------------------------------------------------------------- diff --git a/src/neuralnet/neuralnet.cc b/src/neuralnet/neuralnet.cc index b88d4a6..2240499 100644 --- a/src/neuralnet/neuralnet.cc +++ b/src/neuralnet/neuralnet.cc @@ -1,6 +1,7 @@ #include <algorithm> #include <queue> +#include "proto/model.pb.h" #include "neuralnet/neuralnet.h" #include "utils/singleton.h" #include "utils/factory.h" @@ -8,29 +9,33 @@ #include "utils/cluster.h" namespace singa { -#define CreateLayer(id) CreateInstance(id, Layer) +#define LayerT(x) LayerProto_LayerType_k##x + +#define RegisterLayer(factory, 
id) \ + factory->Register(LayerProto_LayerType_k##id,\ + CreateInstance(id##Layer, Layer)) void NeuralNet::RegisterLayers(){ Factory<Layer>* factory=Singleton<Factory<Layer>>::Instance(); - factory->Register("kBridgeDst", CreateLayer(BridgeDstLayer)); - factory->Register("kBridgeSrc", CreateLayer(BridgeSrcLayer)); - factory->Register("kConvolution", CreateLayer(ConvolutionLayer)); - factory->Register("kConcate", CreateLayer(ConcateLayer)); - factory->Register("kDropout", CreateLayer(DropoutLayer)); - factory->Register("kInnerProduct", CreateLayer(InnerProductLayer)); - factory->Register("kLabel", CreateLayer(LabelLayer)); - factory->Register("kLMDBData", CreateLayer(LMDBDataLayer)); - factory->Register("kLRN", CreateLayer(LRNLayer)); - factory->Register("kMnistImage", CreateLayer(MnistImageLayer)); - factory->Register("kPooling", CreateLayer(PoolingLayer)); - factory->Register("kPrefetch", CreateLayer(PrefetchLayer)); - factory->Register("kRGBImage", CreateLayer(RGBImageLayer)); - factory->Register("kReLU", CreateLayer(ReLULayer)); - factory->Register("kShardData", CreateLayer(ShardDataLayer)); - factory->Register("kSlice", CreateLayer(SliceLayer)); - factory->Register("kSoftmaxLoss", CreateLayer(SoftmaxLossLayer)); - factory->Register("kSplit", CreateLayer(SplitLayer)); - factory->Register("kTanh", CreateLayer(TanhLayer)); + RegisterLayer(factory, BridgeDst); + RegisterLayer(factory, BridgeSrc); + RegisterLayer(factory, Convolution); + RegisterLayer(factory, Concate); + RegisterLayer(factory, Dropout); + RegisterLayer(factory, InnerProduct); + RegisterLayer(factory, Label); + RegisterLayer(factory, LMDBData); + RegisterLayer(factory, LRN); + RegisterLayer(factory, Mnist); + RegisterLayer(factory, Prefetch); + RegisterLayer(factory, Pooling); + RegisterLayer(factory, RGBImage); + RegisterLayer(factory, ReLU); + RegisterLayer(factory, ShardData); + RegisterLayer(factory, Slice); + RegisterLayer(factory, SoftmaxLoss); + RegisterLayer(factory, Split); + 
RegisterLayer(factory, Tanh); } shared_ptr<NeuralNet> NeuralNet::SetupNeuralNet(const NetProto& np, Phase phase, int group_size){ @@ -139,7 +144,7 @@ void NeuralNet::PartitionNeuralNet(){ graph_=CreatePartitonedGraph(layers_, name2layer_); //DLOG(ERROR)<<"pure graph after partition\n"<<graph_.ToString(); map<string, shared_ptr<Layer>> name2layer(name2layer_); - map<string, vector<shared_ptr<Layer>>> share_param_layers; + map<string, vector<shared_ptr<Layer>>> share_conf_layers; name2layer_.clear(); layers_.clear(); int gsize=group_size_; @@ -149,23 +154,25 @@ void NeuralNet::PartitionNeuralNet(){ LayerProto proto; proto.set_name(node->name()); proto.set_partitionid(node->val().partitionid); - const string& origin=node->val().origin; + string origin=node->val().origin; if (origin=="kSlice"){ - proto.set_type(origin); - SliceProto *slice=proto.mutable_slice_param(); + proto.set_type(LayerT(Slice)); + SliceProto *slice=proto.mutable_slice_conf(); slice->set_slice_dimension(node->val().slice_dimension); slice->set_slice_num(node->dstnodes().size()); }else if(origin== "kConcate"){ - proto.set_type(origin); - ConcateProto *concate=proto.mutable_concate_param(); + proto.set_type(LayerT(Concate)); + ConcateProto *concate=proto.mutable_concate_conf(); concate->set_concate_dimension(node->val().concate_dimension); concate->set_concate_num(node->srcnodes().size()); }else if(origin=="kSplit"){ - proto.set_type(origin); - SplitProto *split=proto.mutable_split_param(); + proto.set_type(LayerT(Split)); + SplitProto *split=proto.mutable_split_conf(); split->set_num_splits(node->dstnodes().size()); - }else if(origin=="kBridgeSrc" || origin== "kBridgeDst"){ - proto.set_type(origin); + }else if(origin=="kBridgeSrc"){ + proto.set_type(LayerT(BridgeSrc)); + }else if(origin =="kBridgeDst"){ + proto.set_type(LayerT(BridgeDst)); }else{ CHECK(name2layer.find(node->val().origin)!=name2layer_.end()) <<"Unkown origin for node "<<node->val().origin; @@ -191,7 +198,7 @@ void 
NeuralNet::PartitionNeuralNet(){ layer->set_name(node->name()); newlayer=layer; if(oldlayer->partition_type()==kDataPartition) - share_param_layers[node->val().origin].push_back(newlayer); + share_conf_layers[node->val().origin].push_back(newlayer); } newlayer->set_partitionid(node->val().partitionid); } @@ -226,15 +233,15 @@ void NeuralNet::PartitionNeuralNet(){ // share Params for layers generated from the same origin layer due to // data partition - for(auto & entry: share_param_layers){ + for(auto & entry: share_conf_layers){ auto layers= entry.second; auto owner=layers.begin(); - auto owner_params=(*owner)->GetParams(); + auto owner_confs=(*owner)->GetParams(); for(auto it=owner+1; it!=layers.end();it++){ auto params=(*it)->GetParams(); - CHECK_EQ(params.size(), owner_params.size()); + CHECK_EQ(params.size(), owner_confs.size()); for(size_t i=0;i<params.size();i++) - params.at(i)->ShareData(owner_params.at(i)); + params.at(i)->ShareData(owner_confs.at(i)); } } LOG(INFO)<<"network graph after partition layers\n"<<ToString(); @@ -369,7 +376,6 @@ std::string NeuralNet::ToString(){ map<string, string> info; for(auto layer: layers_){ info[layer->name()]=IntVecToString(layer->shape(nullptr)); - string type=layer->type(); } return graph_.ToString(info); } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/src/proto/common.proto ---------------------------------------------------------------------- diff --git a/src/proto/common.proto b/src/proto/common.proto new file mode 100644 index 0000000..6bc0919 --- /dev/null +++ b/src/proto/common.proto @@ -0,0 +1,80 @@ +package singa; + +enum MsgType { + kGet = 0; + kPut = 1; + kSync = 2; + kUpdate = 3; + kSyncRequest = 4; + kSyncResponse = 5; + kStop = 6; + kData = 7; + kRGet = 8; + kRUpdate = 9; + kConnect = 10; + kMetric = 11; +}; + +enum EntityType { + kWorkerParam = 0; + kWorkerLayer = 1; + kServer = 2; + kStub = 3; + kRuntime = 4; +}; + +enum ShareOption { + kValueOnly = 0; + kWhole = 1; +}; + +message 
BlobProtos { + repeated BlobProto blobs = 1; + repeated int32 ids = 2; + repeated string names = 3; +} + +enum ConnectionType { + kOneToOne = 0; + kOneToAll = 1; +} + +// to import caffe's lmdb dataset +message Datum { + optional int32 channels = 1; + optional int32 height = 2; + optional int32 width = 3; + // the actual image data, in bytes + optional bytes data = 4; + optional int32 label = 5; + // Optionally, the datum could also hold float data. + repeated float float_data = 6; + // If true data contains an encoded image that need to be decoded + optional bool encoded = 7 [default = false]; +} + +message BlobProto { + optional int32 num = 1 [default = 0]; + optional int32 channels = 2 [default = 0]; + optional int32 height = 3 [default = 0]; + optional int32 width = 4 [default = 0]; + repeated float data = 5 [packed = true]; + repeated float diff = 6 [packed = true]; +} + +message Record { + enum Type { + // each record contains image raw feature and its label. + kSingleLabelImage = 0; + } + optional Type type = 1 [default = kSingleLabelImage]; + // configuration for + optional SingleLabelImageRecord image = 2; +} + +message SingleLabelImageRecord { + repeated int32 shape = 1; + optional int32 label = 2; + optional bytes pixel = 3; + repeated float data = 4; +} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/src/proto/model.proto ---------------------------------------------------------------------- diff --git a/src/proto/model.proto b/src/proto/model.proto index 1e12087..4256491 100644 --- a/src/proto/model.proto +++ b/src/proto/model.proto @@ -3,76 +3,68 @@ enum Phase { kTrain = 0; kValidation = 1; kTest= 2; + // positive phase for contrastive divergence algorithm kPositive = 3; + // negative phase for contrastive divergence algorithm kNegative = 4; } -enum ShareOption { - kValueOnly = 0; - kWhole = 1; -}; -message ModelProto{ +message ModelProto { + // model name, e.g., "cifar10-dcnn", "mnist-mlp" required string name = 1; - // start
display after this num steps - optional int32 display_after_steps = 6 [default = 0]; - // frequency of display - optional int32 display_frequency = 7 [default = 0]; - - // start validation after this num steps - optional int32 validation_after_steps = 10 [default = 0]; - // frequency of validation - optional int32 validation_frequency = 11 [default = 0]; - - // start test after this num steps - optional int32 test_after_steps = 13 [default = 0]; - // frequency of test - optional int32 test_frequency = 14 [default = 0]; - optional int32 checkpoint_after_steps = 15 [default = 0]; - // frequency of test - optional int32 checkpoint_frequency = 16 [default = 0]; - optional bool prefetch=18[default = true]; + // frequency of displaying training info + required int32 display_frequency = 3 ; // total num of steps for training - required int32 train_steps = 20; - // total num of steps for validation - optional int32 validation_steps=21 [default=0]; - // total num of steps for test - optional int32 test_steps=22 [default=0]; - // last snapshot step - optional int32 step=29; - - required UpdaterProto updater=31; - // There are two basic algorithms for calculating gradients. - // Different deep learning models use different algorithms. + required int32 train_steps = 5; + // configuration of SGD updater, including learning rate, etc. 
+ required UpdaterProto updater = 7; enum GradCalcAlg { + // BP algorithm for feed-forward models, e.g., CNN, MLP, RNN kBackPropagation = 1; + // CD algorithm for RBM, DBM etc., models kContrastiveDivergence = 2; } - required GradCalcAlg alg= 32 [default = kBackPropagation]; - required NetProto neuralnet = 40; - optional bool debug=41 [default=false]; - optional int32 warmup_steps=50 [default=0]; + // gradient calculation algorithm + required GradCalcAlg alg = 8 [default = kBackPropagation]; + required NetProto neuralnet = 9; + + // total num of steps for validation + optional int32 validation_steps = 30 [default = 0]; + // total num of steps for test + optional int32 test_steps = 31 [default = 0]; + // frequency of validation + optional int32 validation_frequency = 32; + // frequency of test + optional int32 test_frequency = 33 [default = 0]; + // frequency of checkpoint + optional int32 checkpoint_frequency = 34 [default = 0]; + // send parameters to servers after training for this num of steps + optional int32 warmup_steps = 35 [default = 0]; + + // start display after this num steps + optional int32 display_after_steps = 60[default = 0]; + // start checkpoint after this num steps + optional int32 checkpoint_after_steps = 61 [default = 0]; + // start test after this num steps + optional int32 test_after_steps = 62 [default = 0]; +// start validation after this num steps + optional int32 validation_after_steps = 63 [default = 0]; + // last snapshot step + optional int32 step = 64 [default = 0]; + // display debug info + optional bool debug = 65 [default = false]; } message NetProto { repeated LayerProto layer = 1; + // partitioning type for parallelism optional PartitionType partition_type = 3 [default = kNone]; } +// weight matrix should be defined before bias vector message ParamProto { - // for the program to identify it and share among layers. 
- // e.g., "conv1_weight","fc_bias" - required string name = 1; - optional int32 id=2; - // in most situations, user do not need to config this, - // the program will calculate it - repeated int32 shape = 3; - // split the parameter into multiple sub params for serialzation and - // transferring (Google Protobuf has size limit) - optional int32 split_threshold = 4 [default = 5000000]; - // partition dimension, -1 for no partition - optional int32 partition_dim = 5 [default = -1]; - optional int32 owner = 6; enum InitMethod { + // fix the values of all parameters a constant in the value field kConstant = 0; // sample gaussian with std and mean kGaussian = 1; @@ -87,31 +79,38 @@ message ParamProto { // a=sqrt(3)/sqrt(fan_in), range is [-a, +a]; no need to set value=sqrt(3), // the program will multiply it. kUniformSqrtFanIn = 5; - // from Theano MLP tutorial, let a=1/sqrt(fan_in+fan_out). for tanh - // activation, range is [-6a, +6a], for sigmoid activation, range is - // [-24a, +24a], put the scale factor to value field. + // from Theano MLP tutorial, let a=sqrt(6/(fan_in+fan_out)). for tanh + // activation, range is [-a, +a], for sigmoid activation, range is + // [-4a, +4a], put the scale factor to value field. // <a href="http://deeplearning.net/tutorial/mlp.html"> Theano MLP</a> kUniformSqrtFanInOut = 6; } - optional InitMethod init_method = 7 [default = kConstant]; + required InitMethod init_method = 1 [default = kGaussian]; // constant init - optional float value = 8 [default = 1]; + optional float value = 5 [default = 1]; // for uniform sampling - optional float low = 9 [default = -1]; - optional float high = 10 [default = 1]; + optional float low = 6 [default = -1]; + optional float high = 7 [default = 1]; // for gaussian sampling - optional float mean = 11 [default = 0]; - optional float std = 12 [default = 1]; + optional float mean = 8 [default = 0]; + optional float std = 9 [default = 1]; // multiplied on the global learning rate. 
- optional float learning_rate_multiplier = 13 [default = 1]; + optional float learning_rate_multiplier = 15 [default = 1]; // multiplied on the global weight decay. - optional float weight_decay_multiplier = 14 [default = 1]; -} + optional float weight_decay_multiplier = 16 [default = 1]; + // partition dimension, -1 for no partition + optional int32 partition_dim = 30 [default = -1]; + // usually, the program will infer the param shape + repeated int32 shape = 31; -message BlobProtos { - repeated BlobProto blobs = 1; - repeated int32 ids = 2; - repeated string names = 3; + // used for identifying the same params from diff models and display debug info + optional string name = 61 [default = "param"]; + // used internally + optional int32 id = 62; + // parameter slice limit (Google Protobuf also has size limit) + optional int32 split_threshold = 63 [default = 5000000]; + // used internally + optional int32 owner = 64 [default = -1]; } enum PartitionType{ @@ -120,159 +119,212 @@ enum PartitionType{ kNone=2; } -enum ConnectionType{ - kOneToOne=0; - kOneToAll=1; -} - message LayerProto { - required string name = 1; // the layer name - required string type = 2; // the layer type from the enum above - repeated string srclayers=3; - optional int32 locationid=4 [default=0]; // todo make locationID an array - optional int32 partitionid=5 [default=0]; - optional PartitionType partition_type=6 [default=kNone]; - optional string datablob=7; - // can be pos/neg neuron value for CD, neuron value/grad for BP - //repeated DAryProto ary = 10; - repeated string share_ary = 11; + // the layer name used for identification + required string name = 1; + enum LayerType{ + kBridgeSrc = 15; + kBridgeDst = 16; + kConvolution = 1; + kConcate = 2; + kShardData = 3; + kDropout = 4; + kInnerProduct = 5; + kLabel = 18; + kLMDBData = 17; + kLRN = 6; + kMnist = 7; + kPooling = 8; + kPrefetch = 19; + kReLU = 9; + kRGBImage = 10; + kSoftmaxLoss = 11; + kSlice = 12; + kSplit = 13; + kTanh = 14; + } +
// source layer names + repeated string srclayers = 3; // parameters, e.g., weight matrix or bias vector repeated ParamProto param = 12; + // all layers are included in the net structure for training phase by default. + // some layers like data layer for loading test data are not used by training + // phase and should be removed by setting the exclude field. + repeated Phase exclude = 15; + // the layer type from the enum above + required LayerType type = 20; + // configuration for convolution layer + optional ConvolutionProto convolution_conf = 30; + // configuration for concatenation layer + optional ConcateProto concate_conf = 31; + // configuration for dropout layer + optional DropoutProto dropout_conf = 33; + // configuration for inner product layer + optional InnerProductProto innerproduct_conf = 34; + // configuration for LMDB data layer + optional DataProto lmdbdata_conf = 35; + // configuration for local response normalization layer + optional LRNProto lrn_conf = 45; + // configuration for mnist parser layer + optional MnistProto mnist_conf= 36; + // configuration for pooling layer + optional PoolingProto pooling_conf = 37; + // configuration for prefetch layer + optional PrefetchProto prefetch_conf = 44; + // configuration for rectified linear unit layer + optional ReLUProto relu_conf = 38; + // configuration for rgb image parser layer + optional RGBImageProto rgbimage_conf = 39; + // configuration for data layer + optional DataProto sharddata_conf = 32; + // configuration for slice layer + optional SliceProto slice_conf = 41; + // configuration for softmax loss layer + optional SoftmaxLossProto softmaxloss_conf = 40; + // configuration for split layer + optional SplitProto split_conf = 42; + // configuration for tanh layer + optional TanhProto tanh_conf = 43; + // partition type which overrides the partition type for neural net + optional PartitionType partition_type = 59; + optional string datablob = 58 [default = "unknow"]; + // names of
parameters shared from other layers - repeated string share_param = 13; - // All layers are included in the net structure for training phase by default. - // Layers, e.g., computing performance metrics for test phase, can be excluded - // by this field which defines in which phase this layer should be excluded. - repeated Phase exclude = 20; - // hyper-parameters for layers - optional ConvolutionProto convolution_param = 21; - optional ConcateProto concate_param = 31; - optional DataProto data_param = 22; - optional DropoutProto dropout_param = 23; - optional InnerProductProto inner_product_param = 24; - optional LRNProto lrn_param = 25; - optional MnistProto mnist_param= 26; - optional PoolingProto pooling_param = 27; - optional ReLUProto relu_param = 28; - optional SoftmaxLossProto softmaxloss_param = 29; - optional TanhProto tanh_param = 30; - optional SliceProto slice_param = 32; - optional SplitProto split_param = 33; - optional RGBImage rgbimage_param = 34; - repeated LayerProto sublayers = 35; + repeated string share_param = 60; + // TODO(wangwei): make location ID an array + optional int32 locationid = 61 [default = 0]; + optional int32 partitionid = 62 [default = 0]; +} + +message RGBImageProto { + // scale factor for each pixel + optional float scale = 1 [default = 1.0]; + // size after cropping + optional int32 cropsize = 2 [default = 0]; + // mirror the image + optional bool mirror = 3 [default = false]; + // meanfile path + optional string meanfile = 4 [default = ""]; } -message RGBImage { - optional float scale=1 [default=1.0]; - optional int32 cropsize=2 [default=0]; - optional bool mirror=3 [default=false]; - optional string meanfile=4 [default=""]; +message PrefetchProto{ + repeated LayerProto sublayers = 1; } -message SplitProto{ - required int32 num_splits=1; + +message SplitProto { + optional int32 num_splits = 1 [default =1]; } // scaled tan: A*tan(B*x) message TanhProto { + // A of A*tan(B*x) optional float outer_scale = 1 [default = 1.0]; + 
// B of A*tan(B*x) optional float inner_scale = 2 [default = 1.0]; } -// Message that stores parameters used by SoftmaxLossProto message SoftmaxLossProto { - // accuracy is not comptued by default, unless topk>0; - // When computing accuracy, count as correct by comparing the true label to - // the top k scoring classes. + // computing accuracy against topk results optional int32 topk = 1 [default = 1]; - optional float scale= 2 [default = 1]; + // loss scale factor + optional float scale= 30 [default = 1]; } -// Message that stores parameters used by ConvolutionLayer message ConvolutionProto { - required uint32 num_filters = 1; // The number of outputs for the layer - optional bool bias_term = 2 [default = true]; // whether to have bias terms - // Pad, kernel size, and stride are all given as a single value for equal - // dimensions in height and width or as Y, X pairs. - // The padding size (equal in Y, X) - optional uint32 pad = 3 [default = 0]; - // The stride (equal in Y, X) - optional uint32 stride = 4 [default = 1]; - // The kernel height/width - required uint32 kernel= 5; + // The number of outputs for the layer + required int32 num_filters = 1; + // the kernel height/width + required int32 kernel= 2; + + // The padding height/width + optional int32 pad = 30 [default = 0]; + // the stride + optional int32 stride = 31 [default = 1]; + // whether to have bias terms + optional bool bias_term = 32 [default = true]; } -message ConcateProto{ - required int32 concate_dimension=1; - required int32 concate_num=2; +message ConcateProto { + // on which dimension, starts from 0 + required int32 concate_dimension = 1; + // concatenate offset + optional int32 concate_num = 30; } -// Message that stores parameters used by DataLayer message DataProto { - // path to the data file/folder, absolute or relative to the - // ClusterProto::workspace - required string path=2; - // Specify the batch size. 
- required uint32 batchsize = 4; + // path to the data file/folder, absolute or relative to the workspace + required string path = 2; + // batch size. + required int32 batchsize = 4; // skip [0,random_skip] records - optional uint32 random_skip = 5 [default = 0]; + optional int32 random_skip = 30 [default = 0]; } message MnistProto { + // normalization x/norm_a + required float norm_a = 1 [default = 1]; + // normalization x-norm_b + required float norm_b = 2 [default = 0]; + // elastic distortion - optional int32 kernel = 1 [default = 0]; - optional float sigma = 2 [default = 0]; - optional float alpha = 3 [default = 0]; + optional int32 kernel = 30 [default = 0]; + optional float sigma = 31 [default = 0]; + optional float alpha = 32 [default = 0]; // rotation or horizontal shearing - optional float beta = 4 [default = 0]; + optional float beta = 33 [default = 0]; // scaling - optional float gamma = 5 [default = 0]; + optional float gamma = 34 [default = 0]; // scale to this size as input for deformation - optional int32 resize = 6 [default = 0] ; - optional int32 elastic_freq = 7 [default = 0]; - optional float norm_a = 8 [default = 1]; - optional float norm_b = 9 [default = 0]; + optional int32 resize = 35 [default = 0] ; + optional int32 elastic_freq = 36 [default = 0]; } // Message that stores parameters used by DropoutLayer message DropoutProto { - optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio + // dropout ratio + optional float dropout_ratio = 30 [default = 0.5]; } // Message that stores parameters used by InnerProductLayer message InnerProductProto { - required uint32 num_output = 1; // The number of outputs for the layer - optional bool bias_term = 2 [default = true]; // whether to have bias terms + // number of outputs for the layer + required int32 num_output = 1; + // use bias vector or not + optional bool bias_term = 30 [default = true]; } -// Message that stores parameters used by LRNLayer message LRNProto { - optional int32 
local_size = 1 [default = 5]; - optional float alpha = 2 [default = 1.]; - optional float beta = 3 [default = 0.75]; + // local response size + required int32 local_size = 1 [default = 5]; + // scale factor + optional float alpha = 31 [default = 1.0]; + // exponential number + optional float beta = 32 [default = 0.75]; enum NormRegion { + // across channels, e.g., r,g,b ACROSS_CHANNELS = 0; + // within channel, e.g., r, g and b are concatenated into one channel WITHIN_CHANNEL = 1; } - optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS]; - optional float knorm =5 [default = 1.0]; + // normalization objective + optional NormRegion norm_region = 33 [default = ACROSS_CHANNELS]; + // offset + optional float knorm =34 [default = 1.0]; } -// Message that stores parameters used by PoolingLayer message PoolingProto { + // The kernel size (square) + required int32 kernel= 1; enum PoolMethod { MAX = 0; AVE = 1; } // The pooling method - optional PoolMethod pool = 1 [default = MAX]; - // Pad, kernel size, and stride are all given as a single value for equal - // dimensions in height and width or as Y, X pairs. - // The kernel size (square) - required uint32 kernel= 2; - // The padding size (equal in Y, X) - optional uint32 pad = 4 [default = 0]; - // The stride (equal in Y, X) - optional uint32 stride = 3 [default = 1]; + optional PoolMethod pool = 30 [default = MAX]; + // The padding size + optional uint32 pad = 31 [default = 0]; + // The stride + optional uint32 stride = 32 [default = 1]; } message SliceProto{ @@ -280,79 +332,90 @@ message SliceProto{ required int32 slice_num=2; } -// Message that stores parameters used by ReLULayer message ReLUProto { - // Allow non-zero slope for negative inputs to speed up optimization - // Described in: - // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities - // improve neural network acoustic models. In ICML Workshop on Deep Learning - // for Audio, Speech, and Language Processing. + // Ref. Maas, A. 
L., Hannun, A. Y., & Ng, A. Y. (2013). + // Rectifier nonlinearities improve neural network acoustic models. + // In ICML Workshop on Deep Learning for Audio, Speech, and Language Processing. optional float negative_slope = 1 [default = 0]; } -message Record { - enum Type { - kSingleLabelImage = 0; +message UpdaterProto { + enum UpdaterType{ + // normal SGD with momentum and weight decay + kSGD = 1; + // adaptive subgradient, http://www.magicbroom.info/Papers/DuchiHaSi10.pdf + kAdaGrad = 2; + // http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf + kRMSProp = 3; + // Nesterov first optimal gradient method + kNesterov = 4; } - optional Type type = 1 [default = kSingleLabelImage]; - optional SingleLabelImageRecord image = 2; -} + // updater type + required UpdaterType type = 1 [default=kSGD]; + // configuration for RMSProp algorithm + optional RMSPropProto rmsprop_conf = 50; -// to import caffe's lmdb dataset -message Datum { - optional int32 channels = 1; - optional int32 height = 2; - optional int32 width = 3; - // the actual image data, in bytes - optional bytes data = 4; - optional int32 label = 5; - // Optionally, the datum could also hold float data.
- repeated float float_data = 6; - // If true data contains an encoded image that need to be decoded - optional bool encoded = 7 [default = false]; -} - -message SingleLabelImageRecord { - repeated int32 shape = 1; - optional int32 label = 2; - optional bytes pixel = 3; - repeated float data = 4; -} - -message UpdaterProto { - optional float momentum = 4 [default = 0]; - optional float weight_decay = 5 [default = 0]; - // used in changing learning rate - optional float gamma = 6 [default=1]; - optional float pow=7 [default=0]; - optional float delta=8 [default=0.0000001]; - optional float rho=9 [default=0.9]; - optional float base_learning_rate=12 [default=0]; - optional float final_learning_rate=13 [default=0]; - optional int32 learning_rate_change_frequency = 14 [default=0]; - enum ChangeProto { + enum ChangeMethod { kFixed = 0; - kInverse_t = 1; + kInverseT = 1; kInverse = 2; kExponential = 3; kLinear = 4; kStep = 5; kFixedStep = 6; } - optional ChangeProto learning_rate_change_method = 16 [default = kFixed]; - optional int32 sync_frequency = 17 [default = 1]; - // warmup the parameters and then send to parameter servers. - optional float moving_rate = 26 [default = 0]; - optional string param_type = 27 [default = "Param"]; + // change method for learning rate + required ChangeMethod lr_change= 2 [default = kFixed]; + + optional FixedStepProto fixedstep_conf=40; + optional StepProto step_conf=41; + optional LinearProto linear_conf=42; + optional ExponentialProto exponential_conf=43; + optional InverseProto inverse_conf=44; + optional InverseTProto inverset_conf=45; + + optional float momentum = 31 [default = 0]; + optional float weight_decay = 32 [default = 0]; + // base learning rate + optional float base_lr = 34 [default = 0]; + // used to avoid divide by 0, i.e. 
x/(y+delta) + optional float delta = 35 [default = 0.00000001]; +} + +message RMSPropProto{ + // history=history*rho_+(1-rho_)*(grad*grad_scale); + required float rho = 1; +} + +message FixedStepProto{ repeated int32 step = 28; + // lr = step_lr[i] if current step >= step[i] repeated float step_lr = 29; } -message BlobProto { - optional int32 num = 1 [default = 0]; - optional int32 channels = 2 [default = 0]; - optional int32 height = 3 [default = 0]; - optional int32 width = 4 [default = 0]; - repeated float data = 5 [packed = true]; - repeated float diff = 6 [packed = true]; +message StepProto{ + // lr = base_lr * gamma^(step/change_freq) + required float gamma = 35 [default = 1]; + // lr = base_lr * gamma^(step/change_freq) + required int32 change_freq= 40; +} +message LinearProto{ + // lr = (1 - step / freq) * base_lr + (step / freq) * final_lr + required int32 change_freq= 40; + // lr = (1 - step / freq) * base_lr + (step / freq) * final_lr + required float final_lr = 39; +} +message ExponentialProto{ + // lr = base / 2^(step/change_freq) + required int32 change_freq= 40; +} +message InverseTProto{ + // lr = base_lr / (1+step/final_lr) + required float final_lr = 39; +} +message InverseProto{ + // lr = base_lr*(1+gamma*step)^(-pow) + required float gamma = 1 [default = 1]; + // lr = base_lr*(1+gamma*step)^(-pow) + required float pow = 2 [default = 0]; } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/src/trainer/trainer.cc ---------------------------------------------------------------------- diff --git a/src/trainer/trainer.cc b/src/trainer/trainer.cc index 11499db..2a89de2 100644 --- a/src/trainer/trainer.cc +++ b/src/trainer/trainer.cc @@ -3,6 +3,7 @@ #include <map> #include <queue> #include <glog/logging.h> +#include "proto/common.pb.h" #include "trainer/trainer.h" #include "mshadow/tensor.h" using std::vector; @@ -276,6 +277,8 @@ void Trainer::Start(const ModelProto& mproto, const ClusterProto& cproto, Run(workers.size(), 
servers.size()); for(auto& thread: threads) thread.join(); + for(auto x: ctx) + delete x; } void Trainer::Run(int nworkers, int nservers){ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/acfc4097/src/utils/updater.cc ---------------------------------------------------------------------- diff --git a/src/utils/updater.cc b/src/utils/updater.cc index ae7d582..80e3619 100644 --- a/src/utils/updater.cc +++ b/src/utils/updater.cc @@ -9,45 +9,44 @@ using namespace mshadow::expr; namespace singa { float Updater::GetLearningRate(int step){ - float ret = 0., r = 0., base=proto_.base_learning_rate(); + float ret = 0., r = 0., base=proto_.base_lr(); int freq=0; - switch (proto_.learning_rate_change_method()) { - case UpdaterProto_ChangeProto_kFixed: + switch (proto_.lr_change()) { + case UpdaterProto_ChangeMethod_kFixed: ret = base; break; - case UpdaterProto_ChangeProto_kLinear: + case UpdaterProto_ChangeMethod_kLinear: // a is init, b is the final - freq=proto_.learning_rate_change_frequency(); + freq=proto_.linear_conf().change_freq(); r = step * 1.0 / freq; - ret = (1.0 - r) * base + r * proto_.final_learning_rate(); + ret = (1.0 - r) * base + r * proto_.linear_conf().final_lr(); break; - case UpdaterProto_ChangeProto_kExponential: + case UpdaterProto_ChangeMethod_kExponential: // a is init, b is the final, from convnet - CHECK_EQ(base, 2 * proto_.final_learning_rate()) - << "final value should be the half"; - freq=proto_.learning_rate_change_frequency(); + freq=proto_.exponential_conf().change_freq(); ret = base / pow(2, step * 1. / freq); break; - case UpdaterProto_ChangeProto_kInverse_t: + case UpdaterProto_ChangeMethod_kInverseT: // a is init, b is the final, from convnet - CHECK_EQ(base, 2 * proto_.final_learning_rate()) + CHECK_EQ(base, 2 * proto_.inverset_conf().final_lr()) << "final value should be the half"; - ret = base / (1. + step * 1. / proto_.final_learning_rate()); + ret = base / (1. + step * 1. 
/ proto_.inverset_conf().final_lr()); break; - case UpdaterProto_ChangeProto_kInverse: + case UpdaterProto_ChangeMethod_kInverse: // a is init, b is gamma, c is pow - ret=base*pow(1.f+proto_.gamma()*step, -proto_.pow()); + ret=base*pow(1.f+proto_.inverse_conf().gamma()*step, + -proto_.inverse_conf().pow()); break; - case UpdaterProto_ChangeProto_kStep: + case UpdaterProto_ChangeMethod_kStep: // a is the base learning rate, b is gamma, from caffe // notice it is step/change_steps, not step*1.0/change_steps - freq=proto_.learning_rate_change_frequency(); - ret = base * pow(proto_.gamma(), step / freq); + freq=proto_.step_conf().change_freq(); + ret = base * pow(proto_.step_conf().gamma(), step / freq); break; - case UpdaterProto_ChangeProto_kFixedStep: - for(int i=0;i<proto_.step_size();i++){ - if(step>proto_.step(i)) - ret=proto_.step_lr(i); + case UpdaterProto_ChangeMethod_kFixedStep: + for(int i=0;i<proto_.fixedstep_conf().step_size();i++){ + if(step>proto_.fixedstep_conf().step(i)) + ret=proto_.fixedstep_conf().step_lr(i); } break; default: @@ -59,7 +58,7 @@ float Updater::GetLearningRate(int step){ /***********************SGD with momentum******************************/ void SGDUpdater::Init(const UpdaterProto& proto){ Updater::Init(proto); - base_lr_=proto.base_learning_rate(); + base_lr_=proto.base_lr(); //CHECK_GT(base_lr_, 0); momentum_=proto.momentum(); weight_decay_=proto.weight_decay(); @@ -88,7 +87,7 @@ void SGDUpdater::Update(int step, shared_ptr<Param> param, float grad_scale){ /***********************Nesterov******************************/ void NesterovUpdater::Init(const UpdaterProto& proto){ Updater::Init(proto); - base_lr_=proto.base_learning_rate(); + base_lr_=proto.base_lr(); CHECK_GT(base_lr_, 0); weight_decay_=proto.weight_decay(); } @@ -113,7 +112,7 @@ void NesterovUpdater::Update(int step, shared_ptr<Param> param, float grad_scale /***********************AdaGrad******************************/ void AdaGradUpdater::Init(const UpdaterProto& 
proto){ Updater::Init(proto); - base_lr_=proto.base_learning_rate(); + base_lr_=proto.base_lr(); CHECK_GT(base_lr_, 0); delta_=proto.delta(); weight_decay_=proto.weight_decay(); @@ -137,10 +136,10 @@ void AdaGradUpdater::Update(int step, shared_ptr<Param> param, float grad_scale) /***********************RMSProp******************************/ void RMSPropUpdater::Init(const UpdaterProto& proto){ Updater::Init(proto); - base_lr_=proto.base_learning_rate(); + base_lr_=proto.base_lr(); CHECK_GT(base_lr_, 0); delta_=proto.delta(); - rho_=proto.rho(); + rho_=proto.rmsprop_conf().rho(); weight_decay_=proto.weight_decay(); }
