SINGA-98 Add Support for AlexNet ImageNet Classification Model

Update job.conf for alexnet: learning rate, layer order, lr_scale/wd_scale; add cudnn.conf.
Fix a bug in image_preprocess.cc that set the dst pointer incorrectly. The bug caused the loss and accuracy to stop improving after a few iterations (the loss stays at about 6.90x; tested for about 10k iterations). Caffe's performance starts improving after 3000 iterations (its loss is around 6.90x during iterations 200-3500). After fixing the bug, training with mini-batch 128 works, but the loss only starts decreasing after around 10k steps.

Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6e815db3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6e815db3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6e815db3

Branch: refs/heads/master
Commit: 6e815db34b2ca8808ef020071d689043c1e7c469
Parents: fe86b02
Author: Wei Wang <[email protected]>
Authored: Sat Dec 26 15:52:13 2015 +0800
Committer: Wei Wang <[email protected]>
Committed: Wed Jan 6 00:49:19 2016 +0800

----------------------------------------------------------------------
 examples/alexnet/cudnn.conf                    | 434 ++++++++++++++++++++
 examples/alexnet/job.conf                      | 103 +++--
 examples/cifar10/cudnn.conf                    |   5 +-
 include/singa/utils/math_blob.h                |   6 +
 include/singa/utils/math_kernel.h              |   3 +-
 src/neuralnet/connection_layer/slice.cc        |   4 +-
 src/neuralnet/input_layer/image_preprocess.cc  |  26 +-
 src/neuralnet/input_layer/record.cc            |   2 +-
 src/neuralnet/layer.cc                         |   6 +-
 src/neuralnet/neuron_layer/activation.cc       |  12 +-
 src/neuralnet/neuron_layer/cudnn_activation.cc |  19 +-
 src/neuralnet/neuron_layer/cudnn_lrn.cc        |   2 -
 src/neuralnet/neuron_layer/dropout.cc          |   2 +
 src/neuralnet/neuron_layer/lrn.cc              |  10 +-
 src/proto/job.proto                            |   1 -
 src/utils/image_transform.cc                   |   4 +-
 src/utils/updater.cc                           |   9 +-
 src/worker.cc                                  |   2 +-
 18 files changed, 566 insertions(+), 84 deletions(-)
----------------------------------------------------------------------
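The stuck loss of about 6.90 is consistent with ln(1000) ~= 6.908, the cross-entropy of uniform predictions over ImageNet's 1000 classes, i.e. the network was learning nothing. The cause, fixed in the image_preprocess.cc hunk further below, was a destination offset that omitted the image index, so every preprocessed image in the batch overwrote the same slot. A minimal, self-contained sketch of the offset logic (toy sizes and a stand-in copy routine, not SINGA's actual API):

    #include <cstdio>
    #include <vector>

    // Stand-in for the per-image transform; copies one image so the
    // loop structure is visible.
    static void CopyImage(const float* in, float* out, int image_size) {
      for (int i = 0; i < image_size; i++) out[i] = in[i];
    }

    int main() {
      const int batchsize = 4, image_size = 3;  // toy sizes
      std::vector<float> src(batchsize * image_size), dst(batchsize * image_size, 0.f);
      for (size_t i = 0; i < src.size(); i++) src[i] = static_cast<float>(i);
      for (int k = 0; k < batchsize; k++) {
        // Bug: "dst.data() + image_size" wrote every image to slot 1.
        // Fix (as in this commit): offset the destination by k * image_size.
        CopyImage(src.data() + k * image_size, dst.data() + k * image_size, image_size);
      }
      std::printf("last element: %f\n", dst.back());  // 11.0 once fixed
      return 0;
    }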
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/examples/alexnet/cudnn.conf
----------------------------------------------------------------------
diff --git a/examples/alexnet/cudnn.conf b/examples/alexnet/cudnn.conf
new file mode 100644
index 0000000..eef20f9
--- /dev/null
+++ b/examples/alexnet/cudnn.conf
@@ -0,0 +1,434 @@
+name: "alexnet"
+train_steps: 450000
+#test_steps: 500
+#test_freq: 1000
+disp_freq: 20
+checkpoint_freq: 100000
+checkpoint_after: 100000
+gpu: 2
+#debug: true
+#checkpoint_path: "examples/alexnet/checkpoint/step10000-worker0"
+train_one_batch {
+  alg: kBP
+}
+updater{
+  type: kSGD
+  weight_decay: 0.0005
+  momentum: 0.9
+  learning_rate {
+    type: kStep
+    base_lr: 0.01
+    step_conf {
+      gamma: 0.1
+      change_freq: 100000
+    }
+  }
+}
+neuralnet {
+  layer{
+    name: "data"
+    type: kRecordInput
+    store_conf {
+      backend: "kvfile"
+      path :"/data/dataset/imagenet/train_record.bin"
+      mean_file: "/data/dataset/imagenet/image_mean.bin"
+      batchsize: 256
+      #random_skip: 1000
+      shape: 3
+      shape: 256
+      shape: 256
+    }
+    include: kTrain
+  }
+  layer{
+    name: "data"
+    type: kRecordInput
+    store_conf {
+      backend: "kvfile"
+      path :"/data/dataset/imagenet/val_record.bin"
+      mean_file: "/data/dataset/imagenet/image_mean.bin"
+      batchsize: 100
+      shape: 3
+      shape: 256
+      shape: 256
+    }
+    include: kTest
+  }
+  layer{
+    name: "image"
+    type: kImagePreprocess
+    rgbimage_conf {
+      cropsize: 227
+      mirror: true
+    }
+#    partition_dim: 0
+    srclayers: "data"
+  }
+  layer{
+    name: "conv1"
+    type: kCudnnConv
+    srclayers: "image"
+    convolution_conf {
+      num_filters: 96
+      kernel: 11
+      stride: 4
+    }
+#    partition_dim: 0
+    param {
+      name: "w1"
+      init {
+        type: kGaussian
+        std: 0.01
+      }
+    }
+    param {
+      name: "b1"
+      lr_scale: 2
+      wd_scale: 0
+      init {
+        type: kConstant
+        value: 0
+      }
+    }
+  }
+  layer {
+    name: "relu1"
+    type: kCudnnActivation
+    activation_conf {
+      type: RELU
+    }
+    share_src_blobs: true
+    srclayers: "conv1"
+#    partition_dim: 0
+  }
+  layer {
+    name: "pool1"
+    type: kCudnnPool
+    pooling_conf {
+      pool: MAX
+      kernel: 3
+      stride: 2
+    }
+    srclayers: "relu1"
+#    partition_dim: 0
+  }
+  layer {
+    name: "norm1"
+    type: kCudnnLRN
+    lrn_conf {
+      local_size: 5
+      alpha: 0.0001
+      beta: 0.75
+      knorm: 1
+    }
+    srclayers: "pool1"
+#    partition_dim: 0
+  }
+
+  layer{
+    name: "conv2"
+    type: kCudnnConv
+    srclayers: "norm1"
+    convolution_conf {
+      num_filters: 256
+      kernel: 5
+      pad: 2
+    }
+#    partition_dim: 0
+    param {
+      name: "w2"
+      init {
+        type: kGaussian
+        std: 0.01
+      }
+    }
+    param {
+      name: "b2"
+      lr_scale: 2
+      wd_scale: 0
+      init {
+        type: kConstant
+        value: 1
+      }
+    }
+  }
+  layer {
+    name: "relu2"
+    type: kCudnnActivation
+    activation_conf {
+      type: RELU
+    }
+    share_src_blobs: true
+    srclayers: "conv2"
+#    partition_dim: 0
+  }
+  layer {
+    name: "pool2"
+    type: kCudnnPool
+    pooling_conf {
+      pool: MAX
+      kernel: 3
+      stride: 2
+    }
+    srclayers: "relu2"
+#    partition_dim: 0
+  }
+
+  layer {
+    name: "norm2"
+    type: kCudnnLRN
+    lrn_conf {
+      local_size: 5
+      alpha: 0.0001
+      beta: 0.75
+      knorm: 1
+    }
+    srclayers: "pool2"
+#    partition_dim: 0
+  }
+  layer{
+    name: "conv3"
+    type: kCudnnConv
+    srclayers: "norm2"
+    convolution_conf {
+      num_filters: 384
+      kernel: 3
+      pad: 1
+    }
+#    partition_dim: 0
+    param {
+      name: "w3"
+      init {
+        type: kGaussian
+        std: 0.01
+      }
+    }
+    param {
+      name: "b3"
+      lr_scale: 2
+      wd_scale: 0
+      init {
+        type: kConstant
+        value: 0
+      }
+    }
+  }
+  layer {
+    name: "relu3"
+    type: kCudnnActivation
+    activation_conf {
+      type: RELU
+    }
+    share_src_blobs: true
+    srclayers: "conv3"
+#    partition_dim: 0
+  }
+  layer{
+    name: "conv4"
+    type: kCudnnConv
+    srclayers: "relu3"
+    convolution_conf {
+      num_filters: 384
+      kernel: 3
+      pad: 1
+    }
+#    partition_dim: 0
+    param {
+      name: "w4"
+      init {
+        type: kGaussian
+        std: 0.01
+      }
+    }
+    param {
+      name: "b4"
+      lr_scale:2
+      wd_scale:0
+      init {
+        type: kConstant
+        value: 1
+      }
+    }
+  }
+  layer {
+    name: "relu4"
+    type: kCudnnActivation
+    activation_conf {
+      type: RELU
+    }
+    share_src_blobs: true
+    srclayers: "conv4"
+#    partition_dim: 0
+  }
+  layer{
+    name: "conv5"
+    type: kCudnnConv
+    srclayers: "relu4"
+    convolution_conf {
+      num_filters: 256
+      kernel: 3
+      pad: 1
+    }
+#    partition_dim: 0
+    param {
+      name: "w5"
+      init {
+        type: kGaussian
+        std: 0.01
+      }
+    }
+    param {
+      name: "b5"
+      lr_scale: 2
+      wd_scale: 0
+      init {
+        type: kConstant
+        value: 1
+      }
+    }
+  }
+  layer {
+    name: "relu5"
+    type: kCudnnActivation
+    activation_conf {
+      type: RELU
+    }
+    share_src_blobs: true
+    srclayers: "conv5"
+#    partition_dim: 0
+  }
+  layer {
+    name: "pool5"
+    type: kCudnnPool
+    pooling_conf {
+      pool: MAX
+      kernel: 3
+      stride: 2
+    }
+    srclayers: "relu5"
+#    partition_dim: 0
+  }
+  layer {
+    name: "ip6"
+    type: kInnerProduct
+    innerproduct_conf {
+      num_output: 4096
+    }
+    param {
+      name: "w6"
+      init {
+        type: kGaussian
+        std: 0.005
+      }
+    }
+    param {
+      name: "b6"
+      lr_scale: 2
+      wd_scale: 0
+      init {
+        type: kConstant
+        value: 1
+      }
+    }
+    srclayers: "pool5"
+#    partition_dim: 1
+  }
+  layer {
+    name: "relu6"
+    type: kCudnnActivation
+    activation_conf {
+      type: RELU
+    }
+    share_src_blobs: true
+    srclayers: "ip6"
+#    partition_dim: 1
+  }
+  layer {
+    name: "drop6"
+    type: kDropout
+    srclayers: "relu6"
+#    partition_dim: 1
+  }
+  layer {
+    name: "ip7"
+    type: kInnerProduct
+    innerproduct_conf {
+      num_output: 4096
+    }
+#    partition_dim: 1
+    param {
+      name: "w7"
+      init {
+        type: kGaussian
+        std: 0.005
+      }
+    }
+    param {
+      name: "b7"
+      lr_scale: 2
+      wd_scale: 0
+      init {
+        type: kConstant
+        value: 1
+      }
+    }
+    srclayers: "drop6"
+  }
+  layer {
+    name: "relu7"
+    type: kCudnnActivation
+    activation_conf {
+      type: RELU
+    }
+    share_src_blobs: true
+    srclayers: "ip7"
+#    partition_dim: 1
+  }
+  layer {
+    name: "drop7"
+    type: kDropout
+    srclayers: "relu7"
+#    partition_dim: 1
+  }
+  layer {
+    name: "ip8"
+    type: kInnerProduct
+    innerproduct_conf {
+      num_output: 1000
+    }
+#    partition_dim: 1
+    param {
+      name: "w8"
+      init {
+        type: kGaussian
+        std: 0.01
+      }
+    }
+    param {
+      name: "b8"
+      lr_scale: 2
+      wd_scale: 0
+      init {
+        type: kConstant
+        value: 0
+      }
+    }
+    srclayers: "drop7"
+  }
+  layer {
+    name: "loss"
+    type: kCudnnSoftmaxLoss
+    softmaxloss_conf {
+      topk:1
+    }
+    srclayers: "ip8"
+    srclayers: "data"
+  }
+}
+cluster {
+  nworker_groups: 1
+  nserver_groups: 1
+  nworkers_per_group: 1
+  nworkers_per_procs: 1
+  workspace: "examples/alexnet"
+}
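For reference, the kStep updater configured above (base_lr 0.01, gamma 0.1, change_freq 100000) matches StepLRGen::Get in src/utils/updater.cc further below. A small self-contained sketch of the decay rule, including the integer division that holds the rate constant inside each 100000-step window:

    #include <cmath>
    #include <cstdio>

    // Step decay: lr = base_lr * gamma^(step / freq); step / freq truncates,
    // so the learning rate only changes every freq steps.
    float StepLR(int step, float base_lr, float gamma, int freq) {
      return base_lr * std::pow(gamma, step / freq);
    }

    int main() {
      for (int step = 0; step <= 450000; step += 150000)
        std::printf("step %6d -> lr %g\n", step, StepLR(step, 0.01f, 0.1f, 100000));
      // step 0 -> 0.01, 150000 -> 0.001, 300000 -> 1e-05, 450000 -> 1e-06
      return 0;
    }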
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/examples/alexnet/job.conf
----------------------------------------------------------------------
diff --git a/examples/alexnet/job.conf b/examples/alexnet/job.conf
index b8ac6c7..1898a09 100644
--- a/examples/alexnet/job.conf
+++ b/examples/alexnet/job.conf
@@ -1,8 +1,9 @@
 name: "alexnet"
-train_steps: 100000
-test_steps: 10
+train_steps: 100
+test_steps: 0
 test_freq: 300
-disp_freq: 100
+disp_freq: 5
+#debug: true
 #checkpoint_path: "examples/alexnet/checkpoint/step10000-worker0"
 train_one_batch {
   alg: kBP
@@ -13,7 +14,7 @@ updater{
   momentum: 0.9
   learning_rate {
     type: kFixed
-    base_lr: 0.9
+    base_lr: 0.01
   }
 }
 neuralnet {
@@ -22,10 +23,10 @@ neuralnet {
     type: kRecordInput
     store_conf {
       backend: "kvfile"
-      path :"/data/dataset/train_record.bin"
-      mean_file: "/data/dataset/train_mean.bin"
-      batchsize: 256
-      random_skip: 5000
+      path :"/data/dataset/imagenet/train_record.bin"
+      mean_file: "/data/dataset/imagenet/image_mean.bin"
+      batchsize: 32
+      #random_skip: 5000
       shape: 3
       shape: 256
       shape: 256
@@ -38,13 +39,13 @@ neuralnet {
     store_conf {
       backend: "kvfile"
       path :"/data/dataset/val_record.bin"
-      mean_file: "/data/dataset/val_mean.bin"
+      mean_file: "/data/dataset/image_mean.bin"
       batchsize: 256
       shape: 3
       shape: 256
       shape: 256
     }
-    include: kTest
+    include: kTest
   }
   layer{
     name: "image"
@@ -75,6 +76,8 @@ neuralnet {
     }
     param {
       name: "b1"
+      lr_scale: 2
+      wd_scale: 0
       init {
         type: kConstant
         value: 0
@@ -88,6 +91,17 @@ neuralnet {
 #    partition_dim: 0
   }
   layer {
+    name: "pool1"
+    type: kCPooling
+    pooling_conf {
+      pool: MAX
+      kernel: 3
+      stride: 2
+    }
+    srclayers: "relu1"
+#    partition_dim: 0
+  }
+  layer {
     name: "norm1"
     type: kLRN
     lrn_conf {
@@ -96,24 +110,14 @@ neuralnet {
       beta: 0.75
       knorm: 2
     }
-    srclayers: "relu1"
-#    partition_dim: 0
-  }
-  layer {
-    name: "pool1"
-    type: kCPooling
-    pooling_conf {
-      pool: MAX
-      kernel: 3
-      stride: 2
-    }
-    srclayers: "norm1"
+    srclayers: "pool1"
 #    partition_dim: 0
   }
+
   layer{
     name: "conv2"
     type: kCConvolution
-    srclayers: "pool1"
+    srclayers: "norm1"
     convolution_conf {
       num_filters: 256
       kernel: 5
@@ -129,9 +133,11 @@ neuralnet {
     }
     param {
       name: "b2"
+      lr_scale: 2
+      wd_scale: 0
       init {
         type: kConstant
-        value: 0
+        value: 1
       }
     }
   }
@@ -142,6 +148,18 @@ neuralnet {
 #    partition_dim: 0
   }
   layer {
+    name: "pool2"
+    type: kCPooling
+    pooling_conf {
+      pool: MAX
+      kernel: 3
+      stride: 2
+    }
+    srclayers: "relu2"
+#    partition_dim: 0
+  }
+
+  layer {
     name: "norm2"
     type: kLRN
     lrn_conf {
@@ -150,24 +168,13 @@ neuralnet {
       beta: 0.75
       knorm: 2
     }
-    srclayers: "relu2"
-#    partition_dim: 0
-  }
-  layer {
-    name: "pool2"
-    type: kCPooling
-    pooling_conf {
-      pool: MAX
-      kernel: 3
-      stride: 2
-    }
-    srclayers: "norm2"
+    srclayers: "pool2"
 #    partition_dim: 0
   }
   layer{
     name: "conv3"
     type: kCConvolution
-    srclayers: "pool2"
+    srclayers: "norm2"
     convolution_conf {
       num_filters: 384
       kernel: 3
@@ -183,6 +190,8 @@ neuralnet {
     }
     param {
       name: "b3"
+      lr_scale: 2
+      wd_scale: 0
       init {
         type: kConstant
         value: 0
@@ -214,9 +223,11 @@ neuralnet {
     }
     param {
       name: "b4"
+      lr_scale:2
+      wd_scale:0
       init {
         type: kConstant
-        value: 0
+        value: 1
       }
     }
   }
@@ -245,9 +256,11 @@ neuralnet {
     }
     param {
       name: "b5"
+      lr_scale: 2
+      wd_scale: 0
       init {
         type: kConstant
-        value: 0
+        value: 1
       }
     }
   }
@@ -278,11 +291,13 @@ neuralnet {
       name: "w6"
       init {
         type: kGaussian
-        std: 0.01
+        std: 0.005
       }
     }
     param {
       name: "b6"
+      lr_scale: 2
+      wd_scale: 0
       init {
         type: kConstant
         value: 1
@@ -314,11 +329,13 @@ neuralnet {
       name: "w7"
       init {
         type: kGaussian
-        std: 0.01
+        std: 0.005
      }
     }
     param {
       name: "b7"
+      lr_scale: 2
+      wd_scale: 0
       init {
         type: kConstant
         value: 1
@@ -354,9 +371,11 @@ neuralnet {
     }
     param {
       name: "b8"
+      lr_scale: 2
+      wd_scale: 0
       init {
         type: kConstant
-        value: 1
+        value: 0
       }
     }
     srclayers: "drop7"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/examples/cifar10/cudnn.conf
----------------------------------------------------------------------
diff --git a/examples/cifar10/cudnn.conf b/examples/cifar10/cudnn.conf
index 49132d8..136435b 100644
--- a/examples/cifar10/cudnn.conf
+++ b/examples/cifar10/cudnn.conf
@@ -5,7 +5,7 @@ test_freq: 1000
 #validate_steps: 100
 #validate_freq: 300
 disp_freq: 200
-gpu: 0
+gpu: 2
 #checkpoint_path: "examples/cifar10/checkpoint/step1000-worker0"
 train_one_batch {
   alg: kBP
@@ -116,6 +116,7 @@ neuralnet {
   activation_conf {
     type: RELU
   }
+  share_src_blobs: true
   srclayers:"pool1"
 }
 layer {
@@ -161,6 +162,7 @@ neuralnet {
   activation_conf {
     type: RELU
   }
+  share_src_blobs: true
   srclayers:"conv2"
 }
 layer {
@@ -216,6 +218,7 @@ neuralnet {
   activation_conf {
     type: RELU
   }
+  share_src_blobs: true
   srclayers:"conv3"
 }
 layer {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/include/singa/utils/math_blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h
index 301b72f..34e67ad 100644
--- a/include/singa/utils/math_blob.h
+++ b/include/singa/utils/math_blob.h
@@ -306,6 +306,9 @@ void Map(Dtype alpha, const Blob<Dtype>& A, Blob<Dtype>* B) {
     cpu_e_f<Op>(A.count(), alpha, A.cpu_data(), B->mutable_cpu_data());
   } else {
 #ifdef USE_GPU
+    gpu_e_f<Op>(A.count(), A.gpu_data(), alpha, B->mutable_gpu_data());
+#else
+    LOG(FATAL) << "Not implemented";
 #endif  // USE_GPU
   }
 }
@@ -324,6 +327,7 @@ void Map(Dtype alpha, const Blob<Dtype>& A, const Blob<Dtype>& B,
       C->mutable_cpu_data());
   } else {
 #ifdef USE_GPU
+    LOG(ERROR) << "Not implemented";
 #endif  // USE_GPU
   }
 }
@@ -670,6 +674,8 @@ void SampleUniform(Dtype low, Dtype high, Blob<Dtype>* A) {
 #ifdef USE_GPU
     gpu_sample_uniform(context->curand_generator(thread), A->count(), low, high,
         A->mutable_gpu_data());
+#else
+    LOG(FATAL) << "Not implemented";
 #endif
   }
 }
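The math_blob.h hunks above fill previously empty #ifdef USE_GPU branches so a CPU-only build fails loudly instead of silently skipping the operation. A stand-alone sketch of the same compile-time guard pattern (plain fprintf/abort stand in for glog's LOG(FATAL), and the gpu_scale call is hypothetical):

    #include <cstdio>
    #include <cstdlib>

    // Dispatch to the GPU path when built with USE_GPU; otherwise abort
    // instead of doing nothing (the old behavior, where the #ifdef block
    // was empty, dropped the op entirely on CPU-only builds).
    void Scale(float alpha, const float* in, float* out, int n, bool on_gpu) {
      if (!on_gpu) {
        for (int i = 0; i < n; i++) out[i] = alpha * in[i];
      } else {
    #ifdef USE_GPU
        // gpu_scale(n, alpha, in, out);  // hypothetical GPU kernel wrapper
    #else
        std::fprintf(stderr, "Not implemented: GPU op in CPU-only build\n");
        std::abort();
    #endif
      }
    }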
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/include/singa/utils/math_kernel.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_kernel.h b/include/singa/utils/math_kernel.h
index 8eb7cf5..0239d3d 100644
--- a/include/singa/utils/math_kernel.h
+++ b/include/singa/utils/math_kernel.h
@@ -79,7 +79,8 @@ extern "C" {
   void singa_gpu_set_value(float *data, float value, int n);
 
-  void singa_gpu_threshold(const float *src_data, float *des_data, int n);
+  void singa_gpu_threshold(const float *src_data, float *des_data,
+      float alpha, int n);
 };
 
 }  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/neuralnet/connection_layer/slice.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/connection_layer/slice.cc b/src/neuralnet/connection_layer/slice.cc
index 15fdc48..efa33a4 100644
--- a/src/neuralnet/connection_layer/slice.cc
+++ b/src/neuralnet/connection_layer/slice.cc
@@ -155,11 +155,11 @@ const std::string SliceLayer::ToString(bool debug, int flag) {
   string ret = "";
   if ((flag & kForward) == kForward && data_.count() !=0) {
     for (unsigned k = 0; k < datavec_.size(); k++)
-      ret += StringPrintf("data-%u :%13.9f ", k, Asum(*datavec_.at(k)));
+      ret += StringPrintf("data-%u :%e ", k, Asum(*datavec_.at(k)));
   }
   if ((flag & kBackward) == kBackward && grad_.count() != 0) {
     for (unsigned k = 0; k < gradvec_.size(); k++)
-      ret += StringPrintf("grad-%u:%13.9f ", k, Asum(*gradvec_.at(k)));
+      ret += StringPrintf("grad-%u:%e ", k, Asum(*gradvec_.at(k)));
   }
   return ret;
 }
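The %13.9f to %e changes here and in layer.cc below swap fixed-point for scientific notation in the debug strings: a blob sum like 3.2e-12 prints as 0.000000000 under %13.9f, which makes vanishing gradients invisible. A quick illustration:

    #include <cstdio>

    int main() {
      const double tiny = 3.2e-12, huge = 48231.75;
      // Old format: fixed-point hides magnitude at both extremes.
      std::printf("%13.9f %13.9f\n", tiny, huge);  // "  0.000000000 48231.750000000"
      // New format: scientific notation keeps the exponent visible.
      std::printf("%e %e\n", tiny, huge);          // "3.200000e-12 4.823175e+04"
      return 0;
    }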
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/neuralnet/input_layer/image_preprocess.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/input_layer/image_preprocess.cc b/src/neuralnet/input_layer/image_preprocess.cc
index c63c957..6f2e094 100644
--- a/src/neuralnet/input_layer/image_preprocess.cc
+++ b/src/neuralnet/input_layer/image_preprocess.cc
@@ -39,7 +39,7 @@ void ImagePreprocessLayer::Setup(const LayerProto& conf,
   const auto& shape = src.shape();
   CHECK_EQ(shape.size(), 4);
   CHECK_EQ(shape.at(2), shape.at(3));
-  if (cropsize_ != 0 && cropsize_ != shape.at(2)) {
+  if (cropsize_ && (cropsize_ != shape.at(2) || cropsize_ != shape.at(3))) {
     data_.Reshape(vector<int>{shape.at(0), shape.at(1), cropsize_, cropsize_});
   } else {
     data_ = src;
@@ -49,29 +49,29 @@ void ImagePreprocessLayer::ComputeFeature(int flag,
     const vector<Layer*>& srclayers) {
   const auto& srcdata = srclayers.at(0)->data(this);
-  int batchsize = srcdata.shape()[0], channel = srcdata.shape()[1];
-  int height = srcdata.shape()[2], width = srcdata.shape()[3];
+  int batchsize = srcdata.shape(0), channel = srcdata.shape(1);
+  int height = srcdata.shape(2), width = srcdata.shape(3);
+  int srcimage_size = channel * height * width;
+  int image_size = channel * data_.shape(2) * data_.shape(3);
+  std::uniform_int_distribution<int> rand1(0, height - cropsize_);
+  std::uniform_int_distribution<int> rand2(0, width - cropsize_);
+  auto generator = Singleton<Context>::Instance()->rand_generator();
+
   const float* srcdptr = srcdata.cpu_data();
   float* dptr = data_.mutable_cpu_data();
-  int srcimage_size = channel * height * width;
-  int image_size = channel * data_.shape()[2] * data_.shape()[3];
-  std::uniform_int_distribution<int> rand1(0, srcdata.shape()[1] - cropsize_);
-  std::uniform_int_distribution<int> rand2(0, srcdata.shape()[2] - cropsize_);
-  auto generator =
-    Singleton<Context>::Instance()->rand_generator(std::this_thread::get_id());
   for (int k = 0; k < batchsize; k++) {
     int h_offset = 0, w_offset = 0;
-    if (cropsize_> 0 && ((flag & kTrain) == kTrain)) {
+    if (cropsize_> 0 && (flag & kTrain)) {
       h_offset = rand1(*generator);
       w_offset = rand2(*generator);
     }
     bool do_mirror = mirror_ && (rand1(*generator) % 2)
-      && ((flag & kTrain) == kTrain);
+      && (flag & kTrain);
     ImageTransform(srcdptr + k * srcimage_size, nullptr, do_mirror, cropsize_,
-        cropsize_, h_offset, w_offset, srcdata.shape()[1], height, width,
-        scale_, dptr + image_size);
+        cropsize_, h_offset, w_offset, channel, height, width,
+        scale_, dptr + k * image_size);
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/neuralnet/input_layer/record.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/input_layer/record.cc b/src/neuralnet/input_layer/record.cc
index 2fb71c6..b14fc80 100644
--- a/src/neuralnet/input_layer/record.cc
+++ b/src/neuralnet/input_layer/record.cc
@@ -32,7 +32,7 @@ void RecordInputLayer::Setup(const LayerProto& conf,
 }
 
 void RecordInputLayer::LoadRecord(const string& backend,
-    const string&path, Blob<float>* to) {
+    const string& path, Blob<float>* to) {
   io::Store* store = io::OpenStore(backend, path, io::kRead);
   string key, val;
   CHECK(store->Read(&key, &val));

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/neuralnet/layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/layer.cc b/src/neuralnet/layer.cc
index 3698b21..ac673dd 100644
--- a/src/neuralnet/layer.cc
+++ b/src/neuralnet/layer.cc
@@ -48,15 +48,15 @@ const std::string Layer::ToString(bool debug, int flag) {
     return "";
   string ret = "";
   if ((flag & kForward) == kForward && data_.count() !=0) {
-    ret += StringPrintf("data:%13.9f ", Asum(data_));
+    ret += StringPrintf("data:%e ", Asum(data_));
     for (Param* p : GetParams())
       ret += StringPrintf("%s:%13.9f ", p->name().c_str(), Asum(p->data()));
   }
   if ((flag & kBackward) == kBackward && grad_.count() != 0) {
-    ret += StringPrintf("grad:%13.9f ", Asum(grad_));
+    ret += StringPrintf("grad:%e ", Asum(grad_));
     for (Param* p : GetParams())
-      ret += StringPrintf("%s:%13.9f ",
+      ret += StringPrintf("%13.9f ",
           p->name().c_str(), Asum(p->grad()));
   }
   return ret;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/neuralnet/neuron_layer/activation.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/activation.cc b/src/neuralnet/neuron_layer/activation.cc
index 492e453..f75961e 100644
--- a/src/neuralnet/neuron_layer/activation.cc
+++ b/src/neuralnet/neuron_layer/activation.cc
@@ -28,7 +28,11 @@ void ActivationLayer::Setup(const LayerProto& conf,
     const vector<Layer*>& srclayers) {
   NeuronLayer::Setup(conf, srclayers);
   data_.ReshapeLike(srclayers[0]->data(this));
-  grad_.ReshapeLike(*(srclayers[0]->mutable_grad(this)));
+  grad_.ReshapeLike(data_);
+  if (conf.share_src_blobs()) {
+    data_.ShareData(srclayers[0]->mutable_data(this), false);
+    grad_.ShareData(srclayers[0]->mutable_grad(this), false);
+  }
 }
 void ActivationLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
@@ -57,7 +61,7 @@ ActivationLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
   Blob<float> * gsrc = srclayers[0]->mutable_grad(this);
   switch (layer_conf_.activation_conf().type()) {
     case RELU:
-      Map<op::Relu<float>, float>(data_, gsrc);
+      Map<op::ReluGrad<float>, float>(data_, gsrc);
       Mult(*gsrc, grad_, gsrc);
       break;
     case SIGMOID:
@@ -65,12 +69,12 @@ ActivationLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
       Mult(*gsrc, grad_, gsrc);
       break;
     case TANH:
-      Map<op::Tanh<float>, float>(data_, gsrc);
+      Map<op::TanhGrad<float>, float>(data_, gsrc);
       Mult(*gsrc, grad_, gsrc);
       break;
 /*
     case ActivationType_STANH:
-      Map<op::STanh<float>, float>(data_, gsrc);
+      Map<op::STanhGrad<float>, float>(data_, gsrc);
       Mult(*gsrc, grad_, gsrc);
       break;
 */
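The activation.cc change above is substantive: the backward pass must apply the derivative of the activation (ReluGrad, TanhGrad, ...) evaluated at the layer output, not re-apply the activation itself. For ReLU at output y, dy/dx = (y > 0) ? 1 : 0, whereas Relu(y) returns y itself for y > 0, silently scaling gradients by the activation value. A self-contained sketch of the corrected ReLU backward rule (plain vectors, not SINGA's Blob/Map API):

    #include <vector>

    // grad_in = ReluGrad(y) * grad_out, elementwise.
    std::vector<float> ReluBackward(const std::vector<float>& y,
                                    const std::vector<float>& grad_out) {
      std::vector<float> grad_in(y.size());
      for (size_t i = 0; i < y.size(); i++)
        grad_in[i] = (y[i] > 0.f ? 1.f : 0.f) * grad_out[i];
      return grad_in;
    }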
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/neuralnet/neuron_layer/cudnn_activation.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/cudnn_activation.cc b/src/neuralnet/neuron_layer/cudnn_activation.cc
index 5405b53..12b3d48 100644
--- a/src/neuralnet/neuron_layer/cudnn_activation.cc
+++ b/src/neuralnet/neuron_layer/cudnn_activation.cc
@@ -38,25 +38,34 @@ void CudnnActivationLayer::InitCudnn() {
   const auto& shape = data_.shape();
   CHECK_GT(shape.size(), 0);
+  // TODO(wangwei) cudnnSetTensorNdDescriptor reports error if nbdim is < 4.
+  const int nbdim = 4;
   // size of each dimension
-  int* sdim = new int[shape.size()];
-  int* stride = new int[shape.size()];
-  stride[shape.size() -1] = 1;
+  int* sdim = new int[nbdim];
+  int* stride = new int[nbdim];
   int i = shape.size() - 1;
   sdim[i] = shape[i];
   stride[i] = 1;
+  // LOG(ERROR) << "layer " << name();
+  // LOG(ERROR) << sdim[i] << " " << stride[i];
   for (--i; i >= 0; i--) {
     sdim[i] = shape[i];
     stride[i] = shape[i + 1] * stride[i + 1];
+    // LOG(ERROR) << sdim[i] << " " << stride[i];
+  }
+  // padding sdim and stride to 4 dimensions
+  for (i = shape.size(); i < nbdim; i++) {
+    sdim[i] = 1;
+    stride[i] = 1;
   }
   CHECK_CUDNN(cudnnSetTensorNdDescriptor(src_desc_,
       CUDNN_DATA_FLOAT,
-      shape.size(),
+      nbdim,
       sdim,
       stride));
   CHECK_CUDNN(cudnnSetTensorNdDescriptor(my_desc_,
       CUDNN_DATA_FLOAT,
-      shape.size(),
+      nbdim,
       sdim,
       stride));
   delete[] sdim;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/neuralnet/neuron_layer/cudnn_lrn.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/cudnn_lrn.cc b/src/neuralnet/neuron_layer/cudnn_lrn.cc
index 8237b13..fb8e476 100644
--- a/src/neuralnet/neuron_layer/cudnn_lrn.cc
+++ b/src/neuralnet/neuron_layer/cudnn_lrn.cc
@@ -37,7 +37,6 @@ void CudnnLRNLayer::InitCudnn() {
       alpha_,
       beta_,
       knorm_));
-  CHECK_CUDNN(cudnnCreateTensorDescriptor(&src_desc_));
   CHECK_CUDNN(cudnnSetTensor4dDescriptor(src_desc_,
       CUDNN_TENSOR_NCHW,
       CUDNN_DATA_FLOAT,
@@ -45,7 +44,6 @@ void CudnnLRNLayer::InitCudnn() {
       channels_,
       height_,
       width_));
-  CHECK_CUDNN(cudnnCreateTensorDescriptor(&my_desc_));
   CHECK_CUDNN(cudnnSetTensor4dDescriptor(my_desc_,
       CUDNN_TENSOR_NCHW,
       CUDNN_DATA_FLOAT,

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/neuralnet/neuron_layer/dropout.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/dropout.cc b/src/neuralnet/neuron_layer/dropout.cc
index 6e0db76..706b999 100644
--- a/src/neuralnet/neuron_layer/dropout.cc
+++ b/src/neuralnet/neuron_layer/dropout.cc
@@ -48,12 +48,14 @@ void DropoutLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
   Blob<float> rand(data_.count());
   SampleUniform(0.0f, 1.0f, &rand);
   Map<op::Threshold<float>, float>(pkeep, rand, &mask_);
+  // scale the mask to avoid scaling in ComputeGradient
   Scale(1.0f / pkeep, &mask_);
   Mult(srclayers[0]->data(this), mask_, &data_);
 }
 
 void DropoutLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers)  {
   Mult(grad_, mask_, srclayers[0]->mutable_grad(this));
+  // no need to mult scale as mask is scaled already.
 }
 }  // namespace singa
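The dropout.cc change folds the 1/pkeep scaling into the mask at forward time (inverted dropout), so ComputeGradient is a plain elementwise multiply by the same mask. A minimal sketch of that scheme (std::rand for brevity; SINGA uses SampleUniform):

    #include <cstdlib>
    #include <vector>

    // Forward: build a mask that is 1/pkeep for kept units and 0 otherwise,
    // so the expected activation is unchanged and no rescaling is needed later.
    void DropoutForward(const std::vector<float>& in, float pkeep,
                        std::vector<float>* mask, std::vector<float>* out) {
      for (size_t i = 0; i < in.size(); i++) {
        float r = static_cast<float>(std::rand()) / RAND_MAX;  // U(0, 1)
        (*mask)[i] = (r < pkeep ? 1.f : 0.f) / pkeep;          // pre-scaled mask
        (*out)[i] = in[i] * (*mask)[i];
      }
    }

    // Backward: the mask already carries the 1/pkeep factor.
    void DropoutBackward(const std::vector<float>& grad_out,
                         const std::vector<float>& mask,
                         std::vector<float>* grad_in) {
      for (size_t i = 0; i < grad_out.size(); i++)
        (*grad_in)[i] = grad_out[i] * mask[i];
    }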
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/neuralnet/neuron_layer/lrn.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/lrn.cc b/src/neuralnet/neuron_layer/lrn.cc
index 9a5ba37..ce96d11 100644
--- a/src/neuralnet/neuron_layer/lrn.cc
+++ b/src/neuralnet/neuron_layer/lrn.cc
@@ -64,9 +64,13 @@ void LRNLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
   auto grad = Tensor4(&grad_);
   auto gsrc = Tensor4(srclayers[0]->mutable_grad(this));
 
-  gsrc = grad * expr::F<op::power>(norm, -beta_);
-  gsrc += (- 2.0f * beta_ * salpha) * expr::chpool<red::sum>(
-      grad * src * expr::F<op::power>(norm, -beta_ - 1.0f), lsize_) * src;
+  gsrc = grad * expr::F<op::power>(norm, -beta_ );
+  Tensor<cpu, 4> tmp(gsrc.shape);
+  AllocSpace(tmp);
+  tmp = gsrc * src / norm;
+  gsrc += ( - 2.0f * beta_ * salpha ) * expr::chpool<red::sum>(tmp, lsize_ )
+      * src;
+  FreeSpace(tmp);
 }
 }  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
index 22d4bc5..03ee327 100644
--- a/src/proto/job.proto
+++ b/src/proto/job.proto
@@ -192,7 +192,6 @@ message LayerProto {
   optional LayerType type = 20 [default = kUserLayer];
   // type of user layer
   optional string user_type = 21;
-  // overrides the partition dimension for neural net
   optional int32 partition_dim = 60 [default = -1];
   // names of parameters shared from other layers

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/utils/image_transform.cc
----------------------------------------------------------------------
diff --git a/src/utils/image_transform.cc b/src/utils/image_transform.cc
index 36bec39..28d5f4c 100644
--- a/src/utils/image_transform.cc
+++ b/src/utils/image_transform.cc
@@ -26,11 +26,11 @@ void ImageTransform(const float* in, const float* mean, bool mirror, int h_crop,
     int w_crop, int h_offset, int w_offset, int channel, int height, int width,
     float scale, float* out) {
   if (h_crop == 0) {
-    CHECK_NE(h_offset, 0);
+    CHECK_EQ(h_offset, 0);
     h_crop = height;
   }
   if (w_crop ==0) {
-    CHECK_NE(w_offset, 0);
+    CHECK_EQ(w_offset, 0);
     w_crop = width;
   }
   CHECK_NE(scale, 0);
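The image_transform.cc fix above inverts two checks: when no crop is requested (crop size 0), the offset must be zero and the crop falls back to the full image, whereas the old CHECK_NE demanded a nonzero offset. A small sketch of the intended semantics (ResolveCrop is a hypothetical helper, not SINGA code):

    #include <cassert>

    // Returns the effective crop extent for one dimension.
    int ResolveCrop(int crop, int offset, int full) {
      if (crop == 0) {
        assert(offset == 0);  // was assert(offset != 0) before the fix
        return full;          // no crop: use the whole dimension
      }
      assert(offset + crop <= full);  // crop window must fit
      return crop;
    }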
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/utils/updater.cc
----------------------------------------------------------------------
diff --git a/src/utils/updater.cc b/src/utils/updater.cc
index ef6d983..bb055c2 100644
--- a/src/utils/updater.cc
+++ b/src/utils/updater.cc
@@ -7,9 +7,9 @@
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
-*
+*
 * http://www.apache.org/licenses/LICENSE-2.0
-*
+*
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -59,7 +59,10 @@ float FixedStepLRGen::Get(int step) {
 float StepLRGen::Get(int step) {
   // do not cast int to float
   int freq = proto_.step_conf().change_freq();
-  return proto_.base_lr() * pow(proto_.step_conf().gamma(), step / freq);
+  float lr = proto_.base_lr() * pow(proto_.step_conf().gamma(), step / freq);
+  LOG_IF(ERROR, step % freq == 0) << "Update learning rate to " << lr
+      << " @ step " << step;
+  return lr;
 }
 
 float LinearLRGen::Get(int step) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6e815db3/src/worker.cc
----------------------------------------------------------------------
diff --git a/src/worker.cc b/src/worker.cc
index c240e84..8495b5c 100644
--- a/src/worker.cc
+++ b/src/worker.cc
@@ -344,7 +344,7 @@ void BPWorker::Forward(int step, Phase phase, NeuralNet* net) {
         Collect(step, p);
       }
     }
-    // LOG(ERROR) << layer->name() << " forward";
+    // DLOG(ERROR) << "Forward " << layer->name();
    layer->ComputeFeature(phase | kForward, net->srclayers(layer));
     if (job_conf_.debug() && grp_id_ == 0)
       label[layer->name()] = layer->ToString(true, phase | kForward);
