Repository: incubator-singa
Updated Branches:
  refs/heads/master 077d3804f -> d5d817e14
SINGA-51 Improve the convolution and pooling operations

Caffe's im2col is adopted to speed up the convolution operation: each
image is unrolled into a matrix of patches so that the convolution
reduces to a single matrix multiplication. The max pooling operation is
accelerated by book-keeping the position of each max neuron during the
forward pass, as in Caffe, so that the backward pass can route gradients
directly. (Small illustrative sketches of both techniques are appended
after the diff.)

Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6d59eecf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6d59eecf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6d59eecf

Branch: refs/heads/master
Commit: 6d59eecf0502a0f3575770cb166be94775cf83f2
Parents: 50deedd
Author: Wei Wang <[email protected]>
Authored: Sun Sep 13 20:00:02 2015 +0800
Committer: Wei Wang <[email protected]>
Committed: Sun Sep 13 20:00:02 2015 +0800

----------------------------------------------------------------------
 examples/cifar10/job.conf        |  20 +-
 examples/mnist/conv.conf         |   8 +-
 include/neuralnet/neuron_layer.h |  21 +++
 include/utils/common.h           |  42 ++++-
 src/driver.cc                    |   2 +
 src/neuralnet/neuron_layer.cc    |  82 +++++++-
 src/proto/job.proto              |   5 +-
 src/trainer/trainer.cc           |   1 -
 src/utils/blob.cc                |   2 +-
 src/utils/common.cc              | 344 +++++++++++++++++++++++++++++-----
 10 files changed, 451 insertions(+), 76 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/examples/cifar10/job.conf
----------------------------------------------------------------------
diff --git a/examples/cifar10/job.conf b/examples/cifar10/job.conf
index 0fdd244..b36c45a 100644
--- a/examples/cifar10/job.conf
+++ b/examples/cifar10/job.conf
@@ -27,7 +27,7 @@ neuralnet {
     type: kShardData
     sharddata_conf {
       path: "examples/cifar10/cifar10_train_shard"
-      batchsize: 16
+      batchsize: 64
       random_skip: 5000
     }
     exclude: kTest
@@ -57,7 +57,7 @@ neuralnet {
 
   layer {
     name: "conv1"
-    type: kConvolution
+    type: kCConvolution
     srclayers: "rgb"
     convolution_conf {
       num_filters: 32
@@ -84,7 +84,7 @@ neuralnet {
 
   layer {
     name: "pool1"
-    type: kPooling
+    type: kCPooling
     srclayers: "conv1"
     pooling_conf {
       pool: MAX
@@ -109,7 +109,7 @@ neuralnet {
   }
   layer {
     name: "conv2"
-    type: kConvolution
+    type: kCConvolution
     srclayers: "norm1"
     convolution_conf {
       num_filters: 32
@@ -140,10 +140,10 @@ neuralnet {
   }
   layer {
     name: "pool2"
-    type: kPooling
+    type: kCPooling
     srclayers: "relu2"
     pooling_conf {
-      pool: AVE
+      pool: AVG
       kernel: 3
       stride: 2
     }
@@ -160,7 +160,7 @@ neuralnet {
   }
   layer {
     name: "conv3"
-    type: kConvolution
+    type: kCConvolution
    srclayers: "norm2"
    convolution_conf {
      num_filters: 64
@@ -190,10 +190,10 @@ neuralnet {
   }
   layer {
     name: "pool3"
-    type: kPooling
+    type: kCPooling
     srclayers: "relu3"
     pooling_conf {
-      pool: AVE
+      pool: AVG
       kernel: 3
       stride: 2
     }
@@ -237,5 +237,7 @@ neuralnet {
 cluster {
   nworker_groups: 1
   nserver_groups: 1
+  nworkers_per_group: 1
+  nworkers_per_procs: 1
   workspace: "examples/cifar10"
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/examples/mnist/conv.conf
----------------------------------------------------------------------
diff --git a/examples/mnist/conv.conf b/examples/mnist/conv.conf
index aaf34f2..7f7a158 100644
--- a/examples/mnist/conv.conf
+++ b/examples/mnist/conv.conf
@@ -57,7 +57,7 @@ neuralnet {
   }
   layer {
     name: "conv1"
-    type: kConvolution
+    type: kCConvolution
     srclayers: "mnist"
     convolution_conf {
       num_filters: 20
@@ -81,7 +81,7 @@ neuralnet {
   }
   layer {
     name: "pool1"
-    type: kPooling
+    type: kCPooling
     srclayers: "conv1"
     pooling_conf {
       pool: MAX
@@ -91,7 +91,7 @@ neuralnet {
   }
   layer {
     name: "conv2"
"conv2" - type: kConvolution + type: kCConvolution srclayers: "pool1" convolution_conf { num_filters: 50 @@ -115,7 +115,7 @@ neuralnet { } layer { name: "pool2" - type: kPooling + type: kCPooling srclayers: "conv2" pooling_conf { pool: MAX http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/include/neuralnet/neuron_layer.h ---------------------------------------------------------------------- diff --git a/include/neuralnet/neuron_layer.h b/include/neuralnet/neuron_layer.h index e5663d8..dd45eec 100644 --- a/include/neuralnet/neuron_layer.h +++ b/include/neuralnet/neuron_layer.h @@ -36,6 +36,15 @@ class ConvolutionLayer : public NeuronLayer { Blob<float> col_data_, col_grad_; }; +/** + * Use im2col from Caffe + */ +class CConvolutionLayer : public ConvolutionLayer { + public: + void ComputeFeature(int flag, Metric* perf) override; + void ComputeGradient(int flag, Metric* perf) override; +}; + class DropoutLayer : public NeuronLayer { public: void Setup(const LayerProto& proto, int npartitions) override; @@ -85,6 +94,18 @@ class PoolingLayer : public NeuronLayer { PoolingProto_PoolMethod pool_; }; +/** + * Use book-keeping for BP following Caffe's pooling implementation + */ +class CPoolingLayer : public PoolingLayer { + public: + void Setup(const LayerProto& proto, int npartitions); + void ComputeFeature(int flag, Metric *perf) override; + void ComputeGradient(int flag, Metric* perf) override; + private: + Blob<float> mask_; +}; + class ReLULayer : public NeuronLayer { public: void Setup(const LayerProto& proto, int npartitions) override; http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/include/utils/common.h ---------------------------------------------------------------------- diff --git a/include/utils/common.h b/include/utils/common.h index 868047a..599424d 100644 --- a/include/utils/common.h +++ b/include/utils/common.h @@ -14,14 +14,7 @@ namespace singa { std::string IntVecToString(const std::vector<int>& vec); std::string VStringPrintf(std::string fmt, va_list l); std::string StringPrintf(std::string fmt, ...); -void ReadProtoFromTextFile(const char* filename, - google::protobuf::Message* proto); -void WriteProtoToTextFile(const google::protobuf::Message& proto, - const char* filename); -void ReadProtoFromBinaryFile(const char* filename, - google::protobuf::Message* proto); -void WriteProtoToBinaryFile(const google::protobuf::Message& proto, - const char* filename); + /** * Locate the position of the arg in arglist. 
 *
@@ -102,6 +95,39 @@ class Metric {
   std::unordered_map<std::string, std::pair<int, float>> entry_;
 };
 
+using google::protobuf::Message;
+void Im2col(const float* data_im, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* data_col);
+void Col2im(const float* data_col, const int channels,
+    const int height, const int width, const int patch_h, const int patch_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* data_im);
+void ForwardMaxPooling(const float* bottom, const int num, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* top, float* mask);
+void BackwardMaxPooling(const float* top, const float* mask, const int num,
+    const int channels, const int height, const int width,
+    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
+    const int stride_h, const int stride_w,
+    float* bottom);
+void ForwardAvgPooling(const float* bottom, const int num, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* top);
+void BackwardAvgPooling(const float* top, const int num, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* bottom);
+
+void ReadProtoFromTextFile(const char* filename, Message* proto);
+void WriteProtoToTextFile(const Message& proto, const char* filename);
+void ReadProtoFromBinaryFile(const char* filename, Message* proto);
+void WriteProtoToBinaryFile(const Message& proto, const char* filename);
+
+
 }  // namespace singa
 
 #endif  // SINGA_UTILS_COMMON_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/src/driver.cc
----------------------------------------------------------------------
diff --git a/src/driver.cc b/src/driver.cc
index f017f45..a891a08 100644
--- a/src/driver.cc
+++ b/src/driver.cc
@@ -32,6 +32,8 @@ void Driver::Init(int argc, char **argv) {
   RegisterLayer<BridgeDstLayer, int>(kBridgeDst);
   RegisterLayer<BridgeSrcLayer, int>(kBridgeSrc);
   RegisterLayer<ConvolutionLayer, int>(kConvolution);
+  RegisterLayer<CConvolutionLayer, int>(kCConvolution);
+  RegisterLayer<CPoolingLayer, int>(kCPooling);
   RegisterLayer<ConcateLayer, int>(kConcate);
   RegisterLayer<DropoutLayer, int>(kDropout);
   RegisterLayer<EuclideanLossLayer, int>(kEuclideanLoss);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/src/neuralnet/neuron_layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer.cc b/src/neuralnet/neuron_layer.cc
index b86d7da..edfa022 100644
--- a/src/neuralnet/neuron_layer.cc
+++ b/src/neuralnet/neuron_layer.cc
@@ -134,6 +134,49 @@ void ConvolutionLayer::ComputeGradient(int flag, Metric* perf) {
   }
 }
 
+/******************* Implementation for CConvolutionLayer *********/
+void CConvolutionLayer::ComputeFeature(int flag, Metric* perf) {
+  auto src = Tensor4(srclayers_[0]->mutable_data(this));
+  auto data = Tensor3(&data_);
+  auto col = Tensor2(&col_data_);
+  auto weight = Tensor2(weight_->mutable_data());
+  auto bias = Tensor1(bias_->mutable_data());
+
+  for (int n = 0; n < batchsize_; n++) {
+    Im2col(src[n].dptr, channels_, height_, width_,
+        kernel_, kernel_, pad_, pad_, stride_, stride_, col.dptr);
+    data[n] = dot(weight, col);
+  }
+  data += expr::broadcast<1>(bias, data.shape);
+}
+
+void CConvolutionLayer::ComputeGradient(int flag, Metric* perf) {
+  auto src = Tensor4(srclayers_[0]->mutable_data(this));
+  auto col = Tensor2(&col_data_);
+  auto weight = Tensor2(weight_->mutable_data());
+
+  auto grad = Tensor3(&grad_);
+  auto gcol = Tensor2(&col_grad_);
+  auto gweight = Tensor2(weight_->mutable_grad());
+  auto gbias = Tensor1(bias_->mutable_grad());
+  gweight = 0.f;
+  Blob<float>* gsrcblob = srclayers_[0]->mutable_grad(this);
+  Tensor<cpu, 4> gsrc(nullptr, Shape4(batchsize_, channels_, height_, width_));
+  if (gsrcblob != nullptr)
+    gsrc.dptr = gsrcblob->mutable_cpu_data();
+  gbias = expr::sumall_except_dim<1>(grad);
+  for (int n = 0; n < batchsize_; n++) {
+    Im2col(src[n].dptr, channels_, height_, width_,
+        kernel_, kernel_, pad_, pad_, stride_, stride_, col.dptr);
+    gweight += dot(grad[n], col.T());
+    if (gsrcblob != nullptr) {
+      gcol = dot(weight.T(), grad[n]);
+      Col2im(gcol.dptr, channels_, height_, width_,
+          kernel_, kernel_, pad_, pad_, stride_, stride_, gsrc[n].dptr);
+    }
+  }
+}
+
 /****************** Implementation for DropoutLayer ***********************/
 void DropoutLayer::Setup(const LayerProto& proto, int npartitions) {
   Layer::Setup(proto, npartitions);
@@ -430,7 +473,7 @@ void PoolingLayer::Setup(const LayerProto& proto, int npartitions) {
   stride_ = pool_conf.stride();
   CHECK_LT(pad_, kernel_);
   pool_ = proto.pooling_conf().pool();
-  CHECK(pool_ == PoolingProto_PoolMethod_AVE
+  CHECK(pool_ == PoolingProto_PoolMethod_AVG
       || pool_ == PoolingProto_PoolMethod_MAX)
      << "Padding implemented only for average and max pooling.";
   const auto& srcshape = srclayers_[0]->data(this).shape();
@@ -455,7 +498,7 @@ void PoolingLayer::ComputeFeature(int flag, Metric* perf) {
   auto data = Tensor4(&data_);
   if (pool_ == PoolingProto_PoolMethod_MAX)
     data = expr::pool<red::maximum>(src, kernel_, stride_);
-  else if (pool_ == PoolingProto_PoolMethod_AVE)
+  else if (pool_ == PoolingProto_PoolMethod_AVG)
     data = expr::pool<red::sum>(src, kernel_, stride_)
         * (1.0f / (kernel_ * kernel_));
 }
@@ -471,11 +514,44 @@ void PoolingLayer::ComputeGradient(int flag, Metric* perf) {
   auto grad = Tensor4(&grad_);
   if (pool_ == PoolingProto_PoolMethod_MAX)
     gsrc = expr::unpool<red::maximum>(src, data, grad, kernel_, stride_);
-  else if (pool_ == PoolingProto_PoolMethod_AVE)
+  else if (pool_ == PoolingProto_PoolMethod_AVG)
     gsrc = expr::unpool<red::sum>(src, data, grad, kernel_, stride_)
         * (1.0f / (kernel_ * kernel_));
 }
 
+/***************** Implementation of CPoolingLayer ***************/
+void CPoolingLayer::Setup(const LayerProto& proto, int npartitions) {
+  PoolingLayer::Setup(proto, npartitions);
+  if (pool_ == PoolingProto_PoolMethod_MAX)
+    mask_.ReshapeLike(data_);
+}
+
+void CPoolingLayer::ComputeFeature(int flag, Metric* perf) {
+  if (pool_ == PoolingProto_PoolMethod_MAX)
+    ForwardMaxPooling(srclayers_[0]->mutable_data(this)->mutable_cpu_data(),
+        batchsize_, channels_, height_, width_, kernel_, kernel_, pad_, pad_,
+        stride_, stride_, data_.mutable_cpu_data(), mask_.mutable_cpu_data());
+  else if (pool_ == PoolingProto_PoolMethod_AVG)
+    ForwardAvgPooling(srclayers_[0]->mutable_data(this)->mutable_cpu_data(),
+        batchsize_, channels_, height_, width_, kernel_, kernel_, pad_, pad_,
+        stride_, stride_, data_.mutable_cpu_data());
+  else
+    LOG(FATAL) << "unknown pooling method";
+}
+
+void CPoolingLayer::ComputeGradient(int flag, Metric* perf) {
+  if (pool_ == PoolingProto_PoolMethod_MAX)
+    BackwardMaxPooling(grad_.cpu_data(), mask_.cpu_data(), batchsize_,
+        channels_, height_, width_, kernel_, kernel_, pad_, pad_,
+        stride_, stride_, srclayers_[0]->mutable_grad(this)->mutable_cpu_data());
+  else if (pool_ == PoolingProto_PoolMethod_AVG)
+    BackwardAvgPooling(grad_.cpu_data(), batchsize_,
+        channels_, height_, width_, kernel_, kernel_, pad_, pad_,
+        stride_, stride_, srclayers_[0]->mutable_grad(this)->mutable_cpu_data());
+  else
+    LOG(FATAL) << "unknown pooling method";
+}
+
 /***************** Implementation for ReLULayer *****************************/
 void ReLULayer::Setup(const LayerProto& proto, int npartitions) {
   Layer::Setup(proto, npartitions);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
index 9adae6d..7861eae 100644
--- a/src/proto/job.proto
+++ b/src/proto/job.proto
@@ -389,7 +389,7 @@ message PoolingProto {
   required int32 kernel= 1;
   enum PoolMethod {
     MAX = 0;
-    AVE = 1;
+    AVG = 1;
   }
   // The pooling method
   optional PoolMethod pool = 30 [default = MAX];
@@ -514,6 +514,8 @@ enum LayerType {
   // Neuron layers
   //  - Feature transformation
   kConvolution = 1;
+  kCConvolution = 27;
+  kCPooling = 28;
   kDropout = 4;
   kInnerProduct = 5;
   kLRN = 6;
@@ -535,6 +537,7 @@ enum LayerType {
   kSlice = 12;
   kSplit = 13;
 
+  // Indicate the user defined layer. Users should configure user_type.
   kUserLayer = 102;
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/src/trainer/trainer.cc
----------------------------------------------------------------------
diff --git a/src/trainer/trainer.cc b/src/trainer/trainer.cc
index 1d78c37..b6dc729 100644
--- a/src/trainer/trainer.cc
+++ b/src/trainer/trainer.cc
@@ -481,7 +481,6 @@ const vector<Msg*> Trainer::HandleUpdate(ParamEntry *entry, Msg** msg) {
       mshadow::Tensor<mshadow::cpu,1> grad((*it)->mutable_cpu_grad(), shape);
       sum += grad;
     }
-    sum /= entry->num_total;
   }
   int step = (*msg)->trgt_version();
   GenMsgs(kUpdate, step, entry, *msg, &ret);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/src/utils/blob.cc
----------------------------------------------------------------------
diff --git a/src/utils/blob.cc b/src/utils/blob.cc
index fd402a8..3df1aef 100644
--- a/src/utils/blob.cc
+++ b/src/utils/blob.cc
@@ -1,5 +1,5 @@
 /**
- * The code is adapted from that of Caffe whose license is attached.
+ * The code is adapted from Caffe whose license is attached.
 *
 * COPYRIGHT
 * All contributions by the University of California:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/src/utils/common.cc
----------------------------------------------------------------------
diff --git a/src/utils/common.cc b/src/utils/common.cc
index d13faea..3c3dc39 100644
--- a/src/utils/common.cc
+++ b/src/utils/common.cc
@@ -1,32 +1,70 @@
+/**
+ * Some functions in this file are adapted from Caffe, whose license
+ * is attached.
+ *
+ * COPYRIGHT
+ * All contributions by the University of California:
+ * Copyright (c) 2014, The Regents of the University of California (Regents)
+ * All rights reserved.
+ * All other contributions:
+ * Copyright (c) 2014, the respective contributors
+ * All rights reserved.
+ * Caffe uses a shared copyright model: each contributor holds copyright over
+ * their contributions to Caffe. The project versioning records all such
+ * contribution and copyright details. If a contributor wants to further mark
+ * their specific copyright on a particular contribution, they should indicate
+ * their copyright solely in the commit message of the change when it is
+ * committed.
+ * LICENSE
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * CONTRIBUTION AGREEMENT
+ * By contributing to the BVLC/caffe repository through pull-request, comment,
+ * or otherwise, the contributor releases their content to the
+ * license and copyright terms herein.
+ */
 #include "utils/common.h"
-#include <arpa/inet.h>
-#include <fcntl.h>
-#include <glog/logging.h>
-#include <google/protobuf/io/coded_stream.h>
-#include <google/protobuf/io/zero_copy_stream_impl.h>
-#include <google/protobuf/text_format.h>
-#include <stdarg.h>
-#include <stdio.h>
 #include <sys/ioctl.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/types.h>
+
 #include <netinet/in.h>
 #include <net/if.h>
+#include <arpa/inet.h>
+
+#include <stdarg.h>
+#include <stdio.h>
 #include <time.h>
 #include <unistd.h>
+#include <fcntl.h>
+#include <cfloat>
+
+#include <glog/logging.h>
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
 
 namespace singa {
 
 using std::string;
 using std::vector;
-using google::protobuf::io::CodedInputStream;
-using google::protobuf::io::FileInputStream;
-using google::protobuf::io::FileOutputStream;
-using google::protobuf::io::ZeroCopyInputStream;
-using google::protobuf::Message;
-
 const int kBufLen = 1024;
 
 string IntVecToString(const vector<int>& vec) {
@@ -56,42 +94,7 @@ string StringPrintf(string fmt, ...) {
   return result;
 }
 
-// the proto related functions are from Caffe.
-void ReadProtoFromTextFile(const char* filename, Message* proto) {
-  int fd = open(filename, O_RDONLY);
-  CHECK_NE(fd, -1) << "File not found: " << filename;
-  FileInputStream* input = new FileInputStream(fd);
-  CHECK(google::protobuf::TextFormat::Parse(input, proto));
-  delete input;
-  close(fd);
-}
-
-void WriteProtoToTextFile(const Message& proto, const char* filename) {
-  int fd = open(filename, O_WRONLY | O_CREAT, 0644);
-  FileOutputStream* output = new FileOutputStream(fd);
-  CHECK(google::protobuf::TextFormat::Print(proto, output));
-  delete output;
-  close(fd);
-}
-
-void ReadProtoFromBinaryFile(const char* filename, Message* proto) {
-  int fd = open(filename, O_RDONLY);
-  CHECK_NE(fd, -1) << "File not found: " << filename;
-  ZeroCopyInputStream* raw_input = new FileInputStream(fd);
-  CodedInputStream* coded_input = new CodedInputStream(raw_input);
-  // upper limit 512MB, warning threshold 256MB
-  coded_input->SetTotalBytesLimit(536870912, 268435456);
-  CHECK(proto->ParseFromCodedStream(coded_input));
-  delete coded_input;
-  delete raw_input;
-  close(fd);
-}
-
-void WriteProtoToBinaryFile(const Message& proto, const char* filename) {
-  int fd = open(filename, O_CREAT|O_WRONLY|O_TRUNC, 0644);
-  CHECK_NE(fd, -1) << "File cannot open: " << filename;
-  CHECK(proto.SerializeToFileDescriptor(fd));
-}
 
 int ArgPos(int argc, char** arglist, const char* arg) {
   for (int i = 0; i < argc; i++) {
@@ -293,4 +296,247 @@ void Metric::ParseFrom(const string& msg) {
   }
 }
 
+
+/*************Below functions are adapted from Caffe ************/
+using google::protobuf::io::CodedInputStream;
+using google::protobuf::io::FileInputStream;
+using google::protobuf::io::FileOutputStream;
+using google::protobuf::io::ZeroCopyInputStream;
+
+void Im2col(const float* data_im, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* data_col) {
+  int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
+  int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
+  int channels_col = channels * kernel_h * kernel_w;
+  for (int c = 0; c < channels_col; ++c) {
+    int w_offset = c % kernel_w;
+    int h_offset = (c / kernel_w) % kernel_h;
+    int c_im = c / kernel_h / kernel_w;
+    for (int h = 0; h < height_col; ++h) {
+      for (int w = 0; w < width_col; ++w) {
+        int h_pad = h * stride_h - pad_h + h_offset;
+        int w_pad = w * stride_w - pad_w + w_offset;
+        if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
+          data_col[(c * height_col + h) * width_col + w] =
+              data_im[(c_im * height + h_pad) * width + w_pad];
+        else
+          data_col[(c * height_col + h) * width_col + w] = 0;
+      }
+    }
+  }
+}
+
+void Col2im(const float* data_col, const int channels,
+    const int height, const int width, const int patch_h, const int patch_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* data_im) {
+  memset(data_im, 0, height * width * channels * sizeof(float));
+  int height_col = (height + 2 * pad_h - patch_h) / stride_h + 1;
+  int width_col = (width + 2 * pad_w - patch_w) / stride_w + 1;
+  int channels_col = channels * patch_h * patch_w;
+  for (int c = 0; c < channels_col; ++c) {
+    int w_offset = c % patch_w;
+    int h_offset = (c / patch_w) % patch_h;
+    int c_im = c / patch_h / patch_w;
+    for (int h = 0; h < height_col; ++h) {
+      for (int w = 0; w < width_col; ++w) {
+        int h_pad = h * stride_h - pad_h + h_offset;
+        int w_pad = w * stride_w - pad_w + w_offset;
+        if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
&& h_pad < height && w_pad >= 0 && w_pad < width) + data_im[(c_im * height + h_pad) * width + w_pad] += + data_col[(c * height_col + h) * width_col + w]; + } + } + } +} + +void ForwardMaxPooling(const float* bottom, const int num, const int channels, + const int height, const int width, const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, const int stride_h, const int stride_w, + float* top, float* mask) { + int top_height = (height + pad_h * 2 -kernel_h ) / stride_h + 1; + int top_width = (width + pad_w * 2 -kernel_w ) / stride_w + 1; + int top_count = num * top_height * top_width * channels; + for (int i = 0; i < top_count; i++) { + mask[i] = -1; + top[i] = -FLT_MAX; + } + const int bottom_offset = height * width; + const int top_offset = top_height * top_width; + // The main loop + for (int n = 0; n < num; ++n) { + for (int c = 0; c < channels; ++c) { + for (int ph = 0; ph < top_height; ++ph) { + for (int pw = 0; pw < top_width; ++pw) { + int hstart = ph * stride_h - pad_h; + int wstart = pw * stride_w - pad_w; + int hend = std::min(hstart + kernel_h, height); + int wend = std::min(wstart + kernel_w, width); + hstart = std::max(hstart, 0); + wstart = std::max(wstart, 0); + const int top_index = ph * top_width + pw; + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + const int index = h * width + w; + if (bottom[index] > top[top_index]) { + top[top_index] = bottom[index]; + mask[top_index] = index; + } + } + } + } + } + // compute offset + bottom += bottom_offset; + top += top_offset; + mask += top_offset; + } + } +} + +void BackwardMaxPooling(const float* top, const float* mask, const int num, + const int channels, const int height, const int width, + const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + float* bottom) { + int top_height = (height + pad_h * 2 -kernel_h ) / stride_h + 1; + int top_width = (width + pad_w * 2 -kernel_w ) / stride_w + 1; + const int top_offset = top_height * top_width; + const int bottom_offset = height * width; + memset(bottom, 0, sizeof(float) * num * channels * bottom_offset); + for (int n = 0; n < num; ++n) { + for (int c = 0; c < channels; ++c) { + for (int ph = 0; ph < top_height; ++ph) { + for (int pw = 0; pw < top_width; ++pw) { + const int top_idx = ph * top_width + pw; + const int bottom_idx = static_cast<int>(mask[top_idx]); + bottom[bottom_idx] += top[top_idx]; + } + } + top += top_offset; + mask += top_offset; + bottom += bottom_offset; + } + } +} + +void ForwardAvgPooling(const float* bottom, const int num, const int channels, + const int height, const int width, const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, const int stride_h, const int stride_w, + float* top) { + int top_height = (height + pad_h * 2 -kernel_h ) / stride_h + 1; + int top_width = (width + pad_w * 2 -kernel_w ) / stride_w + 1; + int top_count = num * top_height * top_width * channels; + for (int i = 0; i < top_count; i++) { + top[i] = 0; + } + const int bottom_offset = height * width; + const int top_offset = top_height * top_width; + // The main loop + for (int n = 0; n < num; ++n) { + for (int c = 0; c < channels; ++c) { + for (int ph = 0; ph < top_height; ++ph) { + for (int pw = 0; pw < top_width; ++pw) { + int hstart = ph * stride_h - pad_h; + int wstart = pw * stride_w - pad_w; + int hend = std::min(hstart + kernel_h, height+pad_h); + int wend = std::min(wstart + kernel_w, width+pad_w); + int pool_size = (hend-hstart) * 
+          hstart = std::max(hstart, 0);
+          wstart = std::max(wstart, 0);
+          hend = std::min(hend, height);
+          wend = std::min(wend, width);
+          const int top_index = ph * top_width + pw;
+          for (int h = hstart; h < hend; ++h) {
+            for (int w = wstart; w < wend; ++w) {
+              const int index = h * width + w;
+              top[top_index] += bottom[index];
+            }
+          }
+          top[top_index] /= pool_size;
+        }
+      }
+      // compute offset
+      bottom += bottom_offset;
+      top += top_offset;
+    }
+  }
+}
+
+void BackwardAvgPooling(const float* top, const int num, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* bottom) {
+  int top_height = (height + pad_h * 2 - kernel_h) / stride_h + 1;
+  int top_width = (width + pad_w * 2 - kernel_w) / stride_w + 1;
+  const int top_offset = top_height * top_width;
+  const int bottom_offset = height * width;
+  memset(bottom, 0, sizeof(float) * num * channels * bottom_offset);
+  for (int n = 0; n < num; ++n) {
+    for (int c = 0; c < channels; ++c) {
+      for (int ph = 0; ph < top_height; ++ph) {
+        for (int pw = 0; pw < top_width; ++pw) {
+          int hstart = ph * stride_h - pad_h;
+          int wstart = pw * stride_w - pad_w;
+          int hend = std::min(hstart + kernel_h, height + pad_h);
+          int wend = std::min(wstart + kernel_w, width + pad_w);
+          int pool_size = (hend - hstart) * (wend - wstart);
+          hstart = std::max(hstart, 0);
+          wstart = std::max(wstart, 0);
+          hend = std::min(hend, height);
+          wend = std::min(wend, width);
+          const int top_index = ph * top_width + pw;
+          for (int h = hstart; h < hend; ++h) {
+            for (int w = wstart; w < wend; ++w) {
+              const int index = h * width + w;
+              bottom[index] += top[top_index] / pool_size;
+            }
+          }
+        }
+      }
+      top += top_offset;
+      bottom += bottom_offset;
+    }
+  }
+}
+
+void ReadProtoFromTextFile(const char* filename, Message* proto) {
+  int fd = open(filename, O_RDONLY);
+  CHECK_NE(fd, -1) << "File not found: " << filename;
+  FileInputStream* input = new FileInputStream(fd);
+  CHECK(google::protobuf::TextFormat::Parse(input, proto));
+  delete input;
+  close(fd);
+}
+
+void WriteProtoToTextFile(const Message& proto, const char* filename) {
+  int fd = open(filename, O_WRONLY | O_CREAT, 0644);
+  FileOutputStream* output = new FileOutputStream(fd);
+  CHECK(google::protobuf::TextFormat::Print(proto, output));
+  delete output;
+  close(fd);
+}
+
+void ReadProtoFromBinaryFile(const char* filename, Message* proto) {
+  int fd = open(filename, O_RDONLY);
+  CHECK_NE(fd, -1) << "File not found: " << filename;
+  ZeroCopyInputStream* raw_input = new FileInputStream(fd);
+  CodedInputStream* coded_input = new CodedInputStream(raw_input);
+  // upper limit 512MB, warning threshold 256MB
+  coded_input->SetTotalBytesLimit(536870912, 268435456);
+  CHECK(proto->ParseFromCodedStream(coded_input));
+  delete coded_input;
+  delete raw_input;
+  close(fd);
+}
+
+void WriteProtoToBinaryFile(const Message& proto, const char* filename) {
+  int fd = open(filename, O_CREAT|O_WRONLY|O_TRUNC, 0644);
+  CHECK_NE(fd, -1) << "File cannot open: " << filename;
+  CHECK(proto.SerializeToFileDescriptor(fd));
+}
 }  // namespace singa
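
----------------------------------------------------------------------
A minimal stand-alone illustration of the im2col trick adopted above.
The names here (im2col_ref, the 3x3 toy image) are hypothetical and not
part of the commit; the indexing mirrors the Im2col added to
src/utils/common.cc. Unrolling every kernel-sized patch into one column
of a matrix turns the convolution into a single dense matrix
multiplication, which is why CConvolutionLayer::ComputeFeature reduces
to dot(weight, col) per image:

#include <cstdio>

// Unroll each kernel x kernel patch of a (channels x height x width)
// image into one column of a (channels*kernel*kernel) x (out_h*out_w)
// matrix, zero-filling positions that fall into the padding.
void im2col_ref(const float* im, int channels, int height, int width,
                int kernel, int pad, int stride, float* col) {
  int out_h = (height + 2 * pad - kernel) / stride + 1;
  int out_w = (width + 2 * pad - kernel) / stride + 1;
  int rows = channels * kernel * kernel;  // one row per (channel, ky, kx)
  for (int r = 0; r < rows; ++r) {
    int kx = r % kernel;
    int ky = (r / kernel) % kernel;
    int c = r / kernel / kernel;
    for (int y = 0; y < out_h; ++y) {
      for (int x = 0; x < out_w; ++x) {
        int in_y = y * stride - pad + ky;
        int in_x = x * stride - pad + kx;
        col[(r * out_h + y) * out_w + x] =
            (in_y >= 0 && in_y < height && in_x >= 0 && in_x < width)
                ? im[(c * height + in_y) * width + in_x] : 0.f;
      }
    }
  }
}

int main() {
  // Toy case: one 3x3 single-channel image, one 2x2 filter, stride 1,
  // pad 0. im2col produces a 4x4 matrix; the convolution is then
  // filter (1x4) times col (4x4), i.e. a single GEMM with four outputs.
  const float im[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  const float filter[4] = {1, 0, 0, 1};  // adds top-left + bottom-right
  float col[16];
  im2col_ref(im, 1, 3, 3, 2, 0, 1, col);
  for (int j = 0; j < 4; ++j) {
    float sum = 0.f;  // this inner product is one cell of the GEMM
    for (int r = 0; r < 4; ++r) sum += filter[r] * col[r * 4 + j];
    std::printf("out[%d] = %g\n", j, sum);  // prints 6, 8, 12, 14
  }
  return 0;
}

The price of the trick is one unrolled buffer of channels * kernel *
kernel rows per image, which is what the col_data_ (and, for the
backward pass, col_grad_) blobs of ConvolutionLayer hold.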
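----------------------------------------------------------------------
The max-pooling speed-up can be sketched the same way. This is a
simplified single-map version with hypothetical names; the commit's
ForwardMaxPooling/BackwardMaxPooling additionally handle batches,
channels and padding. The forward pass book-keeps the flat index of
each window's winning neuron, so the backward pass becomes a direct
scatter instead of re-searching every window (in the commit the mask
lives in a Blob<float>, hence the static_cast<int> in
BackwardMaxPooling):

#include <cfloat>
#include <cstdio>
#include <cstring>

// Single-map max pooling (no padding, no batching). mask[i] remembers
// the flat bottom index that produced top[i].
void max_pool_forward(const float* bottom, int height, int width,
                      int kernel, int stride, float* top, int* mask) {
  int top_h = (height - kernel) / stride + 1;
  int top_w = (width - kernel) / stride + 1;
  for (int ph = 0; ph < top_h; ++ph) {
    for (int pw = 0; pw < top_w; ++pw) {
      int t = ph * top_w + pw;
      top[t] = -FLT_MAX;
      for (int h = ph * stride; h < ph * stride + kernel; ++h) {
        for (int w = pw * stride; w < pw * stride + kernel; ++w) {
          if (bottom[h * width + w] > top[t]) {
            top[t] = bottom[h * width + w];
            mask[t] = h * width + w;  // book-keep the winner's position
          }
        }
      }
    }
  }
}

// Backward pass: each top gradient flows only to the recorded winner,
// with no re-scan of the pooling windows.
void max_pool_backward(const float* top_grad, const int* mask, int top_count,
                       int bottom_count, float* bottom_grad) {
  std::memset(bottom_grad, 0, sizeof(float) * bottom_count);
  for (int t = 0; t < top_count; ++t)
    bottom_grad[mask[t]] += top_grad[t];
}

int main() {
  const float bottom[16] = {1, 3, 2, 0,
                            4, 2, 1, 5,
                            0, 1, 9, 2,
                            6, 0, 3, 8};
  float top[4];
  int mask[4];
  max_pool_forward(bottom, 4, 4, 2, 2, top, mask);  // 2x2 windows, stride 2
  const float top_grad[4] = {1, 1, 1, 1};
  float bottom_grad[16];
  max_pool_backward(top_grad, mask, 4, 16, bottom_grad);
  for (int t = 0; t < 4; ++t)
    std::printf("top[%d] = %g (winner at bottom[%d])\n", t, top[t], mask[t]);
  return 0;
}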
