Repository: incubator-singa
Updated Branches:
  refs/heads/master 077d3804f -> d5d817e14
SINGA-51 Improve the convolution and pooling operations

Caffe's im2col is adopted to speed up the convolution operation: each
image is unrolled into a matrix of patches so that the convolution
reduces to a single matrix multiplication. The max pooling operation is
accelerated by book-keeping the position of each max neuron during the
forward pass, as in Caffe, so that the backward pass can route gradients
directly. (Small illustrative sketches of both techniques are appended
after the diff.)

Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6d59eecf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6d59eecf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6d59eecf

Branch: refs/heads/master
Commit: 6d59eecf0502a0f3575770cb166be94775cf83f2
Parents: 50deedd
Author: Wei Wang <[email protected]>
Authored: Sun Sep 13 20:00:02 2015 +0800
Committer: Wei Wang <[email protected]>
Committed: Sun Sep 13 20:00:02 2015 +0800

----------------------------------------------------------------------
 examples/cifar10/job.conf        |  20 +-
 examples/mnist/conv.conf         |   8 +-
 include/neuralnet/neuron_layer.h |  21 +++
 include/utils/common.h           |  42 ++++-
 src/driver.cc                    |   2 +
 src/neuralnet/neuron_layer.cc    |  82 +++++++-
 src/proto/job.proto              |   5 +-
 src/trainer/trainer.cc           |   1 -
 src/utils/blob.cc                |   2 +-
 src/utils/common.cc              | 344 +++++++++++++++++++++++++++++-----
 10 files changed, 451 insertions(+), 76 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/examples/cifar10/job.conf
----------------------------------------------------------------------
diff --git a/examples/cifar10/job.conf b/examples/cifar10/job.conf
index 0fdd244..b36c45a 100644
--- a/examples/cifar10/job.conf
+++ b/examples/cifar10/job.conf
@@ -27,7 +27,7 @@ neuralnet {
     type: kShardData
     sharddata_conf {
       path: "examples/cifar10/cifar10_train_shard"
-      batchsize: 16
+      batchsize: 64
       random_skip: 5000
     }
     exclude: kTest
@@ -57,7 +57,7 @@ neuralnet {
 
   layer {
     name: "conv1"
-    type: kConvolution
+    type: kCConvolution
     srclayers: "rgb"
     convolution_conf {
       num_filters: 32
@@ -84,7 +84,7 @@ neuralnet {
 
   layer {
     name: "pool1"
-    type: kPooling
+    type: kCPooling
     srclayers: "conv1"
     pooling_conf {
       pool: MAX
@@ -109,7 +109,7 @@ neuralnet {
   }
   layer {
     name: "conv2"
-    type: kConvolution
+    type: kCConvolution
     srclayers: "norm1"
     convolution_conf {
       num_filters: 32
@@ -140,10 +140,10 @@ neuralnet {
   }
   layer {
     name: "pool2"
-    type: kPooling
+    type: kCPooling
     srclayers: "relu2"
     pooling_conf {
-      pool: AVE
+      pool: AVG
       kernel: 3
       stride: 2
     }
@@ -160,7 +160,7 @@ neuralnet {
   }
   layer {
     name: "conv3"
-    type: kConvolution
+    type: kCConvolution
    srclayers: "norm2"
    convolution_conf {
      num_filters: 64
@@ -190,10 +190,10 @@ neuralnet {
   }
   layer {
     name: "pool3"
-    type: kPooling
+    type: kCPooling
     srclayers: "relu3"
     pooling_conf {
-      pool: AVE
+      pool: AVG
       kernel: 3
       stride: 2
     }
@@ -237,5 +237,7 @@ neuralnet {
 cluster {
   nworker_groups: 1
   nserver_groups: 1
+  nworkers_per_group: 1
+  nworkers_per_procs: 1
   workspace: "examples/cifar10"
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/examples/mnist/conv.conf
----------------------------------------------------------------------
diff --git a/examples/mnist/conv.conf b/examples/mnist/conv.conf
index aaf34f2..7f7a158 100644
--- a/examples/mnist/conv.conf
+++ b/examples/mnist/conv.conf
@@ -57,7 +57,7 @@ neuralnet {
   }
   layer {
     name: "conv1"
-    type: kConvolution
+    type: kCConvolution
     srclayers: "mnist"
     convolution_conf {
       num_filters: 20
@@ -81,7 +81,7 @@ neuralnet {
   }
   layer {
     name: "pool1"
-    type: kPooling
+    type: kCPooling
     srclayers: "conv1"
     pooling_conf {
       pool: MAX
@@ -91,7 +91,7 @@ neuralnet {
   }
   layer {
     name: "conv2"
"conv2" - type: kConvolution + type: kCConvolution srclayers: "pool1" convolution_conf { num_filters: 50 @@ -115,7 +115,7 @@ neuralnet { } layer { name: "pool2" - type: kPooling + type: kCPooling srclayers: "conv2" pooling_conf { pool: MAX http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/include/neuralnet/neuron_layer.h ---------------------------------------------------------------------- diff --git a/include/neuralnet/neuron_layer.h b/include/neuralnet/neuron_layer.h index e5663d8..dd45eec 100644 --- a/include/neuralnet/neuron_layer.h +++ b/include/neuralnet/neuron_layer.h @@ -36,6 +36,15 @@ class ConvolutionLayer : public NeuronLayer { Blob<float> col_data_, col_grad_; }; +/** + * Use im2col from Caffe + */ +class CConvolutionLayer : public ConvolutionLayer { + public: + void ComputeFeature(int flag, Metric* perf) override; + void ComputeGradient(int flag, Metric* perf) override; +}; + class DropoutLayer : public NeuronLayer { public: void Setup(const LayerProto& proto, int npartitions) override; @@ -85,6 +94,18 @@ class PoolingLayer : public NeuronLayer { PoolingProto_PoolMethod pool_; }; +/** + * Use book-keeping for BP following Caffe's pooling implementation + */ +class CPoolingLayer : public PoolingLayer { + public: + void Setup(const LayerProto& proto, int npartitions); + void ComputeFeature(int flag, Metric *perf) override; + void ComputeGradient(int flag, Metric* perf) override; + private: + Blob<float> mask_; +}; + class ReLULayer : public NeuronLayer { public: void Setup(const LayerProto& proto, int npartitions) override; http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/include/utils/common.h ---------------------------------------------------------------------- diff --git a/include/utils/common.h b/include/utils/common.h index 868047a..599424d 100644 --- a/include/utils/common.h +++ b/include/utils/common.h @@ -14,14 +14,7 @@ namespace singa { std::string IntVecToString(const std::vector<int>& vec); std::string VStringPrintf(std::string fmt, va_list l); std::string StringPrintf(std::string fmt, ...); -void ReadProtoFromTextFile(const char* filename, - google::protobuf::Message* proto); -void WriteProtoToTextFile(const google::protobuf::Message& proto, - const char* filename); -void ReadProtoFromBinaryFile(const char* filename, - google::protobuf::Message* proto); -void WriteProtoToBinaryFile(const google::protobuf::Message& proto, - const char* filename); + /** * Locate the position of the arg in arglist. 
 *
@@ -102,6 +95,39 @@ class Metric {
   std::unordered_map<std::string, std::pair<int, float>> entry_;
 };
 
+using google::protobuf::Message;
+void Im2col(const float* data_im, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* data_col);
+void Col2im(const float* data_col, const int channels,
+    const int height, const int width, const int patch_h, const int patch_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* data_im);
+void ForwardMaxPooling(const float* bottom, const int num, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* top, float* mask);
+void BackwardMaxPooling(const float* top, const float* mask, const int num,
+    const int channels, const int height, const int width,
+    const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
+    const int stride_h, const int stride_w,
+    float* bottom);
+void ForwardAvgPooling(const float* bottom, const int num, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* top);
+void BackwardAvgPooling(const float* top, const int num, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* bottom);
+
+void ReadProtoFromTextFile(const char* filename, Message* proto);
+void WriteProtoToTextFile(const Message& proto, const char* filename);
+void ReadProtoFromBinaryFile(const char* filename, Message* proto);
+void WriteProtoToBinaryFile(const Message& proto, const char* filename);
+
+
 }  // namespace singa
 
 #endif  // SINGA_UTILS_COMMON_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/src/driver.cc
----------------------------------------------------------------------
diff --git a/src/driver.cc b/src/driver.cc
index f017f45..a891a08 100644
--- a/src/driver.cc
+++ b/src/driver.cc
@@ -32,6 +32,8 @@ void Driver::Init(int argc, char **argv) {
   RegisterLayer<BridgeDstLayer, int>(kBridgeDst);
   RegisterLayer<BridgeSrcLayer, int>(kBridgeSrc);
   RegisterLayer<ConvolutionLayer, int>(kConvolution);
+  RegisterLayer<CConvolutionLayer, int>(kCConvolution);
+  RegisterLayer<CPoolingLayer, int>(kCPooling);
   RegisterLayer<ConcateLayer, int>(kConcate);
   RegisterLayer<DropoutLayer, int>(kDropout);
   RegisterLayer<EuclideanLossLayer, int>(kEuclideanLoss);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/src/neuralnet/neuron_layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer.cc b/src/neuralnet/neuron_layer.cc
index b86d7da..edfa022 100644
--- a/src/neuralnet/neuron_layer.cc
+++ b/src/neuralnet/neuron_layer.cc
@@ -134,6 +134,49 @@ void ConvolutionLayer::ComputeGradient(int flag, Metric* perf) {
   }
 }
 
+/******************* Implementation for CConvolutionLayer *********/
+void CConvolutionLayer::ComputeFeature(int flag, Metric* perf) {
+  auto src = Tensor4(srclayers_[0]->mutable_data(this));
+  auto data = Tensor3(&data_);
+  auto col = Tensor2(&col_data_);
+  auto weight = Tensor2(weight_->mutable_data());
+  auto bias = Tensor1(bias_->mutable_data());
+
+  for (int n = 0; n < batchsize_; n++) {
+    Im2col(src[n].dptr, channels_, height_, width_,
+        kernel_, kernel_, pad_, pad_, stride_, stride_, col.dptr);
+    data[n] = dot(weight, col);
+  }
+  data += expr::broadcast<1>(bias, data.shape);
+}
+
+void CConvolutionLayer::ComputeGradient(int flag, Metric* perf) {
+  auto src = Tensor4(srclayers_[0]->mutable_data(this));
+  auto col = Tensor2(&col_data_);
+  auto weight = Tensor2(weight_->mutable_data());
+
+  auto grad = Tensor3(&grad_);
+  auto gcol = Tensor2(&col_grad_);
+  auto gweight = Tensor2(weight_->mutable_grad());
+  auto gbias = Tensor1(bias_->mutable_grad());
+  gweight = 0.f;
+  Blob<float>* gsrcblob = srclayers_[0]->mutable_grad(this);
+  Tensor<cpu, 4> gsrc(nullptr, Shape4(batchsize_, channels_, height_, width_));
+  if (gsrcblob != nullptr)
+    gsrc.dptr = gsrcblob->mutable_cpu_data();
+  gbias = expr::sumall_except_dim<1>(grad);
+  for (int n = 0; n < batchsize_; n++) {
+    Im2col(src[n].dptr, channels_, height_, width_,
+        kernel_, kernel_, pad_, pad_, stride_, stride_, col.dptr);
+    gweight += dot(grad[n], col.T());
+    if (gsrcblob != nullptr) {
+      gcol = dot(weight.T(), grad[n]);
+      Col2im(gcol.dptr, channels_, height_, width_,
+          kernel_, kernel_, pad_, pad_, stride_, stride_, gsrc[n].dptr);
+    }
+  }
+}
+
 /****************** Implementation for DropoutLayer ***********************/
 void DropoutLayer::Setup(const LayerProto& proto, int npartitions) {
   Layer::Setup(proto, npartitions);
@@ -430,7 +473,7 @@ void PoolingLayer::Setup(const LayerProto& proto, int npartitions) {
   stride_ = pool_conf.stride();
   CHECK_LT(pad_, kernel_);
   pool_ = proto.pooling_conf().pool();
-  CHECK(pool_ == PoolingProto_PoolMethod_AVE
+  CHECK(pool_ == PoolingProto_PoolMethod_AVG
       || pool_ == PoolingProto_PoolMethod_MAX)
      << "Padding implemented only for average and max pooling.";
   const auto& srcshape = srclayers_[0]->data(this).shape();
@@ -455,7 +498,7 @@ void PoolingLayer::ComputeFeature(int flag, Metric* perf) {
   auto data = Tensor4(&data_);
   if (pool_ == PoolingProto_PoolMethod_MAX)
     data = expr::pool<red::maximum>(src, kernel_, stride_);
-  else if (pool_ == PoolingProto_PoolMethod_AVE)
+  else if (pool_ == PoolingProto_PoolMethod_AVG)
     data = expr::pool<red::sum>(src, kernel_, stride_)
         * (1.0f / (kernel_ * kernel_));
 }
@@ -471,11 +514,44 @@ void PoolingLayer::ComputeGradient(int flag, Metric* perf) {
   auto grad = Tensor4(&grad_);
   if (pool_ == PoolingProto_PoolMethod_MAX)
     gsrc = expr::unpool<red::maximum>(src, data, grad, kernel_, stride_);
-  else if (pool_ == PoolingProto_PoolMethod_AVE)
+  else if (pool_ == PoolingProto_PoolMethod_AVG)
     gsrc = expr::unpool<red::sum>(src, data, grad, kernel_, stride_)
         * (1.0f / (kernel_ * kernel_));
 }
 
+/***************** Implementation of CPoolingLayer ***************/
+void CPoolingLayer::Setup(const LayerProto& proto, int npartitions) {
+  PoolingLayer::Setup(proto, npartitions);
+  if (pool_ == PoolingProto_PoolMethod_MAX)
+    mask_.ReshapeLike(data_);
+}
+
+void CPoolingLayer::ComputeFeature(int flag, Metric* perf) {
+  if (pool_ == PoolingProto_PoolMethod_MAX)
+    ForwardMaxPooling(srclayers_[0]->mutable_data(this)->mutable_cpu_data(),
+        batchsize_, channels_, height_, width_, kernel_, kernel_, pad_, pad_,
+        stride_, stride_, data_.mutable_cpu_data(), mask_.mutable_cpu_data());
+  else if (pool_ == PoolingProto_PoolMethod_AVG)
+    ForwardAvgPooling(srclayers_[0]->mutable_data(this)->mutable_cpu_data(),
+        batchsize_, channels_, height_, width_, kernel_, kernel_, pad_, pad_,
+        stride_, stride_, data_.mutable_cpu_data());
+  else
+    LOG(FATAL) << "unknown pooling method";
+}
+
+void CPoolingLayer::ComputeGradient(int flag, Metric* perf) {
+  if (pool_ == PoolingProto_PoolMethod_MAX)
+    BackwardMaxPooling(grad_.cpu_data(), mask_.cpu_data(), batchsize_,
+        channels_, height_, width_, kernel_, kernel_, pad_, pad_,
+        stride_, stride_, srclayers_[0]->mutable_grad(this)->mutable_cpu_data());
+  else if (pool_ == PoolingProto_PoolMethod_AVG)
+    BackwardAvgPooling(grad_.cpu_data(), batchsize_,
+        channels_, height_, width_, kernel_, kernel_, pad_, pad_,
+        stride_, stride_, srclayers_[0]->mutable_grad(this)->mutable_cpu_data());
+  else
+    LOG(FATAL) << "unknown pooling method";
+}
+
 /***************** Implementation for ReLULayer *****************************/
 void ReLULayer::Setup(const LayerProto& proto, int npartitions) {
   Layer::Setup(proto, npartitions);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
index 9adae6d..7861eae 100644
--- a/src/proto/job.proto
+++ b/src/proto/job.proto
@@ -389,7 +389,7 @@ message PoolingProto {
   required int32 kernel= 1;
   enum PoolMethod {
     MAX = 0;
-    AVE = 1;
+    AVG = 1;
   }
   // The pooling method
   optional PoolMethod pool = 30 [default = MAX];
@@ -514,6 +514,8 @@ enum LayerType {
   // Neuron layers
   //  - Feature transformation
   kConvolution = 1;
+  kCConvolution = 27;
+  kCPooling = 28;
   kDropout = 4;
   kInnerProduct = 5;
   kLRN = 6;
@@ -535,6 +537,7 @@ enum LayerType {
   kSlice = 12;
   kSplit = 13;
 
+  // Indicate the user defined layer. Users should configure user_type.
   kUserLayer = 102;
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/src/trainer/trainer.cc
----------------------------------------------------------------------
diff --git a/src/trainer/trainer.cc b/src/trainer/trainer.cc
index 1d78c37..b6dc729 100644
--- a/src/trainer/trainer.cc
+++ b/src/trainer/trainer.cc
@@ -481,7 +481,6 @@ const vector<Msg*> Trainer::HandleUpdate(ParamEntry *entry, Msg** msg) {
       mshadow::Tensor<mshadow::cpu,1> grad((*it)->mutable_cpu_grad(), shape);
       sum += grad;
     }
-    sum /= entry->num_total;
   }
   int step = (*msg)->trgt_version();
   GenMsgs(kUpdate, step, entry, *msg, &ret);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/src/utils/blob.cc
----------------------------------------------------------------------
diff --git a/src/utils/blob.cc b/src/utils/blob.cc
index fd402a8..3df1aef 100644
--- a/src/utils/blob.cc
+++ b/src/utils/blob.cc
@@ -1,5 +1,5 @@
 /**
- * The code is adapted from that of Caffe whose license is attached.
+ * The code is adapted from Caffe whose license is attached.
 *
 * COPYRIGHT
 * All contributions by the University of California:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d59eecf/src/utils/common.cc
----------------------------------------------------------------------
diff --git a/src/utils/common.cc b/src/utils/common.cc
index d13faea..3c3dc39 100644
--- a/src/utils/common.cc
+++ b/src/utils/common.cc
@@ -1,32 +1,70 @@
+/**
+ * Some functions in this file are adapted from Caffe, whose license
+ * is attached.
+ *
+ * COPYRIGHT
+ * All contributions by the University of California:
+ * Copyright (c) 2014, The Regents of the University of California (Regents)
+ * All rights reserved.
+ * All other contributions:
+ * Copyright (c) 2014, the respective contributors
+ * All rights reserved.
+ * Caffe uses a shared copyright model: each contributor holds copyright over
+ * their contributions to Caffe. The project versioning records all such
+ * contribution and copyright details. If a contributor wants to further mark
+ * their specific copyright on a particular contribution, they should indicate
+ * their copyright solely in the commit message of the change when it is
+ * committed.
+ * LICENSE
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * CONTRIBUTION AGREEMENT
+ * By contributing to the BVLC/caffe repository through pull-request, comment,
+ * or otherwise, the contributor releases their content to the
+ * license and copyright terms herein.
+ */
 #include "utils/common.h"
-#include <arpa/inet.h>
-#include <fcntl.h>
-#include <glog/logging.h>
-#include <google/protobuf/io/coded_stream.h>
-#include <google/protobuf/io/zero_copy_stream_impl.h>
-#include <google/protobuf/text_format.h>
-#include <stdarg.h>
-#include <stdio.h>
 #include <sys/ioctl.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/types.h>
+
 #include <netinet/in.h>
 #include <net/if.h>
+#include <arpa/inet.h>
+
+#include <stdarg.h>
+#include <stdio.h>
 #include <time.h>
 #include <unistd.h>
+#include <fcntl.h>
+#include <cfloat>
+
+#include <glog/logging.h>
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/text_format.h>
 
 namespace singa {
 
 using std::string;
 using std::vector;
-using google::protobuf::io::CodedInputStream;
-using google::protobuf::io::FileInputStream;
-using google::protobuf::io::FileOutputStream;
-using google::protobuf::io::ZeroCopyInputStream;
-using google::protobuf::Message;
-
 const int kBufLen = 1024;
 
 string IntVecToString(const vector<int>& vec) {
@@ -56,42 +94,7 @@ string StringPrintf(string fmt, ...) {
   return result;
 }
 
-// the proto related functions are from Caffe.
-void ReadProtoFromTextFile(const char* filename, Message* proto) {
-  int fd = open(filename, O_RDONLY);
-  CHECK_NE(fd, -1) << "File not found: " << filename;
-  FileInputStream* input = new FileInputStream(fd);
-  CHECK(google::protobuf::TextFormat::Parse(input, proto));
-  delete input;
-  close(fd);
-}
-
-void WriteProtoToTextFile(const Message& proto, const char* filename) {
-  int fd = open(filename, O_WRONLY | O_CREAT, 0644);
-  FileOutputStream* output = new FileOutputStream(fd);
-  CHECK(google::protobuf::TextFormat::Print(proto, output));
-  delete output;
-  close(fd);
-}
-
-void ReadProtoFromBinaryFile(const char* filename, Message* proto) {
-  int fd = open(filename, O_RDONLY);
-  CHECK_NE(fd, -1) << "File not found: " << filename;
-  ZeroCopyInputStream* raw_input = new FileInputStream(fd);
-  CodedInputStream* coded_input = new CodedInputStream(raw_input);
-  // upper limit 512MB, warning threshold 256MB
-  coded_input->SetTotalBytesLimit(536870912, 268435456);
-  CHECK(proto->ParseFromCodedStream(coded_input));
-  delete coded_input;
-  delete raw_input;
-  close(fd);
-}
-
-void WriteProtoToBinaryFile(const Message& proto, const char* filename) {
-  int fd = open(filename, O_CREAT|O_WRONLY|O_TRUNC, 0644);
-  CHECK_NE(fd, -1) << "File cannot open: " << filename;
-  CHECK(proto.SerializeToFileDescriptor(fd));
-}
 
 int ArgPos(int argc, char** arglist, const char* arg) {
   for (int i = 0; i < argc; i++) {
@@ -293,4 +296,247 @@ void Metric::ParseFrom(const string& msg) {
   }
 }
 
+
+/*************Below functions are adapted from Caffe ************/
+using google::protobuf::io::CodedInputStream;
+using google::protobuf::io::FileInputStream;
+using google::protobuf::io::FileOutputStream;
+using google::protobuf::io::ZeroCopyInputStream;
+
+void Im2col(const float* data_im, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* data_col) {
+  int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
+  int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
+  int channels_col = channels * kernel_h * kernel_w;
+  for (int c = 0; c < channels_col; ++c) {
+    int w_offset = c % kernel_w;
+    int h_offset = (c / kernel_w) % kernel_h;
+    int c_im = c / kernel_h / kernel_w;
+    for (int h = 0; h < height_col; ++h) {
+      for (int w = 0; w < width_col; ++w) {
+        int h_pad = h * stride_h - pad_h + h_offset;
+        int w_pad = w * stride_w - pad_w + w_offset;
+        if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
+          data_col[(c * height_col + h) * width_col + w] =
+              data_im[(c_im * height + h_pad) * width + w_pad];
+        else
+          data_col[(c * height_col + h) * width_col + w] = 0;
+      }
+    }
+  }
+}
+
+void Col2im(const float* data_col, const int channels,
+    const int height, const int width, const int patch_h, const int patch_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* data_im) {
+  memset(data_im, 0, height * width * channels * sizeof(float));
+  int height_col = (height + 2 * pad_h - patch_h) / stride_h + 1;
+  int width_col = (width + 2 * pad_w - patch_w) / stride_w + 1;
+  int channels_col = channels * patch_h * patch_w;
+  for (int c = 0; c < channels_col; ++c) {
+    int w_offset = c % patch_w;
+    int h_offset = (c / patch_w) % patch_h;
+    int c_im = c / patch_h / patch_w;
+    for (int h = 0; h < height_col; ++h) {
+      for (int w = 0; w < width_col; ++w) {
+        int h_pad = h * stride_h - pad_h + h_offset;
+        int w_pad = w * stride_w - pad_w + w_offset;
+        if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
&& h_pad < height && w_pad >= 0 && w_pad < width) + data_im[(c_im * height + h_pad) * width + w_pad] += + data_col[(c * height_col + h) * width_col + w]; + } + } + } +} + +void ForwardMaxPooling(const float* bottom, const int num, const int channels, + const int height, const int width, const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, const int stride_h, const int stride_w, + float* top, float* mask) { + int top_height = (height + pad_h * 2 -kernel_h ) / stride_h + 1; + int top_width = (width + pad_w * 2 -kernel_w ) / stride_w + 1; + int top_count = num * top_height * top_width * channels; + for (int i = 0; i < top_count; i++) { + mask[i] = -1; + top[i] = -FLT_MAX; + } + const int bottom_offset = height * width; + const int top_offset = top_height * top_width; + // The main loop + for (int n = 0; n < num; ++n) { + for (int c = 0; c < channels; ++c) { + for (int ph = 0; ph < top_height; ++ph) { + for (int pw = 0; pw < top_width; ++pw) { + int hstart = ph * stride_h - pad_h; + int wstart = pw * stride_w - pad_w; + int hend = std::min(hstart + kernel_h, height); + int wend = std::min(wstart + kernel_w, width); + hstart = std::max(hstart, 0); + wstart = std::max(wstart, 0); + const int top_index = ph * top_width + pw; + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + const int index = h * width + w; + if (bottom[index] > top[top_index]) { + top[top_index] = bottom[index]; + mask[top_index] = index; + } + } + } + } + } + // compute offset + bottom += bottom_offset; + top += top_offset; + mask += top_offset; + } + } +} + +void BackwardMaxPooling(const float* top, const float* mask, const int num, + const int channels, const int height, const int width, + const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + float* bottom) { + int top_height = (height + pad_h * 2 -kernel_h ) / stride_h + 1; + int top_width = (width + pad_w * 2 -kernel_w ) / stride_w + 1; + const int top_offset = top_height * top_width; + const int bottom_offset = height * width; + memset(bottom, 0, sizeof(float) * num * channels * bottom_offset); + for (int n = 0; n < num; ++n) { + for (int c = 0; c < channels; ++c) { + for (int ph = 0; ph < top_height; ++ph) { + for (int pw = 0; pw < top_width; ++pw) { + const int top_idx = ph * top_width + pw; + const int bottom_idx = static_cast<int>(mask[top_idx]); + bottom[bottom_idx] += top[top_idx]; + } + } + top += top_offset; + mask += top_offset; + bottom += bottom_offset; + } + } +} + +void ForwardAvgPooling(const float* bottom, const int num, const int channels, + const int height, const int width, const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, const int stride_h, const int stride_w, + float* top) { + int top_height = (height + pad_h * 2 -kernel_h ) / stride_h + 1; + int top_width = (width + pad_w * 2 -kernel_w ) / stride_w + 1; + int top_count = num * top_height * top_width * channels; + for (int i = 0; i < top_count; i++) { + top[i] = 0; + } + const int bottom_offset = height * width; + const int top_offset = top_height * top_width; + // The main loop + for (int n = 0; n < num; ++n) { + for (int c = 0; c < channels; ++c) { + for (int ph = 0; ph < top_height; ++ph) { + for (int pw = 0; pw < top_width; ++pw) { + int hstart = ph * stride_h - pad_h; + int wstart = pw * stride_w - pad_w; + int hend = std::min(hstart + kernel_h, height+pad_h); + int wend = std::min(wstart + kernel_w, width+pad_w); + int pool_size = (hend-hstart) * 
+          hstart = std::max(hstart, 0);
+          wstart = std::max(wstart, 0);
+          hend = std::min(hend, height);
+          wend = std::min(wend, width);
+          const int top_index = ph * top_width + pw;
+          for (int h = hstart; h < hend; ++h) {
+            for (int w = wstart; w < wend; ++w) {
+              const int index = h * width + w;
+              top[top_index] += bottom[index];
+            }
+          }
+          top[top_index] /= pool_size;
+        }
+      }
+      // compute offset
+      bottom += bottom_offset;
+      top += top_offset;
+    }
+  }
+}
+
+void BackwardAvgPooling(const float* top, const int num, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w, const int stride_h, const int stride_w,
+    float* bottom) {
+  int top_height = (height + pad_h * 2 - kernel_h) / stride_h + 1;
+  int top_width = (width + pad_w * 2 - kernel_w) / stride_w + 1;
+  const int top_offset = top_height * top_width;
+  const int bottom_offset = height * width;
+  memset(bottom, 0, sizeof(float) * num * channels * bottom_offset);
+  for (int n = 0; n < num; ++n) {
+    for (int c = 0; c < channels; ++c) {
+      for (int ph = 0; ph < top_height; ++ph) {
+        for (int pw = 0; pw < top_width; ++pw) {
+          int hstart = ph * stride_h - pad_h;
+          int wstart = pw * stride_w - pad_w;
+          int hend = std::min(hstart + kernel_h, height + pad_h);
+          int wend = std::min(wstart + kernel_w, width + pad_w);
+          int pool_size = (hend - hstart) * (wend - wstart);
+          hstart = std::max(hstart, 0);
+          wstart = std::max(wstart, 0);
+          hend = std::min(hend, height);
+          wend = std::min(wend, width);
+          const int top_index = ph * top_width + pw;
+          for (int h = hstart; h < hend; ++h) {
+            for (int w = wstart; w < wend; ++w) {
+              const int index = h * width + w;
+              bottom[index] += top[top_index] / pool_size;
+            }
+          }
+        }
+      }
+      top += top_offset;
+      bottom += bottom_offset;
+    }
+  }
+}
+
+void ReadProtoFromTextFile(const char* filename, Message* proto) {
+  int fd = open(filename, O_RDONLY);
+  CHECK_NE(fd, -1) << "File not found: " << filename;
+  FileInputStream* input = new FileInputStream(fd);
+  CHECK(google::protobuf::TextFormat::Parse(input, proto));
+  delete input;
+  close(fd);
+}
+
+void WriteProtoToTextFile(const Message& proto, const char* filename) {
+  int fd = open(filename, O_WRONLY | O_CREAT, 0644);
+  FileOutputStream* output = new FileOutputStream(fd);
+  CHECK(google::protobuf::TextFormat::Print(proto, output));
+  delete output;
+  close(fd);
+}
+
+void ReadProtoFromBinaryFile(const char* filename, Message* proto) {
+  int fd = open(filename, O_RDONLY);
+  CHECK_NE(fd, -1) << "File not found: " << filename;
+  ZeroCopyInputStream* raw_input = new FileInputStream(fd);
+  CodedInputStream* coded_input = new CodedInputStream(raw_input);
+  // upper limit 512MB, warning threshold 256MB
+  coded_input->SetTotalBytesLimit(536870912, 268435456);
+  CHECK(proto->ParseFromCodedStream(coded_input));
+  delete coded_input;
+  delete raw_input;
+  close(fd);
+}
+
+void WriteProtoToBinaryFile(const Message& proto, const char* filename) {
+  int fd = open(filename, O_CREAT|O_WRONLY|O_TRUNC, 0644);
+  CHECK_NE(fd, -1) << "File cannot open: " << filename;
+  CHECK(proto.SerializeToFileDescriptor(fd));
+}
 }  // namespace singa
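
----------------------------------------------------------------------
A minimal stand-alone illustration of the im2col trick adopted above.
The names here (im2col_ref, the 3x3 toy image) are hypothetical and not
part of the commit; the indexing mirrors the Im2col added to
src/utils/common.cc. Unrolling every kernel-sized patch into one column
of a matrix turns the convolution into a single dense matrix
multiplication, which is why CConvolutionLayer::ComputeFeature reduces
to dot(weight, col) per image:

#include <cstdio>

// Unroll each kernel x kernel patch of a (channels x height x width)
// image into one column of a (channels*kernel*kernel) x (out_h*out_w)
// matrix, zero-filling positions that fall into the padding.
void im2col_ref(const float* im, int channels, int height, int width,
                int kernel, int pad, int stride, float* col) {
  int out_h = (height + 2 * pad - kernel) / stride + 1;
  int out_w = (width + 2 * pad - kernel) / stride + 1;
  int rows = channels * kernel * kernel;  // one row per (channel, ky, kx)
  for (int r = 0; r < rows; ++r) {
    int kx = r % kernel;
    int ky = (r / kernel) % kernel;
    int c = r / kernel / kernel;
    for (int y = 0; y < out_h; ++y) {
      for (int x = 0; x < out_w; ++x) {
        int in_y = y * stride - pad + ky;
        int in_x = x * stride - pad + kx;
        col[(r * out_h + y) * out_w + x] =
            (in_y >= 0 && in_y < height && in_x >= 0 && in_x < width)
                ? im[(c * height + in_y) * width + in_x] : 0.f;
      }
    }
  }
}

int main() {
  // Toy case: one 3x3 single-channel image, one 2x2 filter, stride 1,
  // pad 0. im2col produces a 4x4 matrix; the convolution is then
  // filter (1x4) times col (4x4), i.e. a single GEMM with four outputs.
  const float im[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  const float filter[4] = {1, 0, 0, 1};  // adds top-left + bottom-right
  float col[16];
  im2col_ref(im, 1, 3, 3, 2, 0, 1, col);
  for (int j = 0; j < 4; ++j) {
    float sum = 0.f;  // this inner product is one cell of the GEMM
    for (int r = 0; r < 4; ++r) sum += filter[r] * col[r * 4 + j];
    std::printf("out[%d] = %g\n", j, sum);  // prints 6, 8, 12, 14
  }
  return 0;
}

The price of the trick is one unrolled buffer of channels * kernel *
kernel rows per image, which is what the col_data_ (and, for the
backward pass, col_grad_) blobs of ConvolutionLayer hold.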
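----------------------------------------------------------------------
The max-pooling speed-up can be sketched the same way. This is a
simplified single-map version with hypothetical names; the commit's
ForwardMaxPooling/BackwardMaxPooling additionally handle batches,
channels and padding. The forward pass book-keeps the flat index of
each window's winning neuron, so the backward pass becomes a direct
scatter instead of re-searching every window (in the commit the mask
lives in a Blob<float>, hence the static_cast<int> in
BackwardMaxPooling):

#include <cfloat>
#include <cstdio>
#include <cstring>

// Single-map max pooling (no padding, no batching). mask[i] remembers
// the flat bottom index that produced top[i].
void max_pool_forward(const float* bottom, int height, int width,
                      int kernel, int stride, float* top, int* mask) {
  int top_h = (height - kernel) / stride + 1;
  int top_w = (width - kernel) / stride + 1;
  for (int ph = 0; ph < top_h; ++ph) {
    for (int pw = 0; pw < top_w; ++pw) {
      int t = ph * top_w + pw;
      top[t] = -FLT_MAX;
      for (int h = ph * stride; h < ph * stride + kernel; ++h) {
        for (int w = pw * stride; w < pw * stride + kernel; ++w) {
          if (bottom[h * width + w] > top[t]) {
            top[t] = bottom[h * width + w];
            mask[t] = h * width + w;  // book-keep the winner's position
          }
        }
      }
    }
  }
}

// Backward pass: each top gradient flows only to the recorded winner,
// with no re-scan of the pooling windows.
void max_pool_backward(const float* top_grad, const int* mask, int top_count,
                       int bottom_count, float* bottom_grad) {
  std::memset(bottom_grad, 0, sizeof(float) * bottom_count);
  for (int t = 0; t < top_count; ++t)
    bottom_grad[mask[t]] += top_grad[t];
}

int main() {
  const float bottom[16] = {1, 3, 2, 0,
                            4, 2, 1, 5,
                            0, 1, 9, 2,
                            6, 0, 3, 8};
  float top[4];
  int mask[4];
  max_pool_forward(bottom, 4, 4, 2, 2, top, mask);  // 2x2 windows, stride 2
  const float top_grad[4] = {1, 1, 1, 1};
  float bottom_grad[16];
  max_pool_backward(top_grad, mask, 4, 16, bottom_grad);
  for (int t = 0; t < 4; ++t)
    std::printf("top[%d] = %g (winner at bottom[%d])\n", t, top[t], mask[t]);
  return 0;
}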
