[jira] [Created] (SINGA-249) Convolution BP

hacker99 (JIRA) Sun, 25 Sep 2016 07:47:36 -0700

hacker99 created SINGA-249:
------------------------------

             Summary: Convolution BP
                 Key: SINGA-249
                 URL: https://issues.apache.org/jira/browse/SINGA-249
             Project: Singa
          Issue Type: Wish
         Environment: Ubuntu 14.04, SINGA 1.0
            Reporter: hacker99



I'm curious about how the gradient is calculated in the back-propagation 
algorithm, e.g. for the Convolution layer. Can anyone explain the details of 
the formula and how the code implements it? I would be very grateful for any 
documentation, or simply an explanation of why dw += Mult(grad_b, col_data.T()).

#code from src/model/layer/convolution.cc
/// Back-propagates through the convolution layer on the host (kCpp) device.
///
/// Pops the input saved in buf_ and, sample by sample, unrolls it with Im2col
/// into a (col_height_ x col_width_) matrix col_b. For every sample b:
///   - dw   += grad_b * col_b^T
///   - dcol  = weight_^T * grad_b, folded back into image layout by Col2im
///             and written into slice b of dx.
/// Assuming the forward pass computes out_b = weight_ * col_b (not visible
/// here, but consistent with dcol = weight_^T * grad_b), the chain rule gives
/// dL/dweight_ = sum_b grad_b * col_b^T, which is what the accumulation into
/// dw implements.
///
/// @param flag  Not read by this function.
/// @param grad  Gradient w.r.t. the layer output; must be a 4-D tensor on a
///              kCpp device. Its first dimension is taken as the batch size.
/// @return A pair (dx, {dw, db}): dx is shaped like the cached input, dw like
///         weight_ and db like bias_. db is only filled in when bias_term_ is
///         set; otherwise it is returned as left by ResetLike(bias_).
const std::pair<Tensor, vector<Tensor>> Convolution::Backward(
    int flag, const Tensor &grad) {
  CHECK_EQ(grad.device()->lang(), kCpp);
  CHECK_EQ(grad.nDim(), 4u);
  CHECK(!buf_.empty());
  // Input saved earlier in buf_ (presumably by the forward pass).
  Tensor src_data = buf_.top();
  buf_.pop();
  vector<Tensor> param_grad;
  Tensor dx;
  Tensor db, dw;
  dx.ResetLike(src_data);
  db.ResetLike(bias_);
  dw.ResetLike(weight_);
  // dw is accumulated over the batch below, so it has to start from zero.
  dw.SetValue(0.0f);
  size_t batchsize = grad.shape(0);
  size_t imagesize = src_data.Size() / batchsize;
  if (bias_term_) {
    // View grad as one row per (sample, filter) pair, reduce each row to a
    // single value (tmp2 has batchsize * num_filters_ entries), then reduce
    // the (batchsize x num_filters_) result over the batch into db.
    Tensor tmp1 =
        Reshape(grad, Shape{batchsize * num_filters_,
                            grad.Size() / (batchsize * num_filters_)});
    Tensor tmp2(Shape{batchsize * num_filters_});
    SumColumns(tmp1, &tmp2);
    Tensor tmp3 = Reshape(tmp2, Shape{batchsize, num_filters_});
    SumRows(tmp3, &db);
  }
  auto in_data = src_data.data<float>();
  Tensor col_data(Shape{col_height_, col_width_});
  // Host scratch buffers. std::vector owns the storage, so it is released on
  // every exit path without a matching delete[].
  std::vector<float> data_col(col_height_ * col_width_);
  std::vector<float> dx_b(imagesize);
  for (size_t b = 0; b < batchsize; b++) {
    // Unroll sample b into column form and load it into col_data.
    Im2col(in_data + b * imagesize, channels_, height_, width_, kernel_h_,
           kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, data_col.data());
    col_data.CopyDataFromHostPtr(data_col.data(), col_height_ * col_width_);
    // Slice of grad belonging to sample b, viewed as
    // (num_filters_ x conv_height_ * conv_width_).
    Tensor grad_b(Shape{num_filters_, conv_height_ * conv_width_});
    CopyDataToFrom(&grad_b, grad, grad_b.Size(), 0, b * grad_b.Size());
    // Weight gradient contribution of this sample.
    dw += Mult(grad_b, col_data.T());
    // Gradient w.r.t. the column matrix, then folded back to image layout.
    Tensor dcol_b = Mult(weight_.T(), grad_b);
    auto dcol_data = dcol_b.data<float>();
    Col2im(dcol_data, channels_, height_, width_, kernel_h_, kernel_w_, pad_h_,
           pad_w_, stride_h_, stride_w_, dx_b.data());
    dx.CopyDataFromHostPtr(dx_b.data(), imagesize, b * imagesize);
  }
  param_grad.push_back(dw);
  param_grad.push_back(db);
  return std::make_pair(dx, param_grad);
}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

[jira] [Created] (SINGA-249) Convolution BP

Reply via email to