[
https://issues.apache.org/jira/browse/SINGA-249?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15522701#comment-15522701
]
hacker99 commented on SINGA-249:
--------------------------------
What confuses me is the code in
https://github.com/apache/incubator-singa/blob/master/src/model/feed_forward_net.cc
Lines #221 and #224: when layers_.at(i)->Backward is called, does it just use the
final layer's gradient?
218 const vector<Tensor> FeedForwardNet::Backward(int flag, const Tensor& grad)
{
219 vector<Tensor> param_grads;
220 std::stack<Tensor> buf;
221 Tensor tmp = grad;
222 for (int i = layers_.size() - 1; i >= 0; i--) {
223 // LOG(INFO) << layers_.at(i)->name() << " : " << tmp.L1();
224 auto ret = layers_.at(i)->Backward(flag, tmp);
225 tmp = ret.first;
226 if (ret.second.size()) {
227 for (int k = ret.second.size() - 1; k >= 0; k--) {
228 buf.push(ret.second[k]);
229 // LOG(INFO) << " " << buf.top().L1();
230 }
231 }
232 }
233 while (!buf.empty()) {
234 param_grads.push_back(buf.top());
235 buf.pop();
236 }
237 return param_grads;
238 }
I am new to Machine Learning; more detailed information on how the BP algorithm is
implemented in Singa would be helpful.
Thank you!
> Convolution BP
> --------------
>
> Key: SINGA-249
> URL: https://issues.apache.org/jira/browse/SINGA-249
> Project: Singa
> Issue Type: Wish
> Environment: ubuntu 14.04,singa 1.0
> Reporter: hacker99
>
> I'm curious about how the gradient is calculated in the back-propagation
> algorithm, e.g. in the Convolution layer. Can anyone explain the details of the
> formula and its implementation in the code? I would be very grateful if there are
> some documents, or if someone could just explain why dw += Mult(grad_b, col_data.T())?
> #code from src/model/layer/convolution.cc
> const std::pair<Tensor, vector<Tensor>> Convolution::Backward(
> int flag, const Tensor &grad) {
> CHECK_EQ(grad.device()->lang(), kCpp);
> CHECK_EQ(grad.nDim(), 4u);
> CHECK(!buf_.empty());
> Tensor src_data = buf_.top();
> buf_.pop();
> vector<Tensor> param_grad;
> Tensor dx;
> Tensor db, dw;
> dx.ResetLike(src_data);
> db.ResetLike(bias_);
> dw.ResetLike(weight_);
> dw.SetValue(0.0f);
> size_t batchsize = grad.shape(0);
> size_t imagesize = src_data.Size() / batchsize;
> if (bias_term_) {
> Tensor tmp1 =
> Reshape(grad, Shape{batchsize * num_filters_,
> grad.Size() / (batchsize * num_filters_)});
> Tensor tmp2(Shape{batchsize * num_filters_});
> SumColumns(tmp1, &tmp2);
> Tensor tmp3 = Reshape(tmp2, Shape{batchsize, num_filters_});
> SumRows(tmp3, &db);
> }
> auto in_data = src_data.data<float>();
> Tensor col_data(Shape{col_height_, col_width_});
> float *data_col = new float[col_height_ * col_width_];
> float *dx_b = new float[imagesize];
> for (size_t b = 0; b < batchsize; b++) {
> Im2col(in_data + b * imagesize, channels_, height_, width_, kernel_h_,
> kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, data_col);
> col_data.CopyDataFromHostPtr(data_col, col_height_ * col_width_);
> Tensor grad_b(Shape{num_filters_, conv_height_ * conv_width_});
> CopyDataToFrom(&grad_b, grad, grad_b.Size(), 0, b * grad_b.Size());
> dw += Mult(grad_b, col_data.T());
> Tensor dcol_b = Mult(weight_.T(), grad_b);
> auto dcol_data = dcol_b.data<float>();
> Col2im(dcol_data, channels_, height_, width_, kernel_h_, kernel_w_,
> pad_h_,
> pad_w_, stride_h_, stride_w_, dx_b);
> dx.CopyDataFromHostPtr(dx_b, imagesize, b * imagesize);
> }
> param_grad.push_back(dw);
> param_grad.push_back(db);
> delete[] data_col;
> delete[] dx_b;
> return std::make_pair(dx, param_grad);
> }
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)