ptrendx commented on a change in pull request #16039: FullyConnected Bias 
performance improvement on GPU
URL: https://github.com/apache/incubator-mxnet/pull/16039#discussion_r326333603
 
 

 ##########
 File path: src/operator/nn/fully_connected-inl.h
 ##########
 @@ -169,19 +355,7 @@ void FCBackward(const OpContext &ctx, const 
FullyConnectedParam &param,
   linalg_gemm(grad, data, gwmat, true, false, s, req[fullc::kWeight]);
   // gradient of bias
   if (!param.no_bias) {
-    Tensor<xpu, 1, DType> gbias = in_grad[fullc::kBias].get<xpu, 1, DType>(s);
-    TBlob grad_blob = TBlob(grad);
-    TBlob gbias_blob = TBlob(gbias);
-    mxnet::TShape x(1, 0);
-    mxnet::TShape small;
-    if (shape_assign(&gbias_blob.shape_, Shape2(param.num_hidden, 1))) {
-      small = gbias_blob.shape_;
-    } else {
-      small = ReduceAxesShapeImpl(grad_blob.shape_, 
dmlc::optional<mxnet::TShape>(x), true, false);
-    }
-    ReduceAxesComputeImpl<xpu, mshadow::red::sum, false, false,
-                          mshadow_op::identity>(ctx, {grad_blob}, 
{req[fullc::kBias]},
-                                                {in_grad[fullc::kBias]}, 
small);
+      AddBiasGrad(in_grad[fullc::kBias], grad, req[fullc::kBias], 
param.num_hidden, ctx);
 
 Review comment:
   Summary of the time improvement:
   For float32:
    - FWD: from 396.9 -> 390.7
    - BWD: from 501.9 -> 285.4
   For float16:
    - FWD: from 247.1 -> 177.8
    - BWD: from 543.8 -> 203.7

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Reply via email to