ptrendx commented on a change in pull request #16039: FullyConnected Bias performance improvement on GPU
URL: https://github.com/apache/incubator-mxnet/pull/16039#discussion_r326332767
##########
File path: src/operator/nn/fully_connected-inl.h
##########
@@ -169,19 +355,7 @@ void FCBackward(const OpContext &ctx, const FullyConnectedParam &param,
   linalg_gemm(grad, data, gwmat, true, false, s, req[fullc::kWeight]);
   // gradient of bias
   if (!param.no_bias) {
-    Tensor<xpu, 1, DType> gbias = in_grad[fullc::kBias].get<xpu, 1, DType>(s);
-    TBlob grad_blob = TBlob(grad);
-    TBlob gbias_blob = TBlob(gbias);
-    mxnet::TShape x(1, 0);
-    mxnet::TShape small;
-    if (shape_assign(&gbias_blob.shape_, Shape2(param.num_hidden, 1))) {
-      small = gbias_blob.shape_;
-    } else {
-      small = ReduceAxesShapeImpl(grad_blob.shape_, dmlc::optional<mxnet::TShape>(x), true, false);
-    }
-    ReduceAxesComputeImpl<xpu, mshadow::red::sum, false, false,
-                          mshadow_op::identity>(ctx, {grad_blob}, {req[fullc::kBias]},
-                                                {in_grad[fullc::kBias]}, small);
+    AddBiasGrad(in_grad[fullc::kBias], grad, req[fullc::kBias], param.num_hidden, ctx);
Review comment:
Tested on a Titan V, shape (3584, 768) (one of the shapes used in BERT), with 10 FullyConnected layers run back to back.
Old:
float16
```
247.14us  void mshadow::cuda::MapPlanKernel<mshadow::sv::plusto, int=8, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, int=2, mshadow::half::half_t>, mshadow::half::half_t>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, int=1, mshadow::half::half_t>, mshadow::half::half_t, int=2, int=1>, mshadow::half::half_t>>(mshadow::gpu, int, mshadow::Shape<int=2>, int=2)
517.79us  void mxnet::op::broadcast::reduce_kernel<mshadow::red::sum, int=2, mshadow::half::half_t, mshadow::half::half_t, mshadow::half::half_t, mxnet::op::mshadow_op::identity, int=2>(int, int, bool, mshadow::half::half_t const *, mshadow::half::half_t*, mshadow::Shape<int=2>, mshadow::Shape, mshadow::Shape, mshadow::Shape, int, bool)
25.984us  void mxnet::op::broadcast::reduce_lines_kernel<mshadow::red::sum, mshadow::half::half_t>(int, int, bool, int, mshadow::half::half_t const *, mxnet::op::broadcast::reduce_lines_kernel<mshadow::red::sum, mshadow::half::half_t>*)
```
float32
```
396.90us  void mshadow::cuda::MapPlanKernel<mshadow::sv::plusto, int=8, mshadow::expr::Plan<mshadow::Tensor<mshadow::gpu, int=2, float>, float>, mshadow::expr::Plan<mshadow::expr::Broadcast1DExp<mshadow::Tensor<mshadow::gpu, int=1, float>, float, int=2, int=1>, float>>(mshadow::gpu, int, mshadow::Shape<int=2>, int=2)
476.22us  void mxnet::op::broadcast::reduce_kernel<mshadow::red::sum, int=2, float, float, float, mxnet::op::mshadow_op::identity, int=2>(int, int, bool, float const *, float*, mshadow::Shape<int=2>, mshadow::Shape, mshadow::Shape, mshadow::Shape, int, bool)
25.729us  void mxnet::op::broadcast::reduce_lines_kernel<mshadow::red::sum, float>(int, int, bool, int, float const *, mxnet::op::broadcast::reduce_lines_kernel<mshadow::red::sum, float>*)
```
New:
float16
```
177.76us  void mxnet::op::add_bias_kernel<mshadow::half::half_t, double>(mshadow::half::half_t*, mshadow::half::half_t, unsigned long, unsigned long)
135.23us  void mxnet::op::AddBiasGradKernelPhase1<double, mshadow::half::half_t, float>(float*, mshadow::half::half_t const *, unsigned long, unsigned long)
68.481us  void mxnet::op::AddBiasGradKernelPhase2<mshadow::half::half_t, float>(float const *, mshadow::half::half_t*, int, int, mxnet::OpReqType)
```
float32
```
390.72us  void mxnet::op::add_bias_kernel<float, double>(float*, float, unsigned long, unsigned long)
242.72us  void mxnet::op::AddBiasGradKernelPhase1<double, float, float>(float*, float const *, unsigned long, unsigned long)
42.753us  void mxnet::op::AddBiasGradKernelPhase2<float, float>(float const *, float*, int, int, mxnet::OpReqType)
```
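For readers comparing the traces: the first kernel in each "New" list appears to be the forward bias add, while the two `AddBiasGradKernel` phases replace the old `reduce_kernel`/`reduce_lines_kernel` pair for the bias gradient. Below is a minimal standalone sketch of that two-phase column-sum idea, written for this comment rather than taken from the PR; the names (`BiasGradPhase1`, `BiasGradPhase2`, `Fill`), the chunk size, and the float-only types are assumptions. The real `AddBiasGradKernelPhase1`/`Phase2` templates additionally parameterize the load and accumulation types (the `double`/`float` template arguments visible above) and handle `half_t`.
```
// Two-phase bias-gradient reduction sketch (NOT the PR's actual code).
// Phase 1: per-chunk partial column sums of a row-major (n_rows, n_cols)
// gradient, written to a scratch buffer of shape (n_parts, n_cols).
// Phase 2: fold the partials into the n_cols-entry bias gradient.
#include <cuda_runtime.h>
#include <cstdio>

__global__ void Fill(float* p, size_t n, float v) {
  size_t i = blockIdx.x * static_cast<size_t>(blockDim.x) + threadIdx.x;
  if (i < n) p[i] = v;
}

__global__ void BiasGradPhase1(const float* grad, float* partial,
                               size_t n_rows, size_t n_cols,
                               size_t rows_per_part) {
  const size_t col = blockIdx.x * blockDim.x + threadIdx.x;
  const size_t part = blockIdx.y;
  if (col >= n_cols) return;
  const size_t begin = part * rows_per_part;
  const size_t end =
      begin + rows_per_part < n_rows ? begin + rows_per_part : n_rows;
  float acc = 0.f;  // sum this column over one chunk of rows
  for (size_t r = begin; r < end; ++r) acc += grad[r * n_cols + col];
  partial[part * n_cols + col] = acc;
}

__global__ void BiasGradPhase2(const float* partial, float* gbias,
                               size_t n_parts, size_t n_cols, bool add_to) {
  const size_t col = blockIdx.x * blockDim.x + threadIdx.x;
  if (col >= n_cols) return;
  float acc = 0.f;  // fold the per-chunk partials for this column
  for (size_t p = 0; p < n_parts; ++p) acc += partial[p * n_cols + col];
  gbias[col] = add_to ? gbias[col] + acc : acc;  // kAddTo vs kWriteTo
}

int main() {
  const size_t n_rows = 3584, n_cols = 768;  // shape from the numbers above
  const size_t rows_per_part = 64;           // arbitrary chunk size
  const size_t n_parts = (n_rows + rows_per_part - 1) / rows_per_part;
  float *grad, *partial, *gbias;
  cudaMalloc(&grad, n_rows * n_cols * sizeof(float));
  cudaMalloc(&partial, n_parts * n_cols * sizeof(float));
  cudaMalloc(&gbias, n_cols * sizeof(float));
  Fill<<<(n_rows * n_cols + 255) / 256, 256>>>(grad, n_rows * n_cols, 1.f);

  const int threads = 256;
  const dim3 grid1((n_cols + threads - 1) / threads, n_parts);
  BiasGradPhase1<<<grid1, threads>>>(grad, partial, n_rows, n_cols,
                                     rows_per_part);
  BiasGradPhase2<<<(n_cols + threads - 1) / threads, threads>>>(
      partial, gbias, n_parts, n_cols, /*add_to=*/false);

  float host = 0.f;
  cudaMemcpy(&host, gbias, sizeof(float), cudaMemcpyDeviceToHost);
  printf("gbias[0] = %f (expect %zu)\n", host, n_rows);  // all-ones grad
  cudaFree(grad); cudaFree(partial); cudaFree(gbias);
  return 0;
}
```
Splitting the reduction this way keeps phase 1's loads coalesced (adjacent threads read adjacent columns of the same row) while exposing enough blocks to fill the GPU; phase 2 then only has to fold a small n_parts x n_cols buffer.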