This is an automated email from the ASF dual-hosted git repository. jxie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push: new 805a71a [MXNET-491] Use depthwise convolution by cuDNNv7 if available, updated version (#11076) 805a71a is described below commit 805a71acf46e60e6c1f289d30000d0c80cefaf1b Author: nihui <shuizhuyuan...@126.com> AuthorDate: Wed May 30 02:35:45 2018 +0800 [MXNET-491] Use depthwise convolution by cuDNNv7 if available, updated version (#11076) * Use group convolution by cuDNNv7 if available * Fix coding style * ident-- for #if statements * more ident-- * more ident-- * prefer cudnnv7 depthwise convolution --- src/operator/nn/convolution.cu | 10 ++- src/operator/nn/cudnn/cudnn_convolution-inl.h | 92 +++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 2 deletions(-) diff --git a/src/operator/nn/convolution.cu b/src/operator/nn/convolution.cu index 045e570..65a320d 100644 --- a/src/operator/nn/convolution.cu +++ b/src/operator/nn/convolution.cu @@ -97,7 +97,9 @@ void ConvolutionCompute<gpu>(const nnvm::NodeAttrs& attrs, op.Forward(ctx, inputs, req, outputs); }) return; - } else if (param.num_filter == param.num_group && + } +#if MXNET_USE_CUDNN == 0 || CUDNN_MAJOR < 7 + if (param.num_filter == param.num_group && param.layout.value() == mshadow::kNCHW && param.num_filter == inputs[conv::kData].shape_[1] && param.kernel.ndim() == 2 && @@ -112,6 +114,7 @@ void ConvolutionCompute<gpu>(const nnvm::NodeAttrs& attrs, op.Forward(ctx, inputs, req, outputs); return; } +#endif #if MXNET_USE_CUDNN == 1 // On fp16-I/O instances, use fp32 compute (i.e. pseudo-fp16). @@ -167,7 +170,9 @@ void ConvolutionGradCompute<gpu>(const nnvm::NodeAttrs& attrs, op.Backward(ctx, std::vector<TBlob>{out_grad}, in_data, req, in_grad); }) return; - } else if (param.num_filter == param.num_group && + } +#if MXNET_USE_CUDNN == 0 || CUDNN_MAJOR < 7 + if (param.num_filter == param.num_group && param.layout.value() == mshadow::kNCHW && param.num_filter == in_data[conv::kData].shape_[1] && param.kernel.ndim() == 2 && @@ -183,6 +188,7 @@ void ConvolutionGradCompute<gpu>(const nnvm::NodeAttrs& attrs, op.Backward(ctx, std::vector<TBlob>{out_grad}, in_data, req, in_grad); return; } +#endif #if MXNET_USE_CUDNN == 1 // On fp16-I/O instances, use fp32 compute (i.e. pseudo-fp16). diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h index ca60c99..4b1cbbe 100644 --- a/src/operator/nn/cudnn/cudnn_convolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h @@ -137,6 +137,35 @@ class CuDNNConvolutionOp { DType *wmat_ptr = GetNdPtr(in_data[conv::kWeight], param_.kernel.ndim() + 2, s); DType *out_ptr = GetNdPtr(out_data[conv::kOut], param_.kernel.ndim() + 2, s); + #if CUDNN_MAJOR >= 7 + typename DataType<DType>::ScaleType alpha = 1.0f; + typename DataType<DType>::ScaleType beta = 0.0f; + typename DataType<DType>::ScaleType beta_add = 1.0f; + CUDNN_CALL(cudnnConvolutionForward(s->dnn_handle_, + &alpha, + in_desc_, + data_ptr, + filter_desc_, + wmat_ptr, + forward_conv_desc_, + forward_algo_.AlgoNumber(), + workspace.dptr_, + workspace_size, + req[conv::kOut] == kAddTo? &beta_add : &beta, + out_desc_, + out_ptr)); + + if (!param_.no_bias) { + Tensor<gpu, 1, DType> bias = in_data[conv::kBias].get<gpu, 1, DType>(s); + CUDNN_CALL(cudnnAddTensor(s->dnn_handle_, + &alpha, + bias_desc_, + bias.dptr_, + &beta_add, + out_desc_, + out_ptr)); + } + #else for (uint32_t g = 0; g < param_.num_group; ++g) { typename DataType<DType>::ScaleType alpha = 1.0f; typename DataType<DType>::ScaleType beta = 0.0f; @@ -177,6 +206,7 @@ class CuDNNConvolutionOp { #endif } } + #endif // CUDNN_MAJOR >= 7 } void Backward(const OpContext &ctx, @@ -202,6 +232,51 @@ class CuDNNConvolutionOp { GetTempSize(ctx); Tensor<gpu, 1, DType> workspace = AllocateTempWorkspace(ctx, backward_workspace_byte_); size_t workspace_size = TensorSizeBytes(workspace); + #if CUDNN_MAJOR >= 7 + typename DataType<DType>::ScaleType alpha = 1.0f; + typename DataType<DType>::ScaleType beta = 0.0f; + typename DataType<DType>::ScaleType beta_add = 1.0f; + if (!param_.no_bias && (req[conv::kBias] != kNullOp)) { + Tensor<gpu, 1, DType> gbias = in_grad[conv::kBias].get<gpu, 1, DType>(s); + CUDNN_CALL(cudnnConvolutionBackwardBias(s->dnn_handle_, + &alpha, + out_desc_, + grad_ptr, + req[conv::kBias] == kAddTo ? &beta_add : &beta, + bias_desc_, + gbias.dptr_)); + } + if (req[conv::kWeight] != kNullOp) { + CUDNN_CALL(cudnnConvolutionBackwardFilter(s->dnn_handle_, + &alpha, + in_desc_, + data_ptr, + out_desc_, + grad_ptr, + back_conv_desc_w_, + back_algo_w_.AlgoNumber(), + workspace.dptr_, + workspace_size, + req[conv::kWeight] == kAddTo? &beta_add : &beta, + filter_desc_, + gwmat_ptr)); + } + if (req[conv::kData] != kNullOp) { + CUDNN_CALL(cudnnConvolutionBackwardData(s->dnn_handle_, + &alpha, + filter_desc_, + wmat_ptr, + out_desc_, + grad_ptr, + back_conv_desc_, + back_algo_.AlgoNumber(), + workspace.dptr_, + workspace_size, + req[conv::kData] == kAddTo? &beta_add : &beta, + in_desc_, + gdata_ptr)); + } + #else for (uint32_t g = 0; g < param_.num_group; ++g) { typename DataType<DType>::ScaleType alpha = 1.0f; typename DataType<DType>::ScaleType beta = 0.0f; @@ -279,6 +354,7 @@ class CuDNNConvolutionOp { #endif } } + #endif // CUDNN_MAJOR >= 7 } /*! @@ -342,7 +418,10 @@ class CuDNNConvolutionOp { TShape wshape = in_shape[conv::kWeight]; TShape oshape = out_shape[conv::kOut]; TShape dstride, ostride; +#if CUDNN_MAJOR <= 6 wshape[0] /= param_.num_group; +#endif + #if CUDNN_MAJOR <= 5 // As of cuDNN_v6, the unsuffixed version of cudnnSetConvolution2dDescriptor() // takes an additional 'computeType' parameter to set the precision of the @@ -464,9 +543,15 @@ class CuDNNConvolutionOp { CUDNN_CALL(cudnnSetConvolutionMathType(forward_conv_desc_, math_type)); CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_, math_type)); CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_w_, math_type)); + CUDNN_CALL(cudnnSetConvolutionGroupCount(forward_conv_desc_, param_.num_group)); + CUDNN_CALL(cudnnSetConvolutionGroupCount(back_conv_desc_, param_.num_group)); + CUDNN_CALL(cudnnSetConvolutionGroupCount(back_conv_desc_w_, param_.num_group)); #endif + + #if CUDNN_MAJOR <= 6 dshape[1] /= param_.num_group; oshape[1] /= param_.num_group; + #endif weight_offset_ = wshape.Size(); data_offset_ = dstride[1] * dshape[1]; out_offset_ = ostride[1] * oshape[1]; @@ -494,10 +579,17 @@ class CuDNNConvolutionOp { if (!param_.no_bias) { TShape bias = in_shape[conv::kBias]; + #if CUDNN_MAJOR >= 7 + bias_offset_ = bias[0]; + std::vector<int> bias_shape = {1, + static_cast<int>(bias[0]), + 1, 1}; + #else bias_offset_ = bias[0] / param_.num_group; std::vector<int> bias_shape = {1, static_cast<int>(bias[0] / param_.num_group), 1, 1}; + #endif std::vector<int> bias_stride = {static_cast<int>(bias_offset_), 1, 1, 1}; if (param_.kernel.ndim() == 3) { bias_shape.push_back(1); -- To stop receiving notification emails like this one, please contact j...@apache.org.