eric-haibin-lin closed pull request #12722: Add support for CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION URL: https://github.com/apache/incubator-mxnet/pull/12722
This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/docs/faq/env_var.md b/docs/faq/env_var.md index 0664d790741..ecc78c08f9e 100644 --- a/docs/faq/env_var.md +++ b/docs/faq/env_var.md @@ -158,7 +158,17 @@ When USE_PROFILER is enabled in Makefile or CMake, the following environments ca - Performance tests are run to pick the convolution algo when value is 1 or 2 - Value of 1 chooses the best algo in a limited workspace - Value of 2 chooses the fastest algo whose memory requirements may be larger than the default workspace threshold - + +* MXNET_CUDA_ALLOW_TENSOR_CORE + - 0(false) or 1(true) ```(default=1)``` + - If set to '0', disallows Tensor Core use in CUDA ops. + - If set to '1', allows Tensor Core use in CUDA ops. + - This variable can only be set once in a session. + +* MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION + - 0(false) or 1(true) ```(default=0)``` + - If set to '0', disallows implicit type conversions to Float16 to use Tensor Cores + - If set to '1', allows CUDA ops like RNN and Convolution to use TensorCores even with Float32 input data by using implicit type casting to Float16. Only has an effect if `MXNET_CUDA_ALLOW_TENSOR_CORE` is `1`. * MXNET_GLUON_REPO - Values: String ```(default='https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/'``` diff --git a/src/common/cuda_utils.h b/src/common/cuda_utils.h index b4b10c2c75b..0ada350b1ed 100644 --- a/src/common/cuda_utils.h +++ b/src/common/cuda_utils.h @@ -374,6 +374,22 @@ inline bool GetEnvAllowTensorCore() { return allow_tensor_core; } +// The policy if the user hasn't set the environment variable +// CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION +#define MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION_DEFAULT false + +/*! 
+ * \brief Returns global policy for TensorCore implicit type casting + */ +inline bool GetEnvAllowTensorCoreConversion() { + // Use of optional<bool> here permits: "0", "1", "true" and "false" to all be + // legal. + bool default_value = MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION_DEFAULT; + return dmlc::GetEnv("MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION", + dmlc::optional<bool>(default_value)) + .value(); +} + #if CUDA_VERSION >= 9000 // Sets the cuBLAS math mode that determines the 'allow TensorCore' policy. Returns previous. inline cublasMath_t SetCublasMathMode(cublasHandle_t blas_handle, cublasMath_t new_math_type) { diff --git a/src/operator/cudnn_rnn-inl.h b/src/operator/cudnn_rnn-inl.h index b33a717d15b..077428f5474 100644 --- a/src/operator/cudnn_rnn-inl.h +++ b/src/operator/cudnn_rnn-inl.h @@ -496,6 +496,11 @@ class CuDNNRNNOp : public Operator{ if (cudnn_tensor_core_ && rnn_algo == CUDNN_RNN_ALGO_STANDARD) { math_type = CUDNN_TENSOR_OP_MATH; } + #if CUDNN_VERSION >= 7200 + if (GetEnvAllowTensorCore() && GetEnvAllowTensorCoreConversion() && + (DataType<DType>::kFlag != kFloat16)) + math_type = CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION; + #endif CUDNN_CALL(cudnnSetRNNMatrixMathType(rnn_desc_, math_type)); #endif // Get temp space sizes diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h index acdd6497665..53bd76c9c3e 100644 --- a/src/operator/nn/cudnn/cudnn_convolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h @@ -543,6 +543,11 @@ class CuDNNConvolutionOp { #if CUDNN_MAJOR >= 7 cudnnMathType_t math_type = cudnn_tensor_core_ ? 
CUDNN_TENSOR_OP_MATH : CUDNN_DEFAULT_MATH; + #if CUDNN_VERSION >= 7200 + if (GetEnvAllowTensorCore() && GetEnvAllowTensorCoreConversion() && + (DataType<DType>::kFlag != kFloat16)) + math_type = CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION; + #endif CUDNN_CALL(cudnnSetConvolutionMathType(forward_conv_desc_, math_type)); CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_, math_type)); CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_w_, math_type)); ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org With regards, Apache Git Services
