eric-haibin-lin closed pull request #12722: Add support for 
CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION
URL: https://github.com/apache/incubator-mxnet/pull/12722
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is reproduced
below (GitHub does not otherwise display the original diff after merge):

diff --git a/docs/faq/env_var.md b/docs/faq/env_var.md
index 0664d790741..ecc78c08f9e 100644
--- a/docs/faq/env_var.md
+++ b/docs/faq/env_var.md
@@ -158,7 +158,17 @@ When USE_PROFILER is enabled in Makefile or CMake, the 
following environments ca
   - Performance tests are run to pick the convolution algo when value is 1 or 2
   - Value of 1 chooses the best algo in a limited workspace
   - Value of 2 chooses the fastest algo whose memory requirements may be 
larger than the default workspace threshold
-  
+
+* MXNET_CUDA_ALLOW_TENSOR_CORE
+  - 0(false) or 1(true) ```(default=1)```
+       - If set to '0', disallows Tensor Core use in CUDA ops.
+       - If set to '1', allows Tensor Core use in CUDA ops.
+  - This variable can only be set once in a session.
+
+* MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION
+  - 0(false) or 1(true) ```(default=0)```
+       - If set to '0', disallows implicit type conversions to Float16 to use 
Tensor Cores.
+       - If set to '1', allows CUDA ops like RNN and Convolution to use 
Tensor Cores even with Float32 input data by using implicit type casting to 
Float16. Only has an effect if `MXNET_CUDA_ALLOW_TENSOR_CORE` is `1`.
 
 * MXNET_GLUON_REPO
   - Values: String 
```(default='https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/'```
diff --git a/src/common/cuda_utils.h b/src/common/cuda_utils.h
index b4b10c2c75b..0ada350b1ed 100644
--- a/src/common/cuda_utils.h
+++ b/src/common/cuda_utils.h
@@ -374,6 +374,22 @@ inline bool GetEnvAllowTensorCore() {
   return allow_tensor_core;
 }
 
+// The policy if the user hasn't set the environment variable
+// MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION
+#define MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION_DEFAULT false
+
+/*!
+ * \brief Returns global policy for TensorCore implicit type casting
+ */
+inline bool GetEnvAllowTensorCoreConversion() {
+  // Use of optional<bool> here permits: "0", "1", "true" and "false" to all be
+  // legal.
+  bool default_value = MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION_DEFAULT;
+  return dmlc::GetEnv("MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION",
+                      dmlc::optional<bool>(default_value))
+      .value();
+}
+
 #if CUDA_VERSION >= 9000
 // Sets the cuBLAS math mode that determines the 'allow TensorCore' policy.  
Returns previous.
 inline cublasMath_t SetCublasMathMode(cublasHandle_t blas_handle, cublasMath_t 
new_math_type) {
diff --git a/src/operator/cudnn_rnn-inl.h b/src/operator/cudnn_rnn-inl.h
index b33a717d15b..077428f5474 100644
--- a/src/operator/cudnn_rnn-inl.h
+++ b/src/operator/cudnn_rnn-inl.h
@@ -496,6 +496,11 @@ class CuDNNRNNOp : public Operator{
         if (cudnn_tensor_core_ && rnn_algo == CUDNN_RNN_ALGO_STANDARD) {
           math_type = CUDNN_TENSOR_OP_MATH;
         }
+      #if CUDNN_VERSION >= 7200
+            if (GetEnvAllowTensorCore() && GetEnvAllowTensorCoreConversion() &&
+                (DataType<DType>::kFlag != kFloat16))
+              math_type = CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION;
+      #endif
         CUDNN_CALL(cudnnSetRNNMatrixMathType(rnn_desc_, math_type));
       #endif
       // Get temp space sizes
diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h 
b/src/operator/nn/cudnn/cudnn_convolution-inl.h
index acdd6497665..53bd76c9c3e 100644
--- a/src/operator/nn/cudnn/cudnn_convolution-inl.h
+++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h
@@ -543,6 +543,11 @@ class CuDNNConvolutionOp {
     #if CUDNN_MAJOR >= 7
       cudnnMathType_t math_type = cudnn_tensor_core_ ? CUDNN_TENSOR_OP_MATH
                                                     : CUDNN_DEFAULT_MATH;
+      #if CUDNN_VERSION >= 7200
+            if (GetEnvAllowTensorCore() && GetEnvAllowTensorCoreConversion() &&
+                (DataType<DType>::kFlag != kFloat16))
+              math_type = CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION;
+      #endif
       CUDNN_CALL(cudnnSetConvolutionMathType(forward_conv_desc_, math_type));
       CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_, math_type));
       CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_w_, math_type));


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to