anirudh2290 closed pull request #11630: Fix flaky test test_deconvolution
URL: https://github.com/apache/incubator-mxnet/pull/11630
This is a PR merged from a forked repository.
Because GitHub hides the original diff of a foreign (forked) pull request
once it is merged, the diff is reproduced below for the sake of provenance:
diff --git a/src/operator/linalg_impl.h b/src/operator/linalg_impl.h
index 08d2add28eb..c0ae97ad3a4 100644
--- a/src/operator/linalg_impl.h
+++ b/src/operator/linalg_impl.h
@@ -169,23 +169,52 @@ void linalg_gemm<cpu, mshadow::half::half_t>(const
Tensor<cpu, 2, mshadow::half:
// cublas col-major processing accounted for by switching first two operands
-#define LINALG_GPU_GEMM(fname, DType) \
-template<> inline \
-void linalg_gemm<gpu, DType>(const Tensor<gpu, 2, DType>& A, const Tensor<gpu,
2, DType>& B, \
- const Tensor<gpu, 2, DType>& C, DType alpha,
DType beta, \
- bool tA, bool tB, Stream<gpu> *s) { \
- using namespace mxnet; \
- using mshadow::gpu; \
- CHECK_NOTNULL(s); \
- check_gemm(A, B, C, alpha, beta, tA, tB); \
- CUBLAS_CALL(cublas##fname(Stream<gpu>::GetBlasHandle(s), \
- (tB ? CUBLAS_OP_T : CUBLAS_OP_N), \
- (tA ? CUBLAS_OP_T : CUBLAS_OP_N), \
- C.size(1), C.size(0), (tB ? B.size(1) :
B.size(0)), \
- &alpha, B.dptr_, B.stride_, A.dptr_, A.stride_, \
- &beta, C.dptr_, C.stride_)) \
+#define LINALG_GPU_GEMM(fname, DType) \
+ template <> \
+ inline void linalg_gemm<gpu, DType>( \
+ const Tensor<gpu, 2, DType>& A, const Tensor<gpu, 2, DType>& B, \
+ const Tensor<gpu, 2, DType>& C, DType alpha, DType beta, bool tA, \
+ bool tB, Stream<gpu>* s) { \
+ using namespace mxnet; \
+ using mshadow::gpu; \
+ CHECK_NOTNULL(s); \
+ check_gemm(A, B, C, alpha, beta, tA, tB); \
+ CUBLAS_CALL(cublas##fname( \
+ Stream<gpu>::GetBlasHandle(s), (tB ? CUBLAS_OP_T : CUBLAS_OP_N), \
+ (tA ? CUBLAS_OP_T : CUBLAS_OP_N), C.size(1), C.size(0), \
+ (tB ? B.size(1) : B.size(0)), &alpha, B.dptr_, B.stride_, A.dptr_, \
+ A.stride_, &beta, C.dptr_, C.stride_)) \
+ }
+
+// Use cublasSgemmEx when it is available (CUDA >= 7.5). Resolves precision
issues with
+// cublasSgemm. Please see https://github.com/apache/incubator-mxnet/pull/11630
+#if CUDA_VERSION >= 7050
+template <>
+inline void linalg_gemm<gpu, float>(const Tensor<gpu, 2, float>& A,
+ const Tensor<gpu, 2, float>& B,
+ const Tensor<gpu, 2, float>& C, float
alpha,
+ float beta, bool tA, bool tB,
+ Stream<gpu>* s) {
+ using namespace mxnet;
+ using mshadow::gpu;
+ CHECK_NOTNULL(s);
+ check_gemm(A, B, C, alpha, beta, tA, tB);
+#if CUDA_VERSION >= 8000
+ cudaDataType_t full_datatype = CUDA_R_32F;
+#else
+ cublasDataType_t full_datatype = CUBLAS_DATA_FULL;
+#endif
+ CUBLAS_CALL(cublasSgemmEx(
+ Stream<gpu>::GetBlasHandle(s), (tB ? CUBLAS_OP_T : CUBLAS_OP_N),
+ (tA ? CUBLAS_OP_T : CUBLAS_OP_N), C.size(1), C.size(0),
+ (tB ? B.size(1) : B.size(0)), &alpha, B.dptr_, full_datatype, B.stride_,
+ A.dptr_, full_datatype, A.stride_, &beta, C.dptr_, full_datatype,
+ C.stride_))
}
+
+#else
LINALG_GPU_GEMM(Sgemm, float)
+#endif
LINALG_GPU_GEMM(Dgemm, double)
// Version where matrix rows are given by first axis.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services