anirudh2290 closed pull request #11630: Fix flaky test test_deconvolution
URL: https://github.com/apache/incubator-mxnet/pull/11630
This is a PR merged from a forked repository.
Because GitHub hides the original diff of a foreign (forked) pull request
once it is merged, the diff is reproduced below for the sake of provenance:
diff --git a/src/operator/linalg_impl.h b/src/operator/linalg_impl.h
index 08d2add28eb..c0ae97ad3a4 100644
--- a/src/operator/linalg_impl.h
+++ b/src/operator/linalg_impl.h
@@ -169,23 +169,52 @@ void linalg_gemm<cpu, mshadow::half::half_t>(const
Tensor<cpu, 2, mshadow::half:
// cublas col-major processing accounted for by switching first two operands
-#define LINALG_GPU_GEMM(fname, DType) \
-template<> inline \
-void linalg_gemm<gpu, DType>(const Tensor<gpu, 2, DType>& A, const Tensor<gpu,
2, DType>& B, \
- const Tensor<gpu, 2, DType>& C, DType alpha,
DType beta, \
- bool tA, bool tB, Stream<gpu> *s) { \
- using namespace mxnet; \
- using mshadow::gpu; \
- CHECK_NOTNULL(s); \
- check_gemm(A, B, C, alpha, beta, tA, tB); \
- CUBLAS_CALL(cublas##fname(Stream<gpu>::GetBlasHandle(s), \
- (tB ? CUBLAS_OP_T : CUBLAS_OP_N), \
- (tA ? CUBLAS_OP_T : CUBLAS_OP_N), \
- C.size(1), C.size(0), (tB ? B.size(1) :
B.size(0)), \
- &alpha, B.dptr_, B.stride_, A.dptr_, A.stride_, \
- &beta, C.dptr_, C.stride_)) \
+#define LINALG_GPU_GEMM(fname, DType) \
+ template <> \
+ inline void linalg_gemm<gpu, DType>( \
+ const Tensor<gpu, 2, DType>& A, const Tensor<gpu, 2, DType>& B, \
+ const Tensor<gpu, 2, DType>& C, DType alpha, DType beta, bool tA, \
+ bool tB, Stream<gpu>* s) { \
+ using namespace mxnet; \
+ using mshadow::gpu; \
+ CHECK_NOTNULL(s); \
+ check_gemm(A, B, C, alpha, beta, tA, tB); \
+ CUBLAS_CALL(cublas##fname( \
+ Stream<gpu>::GetBlasHandle(s), (tB ? CUBLAS_OP_T : CUBLAS_OP_N), \
+ (tA ? CUBLAS_OP_T : CUBLAS_OP_N), C.size(1), C.size(0), \
+ (tB ? B.size(1) : B.size(0)), &alpha, B.dptr_, B.stride_, A.dptr_, \
+ A.stride_, &beta, C.dptr_, C.stride_)) \
+ }
+
+// Use cublasSgemmEx when it is available (CUDA >= 7.5). Resolves precision
issues with
+// cublasSgemm. Please see https://github.com/apache/incubator-mxnet/pull/11630
+#if CUDA_VERSION >= 7050
+template <>
+inline void linalg_gemm<gpu, float>(const Tensor<gpu, 2, float>& A,
+ const Tensor<gpu, 2, float>& B,
+ const Tensor<gpu, 2, float>& C, float
alpha,
+ float beta, bool tA, bool tB,
+ Stream<gpu>* s) {
+ using namespace mxnet;
+ using mshadow::gpu;
+ CHECK_NOTNULL(s);
+ check_gemm(A, B, C, alpha, beta, tA, tB);
+#if CUDA_VERSION >= 8000
+ cudaDataType_t full_datatype = CUDA_R_32F;
+#else
+ cublasDataType_t full_datatype = CUBLAS_DATA_FULL;
+#endif
+ CUBLAS_CALL(cublasSgemmEx(
+ Stream<gpu>::GetBlasHandle(s), (tB ? CUBLAS_OP_T : CUBLAS_OP_N),
+ (tA ? CUBLAS_OP_T : CUBLAS_OP_N), C.size(1), C.size(0),
+ (tB ? B.size(1) : B.size(0)), &alpha, B.dptr_, full_datatype, B.stride_,
+ A.dptr_, full_datatype, A.stride_, &beta, C.dptr_, full_datatype,
+ C.stride_))
}
+
+#else
LINALG_GPU_GEMM(Sgemm, float)
+#endif
LINALG_GPU_GEMM(Dgemm, double)
// Version where matrix rows are given by first axis.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services