lupesko commented on a change in pull request #11630: Fix flaky test test_deconvolution
URL: https://github.com/apache/incubator-mxnet/pull/11630#discussion_r203256381
 
 

 ##########
 File path: src/operator/linalg_impl.h
 ##########
 @@ -169,23 +169,50 @@ void linalg_gemm<cpu, mshadow::half::half_t>(const Tensor<cpu, 2, mshadow::half:
 
 // cublas col-major processing accounted for by switching first two operands
 
-#define LINALG_GPU_GEMM(fname, DType) \
-template<> inline \
-void linalg_gemm<gpu, DType>(const Tensor<gpu, 2, DType>& A, const Tensor<gpu, 2, DType>& B, \
-                             const Tensor<gpu, 2, DType>& C, DType alpha, DType beta, \
-                             bool tA, bool tB, Stream<gpu> *s) { \
-  using namespace mxnet; \
-  using mshadow::gpu; \
-  CHECK_NOTNULL(s); \
-  check_gemm(A, B, C, alpha, beta, tA, tB); \
-  CUBLAS_CALL(cublas##fname(Stream<gpu>::GetBlasHandle(s), \
-                            (tB ? CUBLAS_OP_T : CUBLAS_OP_N), \
-                            (tA ? CUBLAS_OP_T : CUBLAS_OP_N), \
-                            C.size(1), C.size(0), (tB ? B.size(1) : B.size(0)), \
-                            &alpha, B.dptr_, B.stride_, A.dptr_, A.stride_, \
-                            &beta, C.dptr_, C.stride_)) \
+#define LINALG_GPU_GEMM(fname, DType)                                      \
+  template <>                                                              \
+  inline void linalg_gemm<gpu, DType>(                                     \
+      const Tensor<gpu, 2, DType>& A, const Tensor<gpu, 2, DType>& B,      \
+      const Tensor<gpu, 2, DType>& C, DType alpha, DType beta, bool tA,    \
+      bool tB, Stream<gpu>* s) {                                           \
+    using namespace mxnet;                                                 \
+    using mshadow::gpu;                                                    \
+    CHECK_NOTNULL(s);                                                      \
+    check_gemm(A, B, C, alpha, beta, tA, tB);                              \
+    CUBLAS_CALL(cublas##fname(                                             \
+        Stream<gpu>::GetBlasHandle(s), (tB ? CUBLAS_OP_T : CUBLAS_OP_N),   \
+        (tA ? CUBLAS_OP_T : CUBLAS_OP_N), C.size(1), C.size(0),            \
+        (tB ? B.size(1) : B.size(0)), &alpha, B.dptr_, B.stride_, A.dptr_, \
+        A.stride_, &beta, C.dptr_, C.stride_))                             \
+  }
+
+#if CUDA_VERSION >= 7050
 
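 For context on the "cublas col-major processing accounted for by switching first two operands" remark in the hunk above: cuBLAS assumes column-major storage, while mshadow tensors are row-major. A row-major matrix reinterpreted as column-major is its transpose, so C = A*B can be computed as C^T = B^T * A^T, which is why the macro passes B before A, swaps which of tA/tB drives each transpose flag, and reverses C's dimensions. A minimal standalone sketch of the same trick (hypothetical helper, not part of this patch), assuming plain cublasSgemm on device pointers:

    #include <cublas_v2.h>

    // Hypothetical helper: row-major C = A * B via column-major cuBLAS.
    // A is MxK, B is KxN, C is MxN, all row-major device buffers with
    // leading dimension equal to their column count.
    void rowmajor_sgemm(cublasHandle_t handle, int M, int N, int K,
                        const float* A, const float* B, float* C) {
      const float alpha = 1.0f, beta = 0.0f;
      // A row-major matrix read as column-major is its transpose, so we
      // ask cuBLAS for C^T = B^T * A^T: operands swapped, sizes (N, M, K).
      cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N,
                  N, M, K,
                  &alpha,
                  B, N,    // column-major view of B is B^T (N x K), ld = N
                  A, K,    // column-major view of A is A^T (K x M), ld = K
                  &beta,
                  C, N);   // cuBLAS writes C^T (N x M) == C in row-major
    }

 Because C^T stored column-major has exactly the same memory layout as C stored row-major, no transpose is ever materialized; the macro applies the same identity, with tA and tB folded into the swapped transpose flags.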
 Review comment:
   A comment here explaining the #if directive would be helpful for others (and for yourself) in the future. Can we add one, please?
