This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new 087f96e  Fix Moderngpu usages in MXNet for CUDA 9 (#7789)
087f96e is described below

commit 087f96e45fb2cb2d305557ba03789ae2ee367417
Author: Przemyslaw Tredak <[email protected]>
AuthorDate: Thu Sep 7 20:31:26 2017 +0200

    Fix Moderngpu usages in MXNet for CUDA 9 (#7789)
    
    * Modify ModernGPU for CUDA 9
    
    * Remove unused shfl_up that triggered compiler warning
---
 .../moderngpu/include/device/intrinsics.cuh        | 43 +++++++++++++---------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git 
a/src/operator/contrib/ctc_include/contrib/moderngpu/include/device/intrinsics.cuh
 
b/src/operator/contrib/ctc_include/contrib/moderngpu/include/device/intrinsics.cuh
index a601443..afcfc00 100644
--- 
a/src/operator/contrib/ctc_include/contrib/moderngpu/include/device/intrinsics.cuh
+++ 
b/src/operator/contrib/ctc_include/contrib/moderngpu/include/device/intrinsics.cuh
@@ -104,32 +104,40 @@ MGPU_DEVICE uint prmt_ptx(uint a, uint b, uint index) {
 
 #endif // __CUDA_ARCH__ >= 200
 
-
+#if CUDA_VERSION >= 9000
 
////////////////////////////////////////////////////////////////////////////////
-// shfl_up
-
-__device__ __forceinline__ float shfl_up(float var,
-       unsigned int delta, int width = 32) {
+// shfl_add
 
+MGPU_DEVICE int shfl_add(int x, int offset, int width = WARP_SIZE, unsigned 
int threadmask = 0xFFFFFFFF) {
+       int result = 0;
 #if __CUDA_ARCH__ >= 300
-       var = __shfl_up(var, delta, width);
+       int mask = (WARP_SIZE - width)<< 8;
+       asm(
+               "{.reg .s32 r0;"
+               ".reg .pred p;"
+               "shfl.sync.up.b32 r0|p, %1, %2, %3, %4;"
+               "@p add.s32 r0, r0, %5;"
+               "mov.s32 %0, r0; }"
+               : "=r"(result) : "r"(x), "r"(offset), "r"(mask), 
"r"(threadmask), "r"(x));
 #endif
-       return var;
+       return result;
 }
 
-__device__ __forceinline__ double shfl_up(double var,
-       unsigned int delta, int width = 32) {
-
+MGPU_DEVICE int shfl_max(int x, int offset, int width = WARP_SIZE, unsigned 
int threadmask = 0xFFFFFFFF) {
+       int result = 0;
 #if __CUDA_ARCH__ >= 300
-       int2 p = mgpu::double_as_int2(var);
-       p.x = __shfl_up(p.x, delta, width);
-       p.y = __shfl_up(p.y, delta, width);
-       var = mgpu::int2_as_double(p);
+       int mask = (WARP_SIZE - width)<< 8;
+       asm(
+               "{.reg .s32 r0;"
+               ".reg .pred p;"
+               "shfl.sync.up.b32 r0|p, %1, %2, %3, %4;"
+               "@p max.s32 r0, r0, %5;"
+               "mov.s32 %0, r0; }"
+               : "=r"(result) : "r"(x), "r"(offset), "r"(mask), 
"r"(threadmask), "r"(x));
 #endif
-
-       return var;
+       return result;
 }
-
+#else
 
////////////////////////////////////////////////////////////////////////////////
 // shfl_add
 
@@ -162,6 +170,7 @@ MGPU_DEVICE int shfl_max(int x, int offset, int width = 
WARP_SIZE) {
 #endif
        return result;
 }
+#endif
 
 
////////////////////////////////////////////////////////////////////////////////
 // brev, popc, clz, bfe, bfi, prmt

-- 
To stop receiving notification emails like this one, please contact
['"[email protected]" <[email protected]>'].

Reply via email to