This is an automated email from the ASF dual-hosted git repository.
jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 087f96e Fix Moderngpu usages in MXNet for CUDA 9 (#7789)
087f96e is described below
commit 087f96e45fb2cb2d305557ba03789ae2ee367417
Author: Przemyslaw Tredak <[email protected]>
AuthorDate: Thu Sep 7 20:31:26 2017 +0200
Fix Moderngpu usages in MXNet for CUDA 9 (#7789)
* Modify ModernGPU for CUDA 9
* Remove unused shfl_up that triggered compiler warning
---
.../moderngpu/include/device/intrinsics.cuh | 43 +++++++++++++---------
1 file changed, 26 insertions(+), 17 deletions(-)
diff --git
a/src/operator/contrib/ctc_include/contrib/moderngpu/include/device/intrinsics.cuh
b/src/operator/contrib/ctc_include/contrib/moderngpu/include/device/intrinsics.cuh
index a601443..afcfc00 100644
---
a/src/operator/contrib/ctc_include/contrib/moderngpu/include/device/intrinsics.cuh
+++
b/src/operator/contrib/ctc_include/contrib/moderngpu/include/device/intrinsics.cuh
@@ -104,32 +104,40 @@ MGPU_DEVICE uint prmt_ptx(uint a, uint b, uint index) {
#endif // __CUDA_ARCH__ >= 200
-
+#if CUDA_VERSION >= 9000
////////////////////////////////////////////////////////////////////////////////
-// shfl_up
-
-__device__ __forceinline__ float shfl_up(float var,
- unsigned int delta, int width = 32) {
+// shfl_add
+MGPU_DEVICE int shfl_add(int x, int offset, int width = WARP_SIZE, unsigned
int threadmask = 0xFFFFFFFF) {
+ int result = 0;
#if __CUDA_ARCH__ >= 300
- var = __shfl_up(var, delta, width);
+ int mask = (WARP_SIZE - width)<< 8;
+ asm(
+ "{.reg .s32 r0;"
+ ".reg .pred p;"
+ "shfl.sync.up.b32 r0|p, %1, %2, %3, %4;"
+ "@p add.s32 r0, r0, %5;"
+ "mov.s32 %0, r0; }"
+ : "=r"(result) : "r"(x), "r"(offset), "r"(mask),
"r"(threadmask), "r"(x));
#endif
- return var;
+ return result;
}
-__device__ __forceinline__ double shfl_up(double var,
- unsigned int delta, int width = 32) {
-
+MGPU_DEVICE int shfl_max(int x, int offset, int width = WARP_SIZE, unsigned
int threadmask = 0xFFFFFFFF) {
+ int result = 0;
#if __CUDA_ARCH__ >= 300
- int2 p = mgpu::double_as_int2(var);
- p.x = __shfl_up(p.x, delta, width);
- p.y = __shfl_up(p.y, delta, width);
- var = mgpu::int2_as_double(p);
+ int mask = (WARP_SIZE - width)<< 8;
+ asm(
+ "{.reg .s32 r0;"
+ ".reg .pred p;"
+ "shfl.sync.up.b32 r0|p, %1, %2, %3, %4;"
+ "@p max.s32 r0, r0, %5;"
+ "mov.s32 %0, r0; }"
+ : "=r"(result) : "r"(x), "r"(offset), "r"(mask),
"r"(threadmask), "r"(x));
#endif
-
- return var;
+ return result;
}
-
+#else
////////////////////////////////////////////////////////////////////////////////
// shfl_add
@@ -162,6 +170,7 @@ MGPU_DEVICE int shfl_max(int x, int offset, int width =
WARP_SIZE) {
#endif
return result;
}
+#endif
////////////////////////////////////////////////////////////////////////////////
// brev, popc, clz, bfe, bfi, prmt
--
To stop receiving notification emails like this one, please contact
['"[email protected]" <[email protected]>'].