This is an automated email from the ASF dual-hosted git repository.
zhasheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 896271b fix test_countsketch flakiness (#11780)
896271b is described below
commit 896271b341f9104d9d98e076ecaf804b161e1a8f
Author: Hao Jin <[email protected]>
AuthorDate: Tue Jul 17 22:59:43 2018 -0700
fix test_countsketch flakiness (#11780)
---
src/operator/contrib/count_sketch.cu | 9 ++++----
tests/python/gpu/test_operator_gpu.py | 42 +++++++++++++----------------------
2 files changed, 21 insertions(+), 30 deletions(-)
diff --git a/src/operator/contrib/count_sketch.cu b/src/operator/contrib/count_sketch.cu
index 373ff3e..68dede3 100644
--- a/src/operator/contrib/count_sketch.cu
+++ b/src/operator/contrib/count_sketch.cu
@@ -129,8 +129,8 @@ inline void CountSketchForward(const Tensor<gpu, 2, DType> &out,
nthreads, out_ptr+bstart*out_dim, h_ptr,
s_ptr, in_ptr+bstart*in_dim, batchlen,
in_dim, out_dim);
- MSHADOW_CUDA_POST_KERNEL_CHECK(sketch_forward_kernel);
- // cudaThreadSynchronize();
+ cudaError_t err = cudaDeviceSynchronize();
+      CHECK_EQ(err, cudaSuccess) << "Error occured! CUDA: " << cudaGetErrorString(err);
bstart = (i+1)*batchlen;
}
}
@@ -153,7 +153,7 @@ inline void CountSketchBackward(const Tensor<gpu, 2, DType> &in_grad,
upper_bound = upper_bound-1;
}
// guarantee there are at least one iteration
- upper_bound = upper_bound > 0? upper_bound:0;
+ upper_bound = upper_bound > 0 ? upper_bound : 0;
int bstart = 0;
for ( int i = 0; i <= upper_bound; i++ ) {
const int batchlen = min(processing_batch_size, n_samples - bstart);
@@ -165,7 +165,8 @@ inline void CountSketchBackward(const Tensor<gpu, 2, DType> &in_grad,
nthreads, in_grad_ptr+bstart*in_dim, h_ptr,
s_ptr, out_grad_ptr+bstart*out_dim, batchlen,
in_dim, out_dim);
- MSHADOW_CUDA_POST_KERNEL_CHECK(sketch_backward_kernel);
+ cudaError_t err = cudaDeviceSynchronize();
+      CHECK_EQ(err, cudaSuccess) << "Error occured! CUDA: " << cudaGetErrorString(err);
bstart = (i+1)*batchlen;
}
}
diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py
index 9e9cc60..458028b 100644
--- a/tests/python/gpu/test_operator_gpu.py
+++ b/tests/python/gpu/test_operator_gpu.py
@@ -51,7 +51,10 @@ del test_support_vector_machine_l2_svm
def check_countsketch(in_dim,out_dim,n):
- sym = mx.sym.contrib.count_sketch(name='countsketch',out_dim = out_dim)
+ data = mx.sym.Variable("data")
+ h = mx.sym.Variable("h")
+ s = mx.sym.Variable("s")
+    sym = mx.sym.contrib.count_sketch(data=data, h=h, s=s, name='countsketch',out_dim = out_dim)
     shape = [(n,in_dim), (1,in_dim),(1,in_dim)] #shape of input x, hash h and hash s
arr = [mx.nd.empty(shape[i]) for i in range(3)]
@@ -62,46 +65,33 @@ def check_countsketch(in_dim,out_dim,n):
arr[1][:] = h #hash h
s = np.random.randint(0, 2, shape[2])*2-np.ones(shape[2])
arr[2][:] = s #hash s
- # forward
- exe_list = [sym.bind(mx.gpu(0), arr, arr_grad)]
- for exe in exe_list:
- exe.forward(is_train= True)
- out1 = [exe.outputs[0].asnumpy() for exe in exe_list]
-
+ locations = {"data": x, "h": h, "s": s}
a = np.zeros((n,out_dim))
temp = np.multiply(x, s)
for num_sample in np.arange(0,n):
for idx in np.arange(0,in_dim):
a[num_sample][h[0][idx]] += temp[num_sample][idx]
- assert_almost_equal(a,out1[0],rtol=1e-3, atol=1e-12)
-
- # backward
+    check_symbolic_forward(sym, locations, [a], rtol=1e-3, atol=1e-5, ctx=mx.gpu(0))
out_grad = mx.nd.empty((n,out_dim))
out_grad[:] = np.random.normal(-3, 3, (n,out_dim))
- for exe in exe_list:
- exe.backward([out_grad])
-
- a = np.zeros((n,in_dim))
- for j in np.arange(0,n):
- for i in np.arange(0,in_dim):
- a[j,i] = out_grad.asnumpy()[j, h[0,i]] * s[0,i]
- assert_almost_equal(a,arr_grad[0].asnumpy(),rtol=1e-3, atol=1e-12)
+ a = np.zeros((n,in_dim))
+ for j in np.arange(0,n):
+ for i in np.arange(0,in_dim):
+ a[j,i] = out_grad.asnumpy()[j, h[0,i]] * s[0,i]
+    check_symbolic_backward(sym, locations, [out_grad], [a], rtol=1e-3, atol=1e-5, ctx=mx.gpu(0))
[email protected]("test fails intermittently. temporarily disabled till it gets fixed. tracked at https://github.com/apache/incubator-mxnet/issues/10988")
-@with_seed(0)
+@with_seed()
def test_countsketch():
- nrepeat = 2
minindim = 40
maxindim = 100
minoutdim = 5
maxoutdim = 30
maxn = 200
- for repeat in range(nrepeat):
- in_dim = np.random.randint(minindim, maxindim)
- out_dim = np.random.randint(minoutdim, maxoutdim)
- n = np.random.randint(1,maxn)
- check_countsketch(in_dim, out_dim, n)
+ in_dim = np.random.randint(minindim, maxindim)
+ out_dim = np.random.randint(minoutdim, maxoutdim)
+ n = np.random.randint(1, maxn)
+ check_countsketch(in_dim, out_dim, n)
def check_ifft(shape):