This is an automated email from the ASF dual-hosted git repository.
zhasheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 896271b fix test_countsketch flakiness (#11780)
896271b is described below
commit 896271b341f9104d9d98e076ecaf804b161e1a8f
Author: Hao Jin <[email protected]>
AuthorDate: Tue Jul 17 22:59:43 2018 -0700
fix test_countsketch flakiness (#11780)
---
src/operator/contrib/count_sketch.cu | 9 ++++----
tests/python/gpu/test_operator_gpu.py | 42 +++++++++++++----------------------
2 files changed, 21 insertions(+), 30 deletions(-)
diff --git a/src/operator/contrib/count_sketch.cu b/src/operator/contrib/count_sketch.cu
index 373ff3e..68dede3 100644
--- a/src/operator/contrib/count_sketch.cu
+++ b/src/operator/contrib/count_sketch.cu
@@ -129,8 +129,8 @@ inline void CountSketchForward(const Tensor<gpu, 2, DType> &out,
nthreads, out_ptr+bstart*out_dim, h_ptr,
s_ptr, in_ptr+bstart*in_dim, batchlen,
in_dim, out_dim);
- MSHADOW_CUDA_POST_KERNEL_CHECK(sketch_forward_kernel);
- // cudaThreadSynchronize();
+ cudaError_t err = cudaDeviceSynchronize();
+      CHECK_EQ(err, cudaSuccess) << "Error occured! CUDA: " << cudaGetErrorString(err);
bstart = (i+1)*batchlen;
}
}
@@ -153,7 +153,7 @@ inline void CountSketchBackward(const Tensor<gpu, 2, DType> &in_grad,
upper_bound = upper_bound-1;
}
// guarantee there are at least one iteration
- upper_bound = upper_bound > 0? upper_bound:0;
+ upper_bound = upper_bound > 0 ? upper_bound : 0;
int bstart = 0;
for ( int i = 0; i <= upper_bound; i++ ) {
const int batchlen = min(processing_batch_size, n_samples - bstart);
@@ -165,7 +165,8 @@ inline void CountSketchBackward(const Tensor<gpu, 2, DType> &in_grad,
nthreads, in_grad_ptr+bstart*in_dim, h_ptr,
s_ptr, out_grad_ptr+bstart*out_dim, batchlen,
in_dim, out_dim);
- MSHADOW_CUDA_POST_KERNEL_CHECK(sketch_backward_kernel);
+ cudaError_t err = cudaDeviceSynchronize();
+      CHECK_EQ(err, cudaSuccess) << "Error occured! CUDA: " << cudaGetErrorString(err);
bstart = (i+1)*batchlen;
}
}
diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py
index 9e9cc60..458028b 100644
--- a/tests/python/gpu/test_operator_gpu.py
+++ b/tests/python/gpu/test_operator_gpu.py
@@ -51,7 +51,10 @@ del test_support_vector_machine_l2_svm
def check_countsketch(in_dim,out_dim,n):
- sym = mx.sym.contrib.count_sketch(name='countsketch',out_dim = out_dim)
+ data = mx.sym.Variable("data")
+ h = mx.sym.Variable("h")
+ s = mx.sym.Variable("s")
+    sym = mx.sym.contrib.count_sketch(data=data, h=h, s=s, name='countsketch',out_dim = out_dim)
     shape = [(n,in_dim), (1,in_dim),(1,in_dim)] #shape of input x, hash h and hash s
arr = [mx.nd.empty(shape[i]) for i in range(3)]
@@ -62,46 +65,33 @@ def check_countsketch(in_dim,out_dim,n):
arr[1][:] = h #hash h
s = np.random.randint(0, 2, shape[2])*2-np.ones(shape[2])
arr[2][:] = s #hash s
- # forward
- exe_list = [sym.bind(mx.gpu(0), arr, arr_grad)]
- for exe in exe_list:
- exe.forward(is_train= True)
- out1 = [exe.outputs[0].asnumpy() for exe in exe_list]
-
+ locations = {"data": x, "h": h, "s": s}
a = np.zeros((n,out_dim))
temp = np.multiply(x, s)
for num_sample in np.arange(0,n):
for idx in np.arange(0,in_dim):
a[num_sample][h[0][idx]] += temp[num_sample][idx]
- assert_almost_equal(a,out1[0],rtol=1e-3, atol=1e-12)
-
- # backward
+    check_symbolic_forward(sym, locations, [a], rtol=1e-3, atol=1e-5, ctx=mx.gpu(0))
out_grad = mx.nd.empty((n,out_dim))
out_grad[:] = np.random.normal(-3, 3, (n,out_dim))
- for exe in exe_list:
- exe.backward([out_grad])
-
- a = np.zeros((n,in_dim))
- for j in np.arange(0,n):
- for i in np.arange(0,in_dim):
- a[j,i] = out_grad.asnumpy()[j, h[0,i]] * s[0,i]
- assert_almost_equal(a,arr_grad[0].asnumpy(),rtol=1e-3, atol=1e-12)
+ a = np.zeros((n,in_dim))
+ for j in np.arange(0,n):
+ for i in np.arange(0,in_dim):
+ a[j,i] = out_grad.asnumpy()[j, h[0,i]] * s[0,i]
+    check_symbolic_backward(sym, locations, [out_grad], [a], rtol=1e-3, atol=1e-5, ctx=mx.gpu(0))
[email protected]("test fails intermittently. temporarily disabled till it gets fixed. tracked at https://github.com/apache/incubator-mxnet/issues/10988")
-@with_seed(0)
+@with_seed()
def test_countsketch():
- nrepeat = 2
minindim = 40
maxindim = 100
minoutdim = 5
maxoutdim = 30
maxn = 200
- for repeat in range(nrepeat):
- in_dim = np.random.randint(minindim, maxindim)
- out_dim = np.random.randint(minoutdim, maxoutdim)
- n = np.random.randint(1,maxn)
- check_countsketch(in_dim, out_dim, n)
+ in_dim = np.random.randint(minindim, maxindim)
+ out_dim = np.random.randint(minoutdim, maxoutdim)
+ n = np.random.randint(1, maxn)
+ check_countsketch(in_dim, out_dim, n)
def check_ifft(shape):