[GitHub] eric-haibin-lin commented on a change in pull request #11591: [MXNET-331] Single machine All Reduce Topology-aware Communication (Updated)

GitBox Mon, 09 Jul 2018 22:26:42 -0700

eric-haibin-lin commented on a change in pull request #11591: [MXNET-331] 
Single machine All Reduce Topology-aware Communication (Updated)
URL: https://github.com/apache/incubator-mxnet/pull/11591#discussion_r201217760


 ##########
 File path: tests/python/gpu/test_kvstore_gpu.py
 ##########
 @@ -86,34 +102,48 @@ def check_rsp_pull(kv, count, ctxs, is_same_rowid=False, 
use_slice=False):
         check_rsp_pull(kv, 4, [mx.gpu(i//2) for i in range(4)], use_slice=True)
         check_rsp_pull(kv, 4, [mx.cpu(i) for i in range(4)], use_slice=True)
 
-    check_rsp_push_pull('local')
-    check_rsp_push_pull('device')
-    check_rsp_push_pull('device', is_push_cpu=False)
+    # test fails intermittently. temporarily disabled till it gets fixed. 
tracked at https://github.com/apache/incubator-mxnet/issues/9384
+    # check_rsp_push_pull('local')
+    envs = ["","1"]
+    key  = "MXNET_KVSTORE_USETREE"
+    for val in envs:
+        with EnvManager(key, val):
+            check_rsp_push_pull('local')
+            check_rsp_push_pull('device')
+            check_rsp_push_pull('device', is_push_cpu=False)
 
 
 def test_row_sparse_pull_single_device():
-    kvstore = mx.kv.create('device')
-    copy = mx.nd.random_normal(shape=(4,4), ctx=mx.gpu(0))
-    grad = copy.tostype("row_sparse")
+    envs = ["","1"]
+    key  = "MXNET_KVSTORE_USETREE"
+    for val in envs:
+        with EnvManager(key, val):
+            kvstore = mx.kv.create('device')
+            copy = mx.nd.random_normal(shape=(4,4), ctx=mx.gpu(0))
+            grad = copy.tostype("row_sparse")
 
-    key = 0
-    kvstore.init(key, grad)
-    idx = grad.indices
-    kvstore.push(key, grad)
-    kvstore.row_sparse_pull(key, out=grad, row_ids=idx)
+            k = 0
+            kvstore.init(k, grad)
+            idx = grad.indices
+            kvstore.push(k, grad)
+            kvstore.row_sparse_pull(k, out=grad, row_ids=idx)
 
-    assert_almost_equal(grad.asnumpy(), copy.asnumpy())
+            assert_almost_equal(grad.asnumpy(), copy.asnumpy())
 
 
 def test_rsp_push_pull_large_rowid():
-    num_rows = 793470
-    val = mx.nd.ones((num_rows, 1)).tostype('row_sparse').copyto(mx.gpu())
-    kv = mx.kv.create('device')
-    kv.init('a', val)
-    out = mx.nd.zeros((num_rows,1), stype='row_sparse').copyto(mx.gpu())
-    kv.push('a', val)
-    kv.row_sparse_pull('a', out=out, row_ids=mx.nd.arange(0, num_rows, 
dtype='int64'))
-    assert(out.indices.shape[0] == num_rows)
+    envs = ["","1"]
+    key  = "MXNET_KVSTORE_USETREE"
+    for val in envs:
+        with EnvManager(key, val):
+            num_rows = 793470
+            val = mx.nd.ones((num_rows, 
1)).tostype('row_sparse').copyto(mx.gpu())
+            kv = mx.kv.create('device')
+            kv.init('a', val)
+            out = mx.nd.zeros((num_rows,1), 
stype='row_sparse').copyto(mx.gpu())
+            kv.push('a', val)
+            kv.row_sparse_pull('a', out=out, row_ids=mx.nd.arange(0, num_rows, 
dtype='int64'))
+            assert(out.indices.shape[0] == num_rows)
 
 
 Review comment:
   Looks like test_kvstore_gpu.py only contains tests for row-sparse data. Do you 
mind adding a few basic ones for dense data? 
   They should contain at least small keys and larger ones. For the purpose of unit 
testing, you can set gpuarray_bound to some small value.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

[GitHub] eric-haibin-lin commented on a change in pull request #11591: [MXNET-331] Single machine All Reduce Topology-aware Communication (Updated)

Reply via email to