[GitHub] reminisce commented on a change in pull request #7226: Extending the GPU dot operator

2017-08-09 Thread git
reminisce commented on a change in pull request #7226: Extending the GPU dot operator
URL: https://github.com/apache/incubator-mxnet/pull/7226#discussion_r132054842
 
 

 ##
 File path: benchmark/python/dot.py
 ##
 @@ -0,0 +1,265 @@
+import ctypes
+
+from mxnet.test_utils import *
+import scipy.sparse as sp
+import os
+import time
+import argparse
+
+from mxnet.base import check_call, _LIB
+from util import get_data, estimate_density
+
+parser = argparse.ArgumentParser(description="Benchmark sparse operators",
+                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument('--num-omp-threads', type=int, default=1, help='number of omp threads to set in MXNet')
+args = parser.parse_args()
+
+# some data information
+kdda = {
+    'data_mini': 'kdda.t.mini',
+    'data_name': 'kdda.t',
+    'data_origin_name': 'kdda.t.bz2',
+    'url': "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/kdda.t.bz2",
+    'feature_dim': 20216830,
+    'm': 200,
+    'batch_size': [64]
+}
+
+avazu = {
+    'data_mini': 'avazu-app.t.mini',
+    'data_name': 'avazu-app.t',
+    'data_origin_name': 'avazu-app.t.bz2',
+    'url': "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/avazu-app.t.bz2",
+    'feature_dim': 100,
+    'm': 500,
+    'batch_size': [64, 128]
+}
+
+
+def measure_cost(wait, repeat, f, *args, **kwargs):
+    start = time.time()
+    if wait:
+        for i in range(repeat):
+            (f(*args, **kwargs)).wait_to_read()
+    else:
+        for i in range(repeat):
+            f(*args, **kwargs)
+    end = time.time()
+    diff = end - start
+    return diff / repeat
+
+
+def test_dot_real(data_dict):
+    def get_iter(path, data_shape, batch_size):
+        data_train = mx.io.LibSVMIter(data_libsvm=path,
+                                      data_shape=data_shape,
+                                      batch_size=batch_size)
+        data_iter = iter(data_train)
+        return data_iter
+
+    data_dir = os.path.join(os.getcwd(), 'data')
+
+    path = os.path.join(data_dir, data_dict['data_name'])
+    if not os.path.exists(path):
+        get_data(
+            data_dir,
+            data_dict['data_name'],
+            data_dict['url'],
+            data_dict['data_origin_name']
+        )
+        assert os.path.exists(path)
+
+    k = data_dict['feature_dim']
+    m = data_dict['m']
+    density = estimate_density(path, data_dict['feature_dim'])
+
+    mini_path = os.path.join(data_dir, data_dict['data_mini'])
+    if not os.path.exists(mini_path):
+        os.system("head -n 2000 %r > %r" % (path, mini_path))
+        assert os.path.exists(mini_path)
+
+    print "Running Benchmarking on %r data" % data_dict['data_mini']
+    for batch_size in data_dict['batch_size']:  # iterate through different batch sizes of choice
+        print "batch_size is %d" % batch_size
+        # model
+        data_shape = (k, )
+        train_iter = get_iter(mini_path, data_shape, batch_size)
+        weight = mx.nd.random_uniform(low=0, high=1, shape=(k, m))
+
+        csr_data = []
+        dns_data = []
+        num_batch = 0
+        for batch in train_iter:
+            data = train_iter.getdata()
+            csr_data.append(data)
+            dns_data.append(data.todense())
+            num_batch += 1
+        bag_of_data = [csr_data, dns_data]
+        num_repeat = 5
+        costs = []
+        for d in bag_of_data:
+            weight.wait_to_read()
+            cost = 0.
+            count = 0
+            for d_batch in d:
+                d_batch.wait_to_read()
+                cost += measure_cost(True, num_repeat, mx.nd.dot, d_batch, weight)
+                count += 1
+            costs.append(cost / count)
+        t_sparse = costs[0]
+        t_dense = costs[1]
+        ratio = t_dense / t_sparse
+        print('density(%)\tn\tm\tk\tt_dense/t_sparse\tt_dense\tt_sparse')
+        fmt = "%0.4f\t\t%d\t%d\t%d\t%0.2f\t\t\t%0.4f\t%0.6f"
+        print(fmt % (density * 100, batch_size, m, k, ratio, t_dense, t_sparse))
+
+
+def test_dot_synthetic():
+    """benchmark sparse mxnet dot and scipy dot operator with matrices of given density.
+    `t_sparse` is the runtime of the invoked sparse dot operator in ms, while `t_dense` is the
+    runtime of dot(dns, dns), with the same matrices except that they are in default storage type.
+    """
+    # Benchmark MXNet's sparse dot operator
+    def bench_mx_dot(lhs_shape, rhs_shape, lhs_stype, rhs_stype, lhs_den, rhs_den, trans_lhs, ctx, repeat):
+        set_default_context(ctx)
+        # Create matrix instances
+        lhs_nd = rand_ndarray(lhs_shape, lhs_stype, density=lhs_den)
+        rhs_nd = rand_ndarray(rhs_shape, rhs_stype, density=rhs_den)
+        lhs_dns = lhs_nd if lhs_stype == 'default' else lhs_nd.todense()
+        rhs_dns = rhs_nd if rhs_stype == 'default' else rhs_nd.todense()
+        # One warm up run, verify correctness
+        out =

[GitHub] reminisce commented on a change in pull request #7226: Extending the GPU dot operator

2017-08-09 Thread git
reminisce commented on a change in pull request #7226: Extending the GPU dot operator
URL: https://github.com/apache/incubator-mxnet/pull/7226#discussion_r132054939
 
 


[GitHub] reminisce commented on a change in pull request #7226: Extending the GPU dot operator

2017-08-09 Thread git
reminisce commented on a change in pull request #7226: Extending the GPU dot operator
URL: https://github.com/apache/incubator-mxnet/pull/7226#discussion_r132052984
 
 

 ##
 File path: benchmark/python/dot.py
 ##
 @@ -0,0 +1,265 @@
+def measure_cost(wait, repeat, f, *args, **kwargs):
 
 Review comment:
   I got the point of adding `wait` as an argument here from reading the code below. Could you add a comment explaining that `wait=True` is for the MXNet benchmark and `False` for scipy?
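
   For context: MXNet's engine executes operators asynchronously, so a call such as `mx.nd.dot(...)` returns as soon as the operation is enqueued, and only `wait_to_read()` blocks until the result is actually computed; scipy calls, by contrast, are synchronous. A minimal sketch of the kind of comment being requested (the docstring wording below is only a suggestion, not code from the PR):

```python
import time


def measure_cost(wait, repeat, f, *args, **kwargs):
    """Return the average wall-clock time in seconds over `repeat` calls to `f`.

    Pass `wait=True` when benchmarking MXNet operators: MXNet executes
    asynchronously, so each result must be blocked on with `wait_to_read()`,
    otherwise the loop would only measure how long it takes to enqueue the
    operations. Pass `wait=False` for synchronous libraries such as scipy,
    whose calls return only once the result is ready.
    """
    start = time.time()
    if wait:
        for _ in range(repeat):
            f(*args, **kwargs).wait_to_read()
    else:
        for _ in range(repeat):
            f(*args, **kwargs)
    return (time.time() - start) / repeat
```

   With hypothetical inputs, the two modes would be invoked as `measure_cost(True, 5, mx.nd.dot, csr_nd, weight_nd)` for MXNet versus `measure_cost(False, 5, scipy_csr.dot, np_weight)` for scipy.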
 

This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services