This is an automated email from the ASF dual-hosted git repository.
apeforest pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 95c5189 [Large Tensor] Implemented LT flag for OpPerf testing (#17449)
95c5189 is described below
commit 95c5189cc7f1a6d8a45fde65f500d74b8fa02c53
Author: Connor Goggins <[email protected]>
AuthorDate: Sat Feb 29 00:43:08 2020 -0800
[Large Tensor] Implemented LT flag for OpPerf testing (#17449)
* Passing large_tensor parameter down
* Adding large tensor testing functionality for convolutional operators
* Added large tensor test functionality for conv ops
* Fixing sizing for conv ops
* Added gemm large tensor, print on conv
* Updated input for gemm ops and print statements
* Fixed deconv large tensor test
* Added bias for deconv
* Added test functionality for nn_activation and nn_basic ops
* Fixed deconv bias, implemented large tensor test logic for general ops,
added default data for large tensor test
* Dropped unnecessary print statements
* Fixed lint errors
* Added large_tensor parameter to existing function descriptions, added
descriptions for functions missing descriptions
* Adding docs, changed large_tensor to int64_tensor for clarity
* Added warmup/runs to gemm ops, debugging process failure
* Resolved merge conflicts, added default params and input switching
functionality
* Dynamic input handling for default inputs, additional custom data for
int64
* Fixed RPD issue
* Everything through reduction ops working
* Passing large_tensor parameter down
* Adding large tensor testing functionality for convolutional operators
* Added large tensor test functionality for conv ops
* Fixing sizing for conv ops
* Added gemm large tensor, print on conv
* Updated input for gemm ops and print statements
* Fixed deconv large tensor test
* Added bias for deconv
* Added test functionality for nn_activation and nn_basic ops
* Fixed deconv bias, implemented large tensor test logic for general ops,
added default data for large tensor test
* Dropped unnecessary print statements
* Fixed lint errors
* Added large_tensor parameter to existing function descriptions, added
descriptions for functions missing descriptions
* Adding docs, changed large_tensor to int64_tensor for clarity
* Added warmup/runs to gemm ops, debugging process failure
* Resolved merge conflicts, added default params and input switching
functionality
* Dynamic input handling for default inputs, additional custom data for
int64
* Fixed RPD issue
* Everything through reduction ops working
* Random sampling & loss ops working
* Added indices, depth, ravel_data in default_params
* Added indexing ops - waiting for merge on ravel
* Added optimizer ops
* All misc ops working
* All NN Basic ops working
* Fixed LT input for ROIPooling
* Refactored NN Conv tests
* Added test for inline optimizer ops
* Dropping extra tests to decrease execution time
* Switching to inline tests for RNN to support additional modes
* Added state_cell as NDArray param, removed linalg testing for int64 tensor
* Cleaned up styling
* Fixed conv and deconv tests
* Retrigger CI for continuous build
* Cleaned up GEMM op inputs
* Dropped unused param from default_params
---
benchmark/opperf/nd_operations/array_rearrange.py | 8 +-
benchmark/opperf/nd_operations/binary_operators.py | 26 +-
benchmark/opperf/nd_operations/gemm_operators.py | 84 +++--
.../opperf/nd_operations/indexing_routines.py | 8 +-
benchmark/opperf/nd_operations/linalg_operators.py | 8 +-
benchmark/opperf/nd_operations/misc_operators.py | 73 ++--
.../nd_operations/nn_activation_operators.py | 10 +-
.../opperf/nd_operations/nn_basic_operators.py | 78 ++++-
.../opperf/nd_operations/nn_conv_operators.py | 287 +++++++++++-----
.../opperf/nd_operations/nn_loss_operators.py | 8 +-
.../opperf/nd_operations/nn_optimizer_operators.py | 66 ++--
.../nd_operations/random_sampling_operators.py | 8 +-
.../opperf/nd_operations/reduction_operators.py | 8 +-
.../nd_operations/sorting_searching_operators.py | 8 +-
benchmark/opperf/nd_operations/unary_operators.py | 26 +-
benchmark/opperf/opperf.py | 56 ++--
benchmark/opperf/rules/default_params.py | 371 ++++++++++++++++++++-
benchmark/opperf/utils/benchmark_utils.py | 4 +-
benchmark/opperf/utils/op_registry_utils.py | 57 ++--
19 files changed, 941 insertions(+), 253 deletions(-)
diff --git a/benchmark/opperf/nd_operations/array_rearrange.py
b/benchmark/opperf/nd_operations/array_rearrange.py
index 12af834..631d0bb 100644
--- a/benchmark/opperf/nd_operations/array_rearrange.py
+++ b/benchmark/opperf/nd_operations/array_rearrange.py
@@ -29,8 +29,8 @@ from benchmark.opperf.utils.op_registry_utils import
get_all_rearrange_operators
"""
-def run_rearrange_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', warmup=25, runs=100):
- """Runs benchmarks with the given context and precision (dtype) for all the
+def run_rearrange_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', int64_tensor='off', warmup=25, runs=100):
+ """Runs benchmarks with the given context, precision (dtype), and input
data size (int64_tensor) for all the
rearrange operators in MXNet.
Parameters
@@ -41,6 +41,8 @@ def run_rearrange_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='
Precision to use for benchmarks
profiler: str, default 'native'
Type of Profiler to use (native/python)
+ int64_tensor: str, default 'off'
+ Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
@@ -55,5 +57,5 @@ def run_rearrange_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='
mx_rearrange_ops = get_all_rearrange_operators()
# Run benchmarks
- mx_rearrange_op_results = run_op_benchmarks(mx_rearrange_ops, dtype, ctx,
profiler, warmup, runs)
+ mx_rearrange_op_results = run_op_benchmarks(mx_rearrange_ops, dtype, ctx,
profiler, int64_tensor, warmup, runs)
return mx_rearrange_op_results
diff --git a/benchmark/opperf/nd_operations/binary_operators.py
b/benchmark/opperf/nd_operations/binary_operators.py
index 5d95360..4444219 100644
--- a/benchmark/opperf/nd_operations/binary_operators.py
+++ b/benchmark/opperf/nd_operations/binary_operators.py
@@ -38,8 +38,8 @@ from benchmark.opperf.utils.op_registry_utils import
get_all_broadcast_binary_op
get_all_elemen_wise_binary_operators, get_all_misc_binary_operators
-def run_mx_binary_misc_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', warmup=25, runs=100):
- """Runs benchmarks with the given context and precision (dtype) for all
the miscellaneous
+def run_mx_binary_misc_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', int64_tensor='off', warmup=25, runs=100):
+ """Runs benchmarks with the given context, precision (dtype), and input
data size (int64_tensor) for all the miscellaneous
binary operators in MXNet.
Parameters
@@ -48,6 +48,10 @@ def run_mx_binary_misc_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profi
Context to run benchmarks
dtype: str, default 'float32'
Precision to use for benchmarks
+ profiler: str, default 'native'
+ Type of Profiler to use (native/python)
+ int64_tensor: str, default 'off'
+ Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
@@ -61,12 +65,12 @@ def run_mx_binary_misc_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profi
# Fetch all Miscellaneous Binary Operators
mx_binary_misc_ops = get_all_misc_binary_operators()
# Run benchmarks
- mx_binary_op_results = run_op_benchmarks(mx_binary_misc_ops, dtype, ctx,
profiler, warmup, runs)
+ mx_binary_op_results = run_op_benchmarks(mx_binary_misc_ops, dtype, ctx,
profiler, int64_tensor, warmup, runs)
return mx_binary_op_results
-def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='native', warmup=25, runs=100):
- """Runs benchmarks with the given context and precision (dtype) for all
the binary
+def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='native', int64_tensor='off', warmup=25, runs=100):
+ """Runs benchmarks with the given context, precision (dtype), and input
data size (int64_tensor) for all the binary
broadcast operators in MXNet.
Parameters
@@ -77,6 +81,8 @@ def
run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
Precision to use for benchmarks
profiler: str, default 'native'
Type of Profiler to use (native/python)
+ int64_tensor: str, default 'off'
+ Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
@@ -90,12 +96,12 @@ def
run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
# Fetch all Binary Broadcast Operators
mx_binary_broadcast_ops = get_all_broadcast_binary_operators()
# Run benchmarks
- mx_binary_op_results = run_op_benchmarks(mx_binary_broadcast_ops, dtype,
ctx, profiler, warmup, runs)
+ mx_binary_op_results = run_op_benchmarks(mx_binary_broadcast_ops, dtype,
ctx, profiler, int64_tensor, warmup, runs)
return mx_binary_op_results
-def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='native', warmup=25, runs=100):
- """Runs benchmarks with the given context and precision (dtype) for all
the binary
+def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='native', int64_tensor='off', warmup=25, runs=100):
+ """Runs benchmarks with the given context, precision (dtype), and input
data size (int64_tensor) for all the binary
element_wise operators in MXNet.
Parameters
@@ -106,6 +112,8 @@ def
run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32
Precision to use for benchmarks
profiler: str, default 'native'
Type of Profiler to use (native/python)
+ int64_tensor: str, default 'off'
+ Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 10
Number of times to run for warmup
runs: int, default 50
@@ -119,5 +127,5 @@ def
run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32
# Fetch all Binary Element_wise Operators
mx_binary_element_wise_ops = get_all_elemen_wise_binary_operators()
# Run benchmarks
- mx_binary_op_results = run_op_benchmarks(mx_binary_element_wise_ops,
dtype, ctx, profiler, warmup, runs)
+ mx_binary_op_results = run_op_benchmarks(mx_binary_element_wise_ops,
dtype, ctx, profiler, int64_tensor, warmup, runs)
return mx_binary_op_results
diff --git a/benchmark/opperf/nd_operations/gemm_operators.py
b/benchmark/opperf/nd_operations/gemm_operators.py
index f102812..55b3435 100644
--- a/benchmark/opperf/nd_operations/gemm_operators.py
+++ b/benchmark/opperf/nd_operations/gemm_operators.py
@@ -35,8 +35,8 @@ TODO
"""
-def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', warmup=25, runs=100):
- """Runs benchmarks with the given context and precision (dtype)for all the
GEMM
+def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', int64_tensor='off', warmup=25, runs=100):
+ """Runs benchmarks with the given context, precision (dtype), and input
data size (int64_tensor) for all the GEMM
operators (dot, batch_dot, khatri_rao) in MXNet.
Parameters
@@ -47,6 +47,8 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='nativ
Precision to use for benchmarks
profiler: str, default 'native'
Type of Profiler to use (native/python)
+ int64_tensor: str, default 'off'
+ Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
@@ -57,43 +59,75 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='nativ
Dictionary of results. Key -> Name of the operator, Value -> Benchmark
results.
"""
- # Benchmark tests for dot operator
+ standard_inputs_dot = [{"lhs": (1024, 1024),
+ "rhs": (1024, 1024)},
+ {"lhs": (1000, 10),
+ "rhs": (1000, 10),
+ "transpose_b": True},
+ {"lhs": (1000, 1),
+ "rhs": (100, 1000),
+ "transpose_a": True,
+ "transpose_b": True}]
+ int64_tensor_inputs_dot = [{"lhs": (2**16, 2**16),
+ "rhs": (2**16, 2**16)},
+ {"lhs": (4, 2**30),
+ "rhs": (4, 2**30),
+ "transpose_b": True},
+ {"lhs": (2**28, 16),
+ "rhs": (16, 2**28),
+ "transpose_a": True,
+ "transpose_b": True}]
+ standard_inputs_batch_dot = [{"lhs": (32, 1024, 1024),
+ "rhs": (32, 1024, 1024)},
+ {"lhs": (32, 1000, 10),
+ "rhs": (32, 1000, 10),
+ "transpose_b": True},
+ {"lhs": (32, 1000, 1),
+ "rhs": (32, 100, 1000),
+ "transpose_a": True,
+ "transpose_b": True}]
+ int64_tensor_inputs_batch_dot = [{"lhs": (1, 2**16, 2**16),
+ "rhs": (1, 2**16, 2**16)},
+ {"lhs": (1, 4, 2**30),
+ "rhs": (1, 4, 2**30),
+ "transpose_b": True},
+ {"lhs": (1, 2**28, 16),
+ "rhs": (1, 16, 2**28),
+ "transpose_a": True,
+ "transpose_b": True}]
+ standard_inputs_khatri_rao = [{"args": [(32, 32), (32, 32)]},
+ {"args": [(64, 64), (64, 64)]}]
+ int64_tensor_inputs_khatri_rao = [{"args": [(2**32, 1), (2**32, 1)]}]
+
+ if int64_tensor == 'on':
+ inputs_dot = int64_tensor_inputs_dot
+ inputs_batch_dot = int64_tensor_inputs_batch_dot
+ inputs_khatri_rao = int64_tensor_inputs_khatri_rao
+ else:
+ inputs_dot = standard_inputs_dot
+ inputs_batch_dot = standard_inputs_batch_dot
+ inputs_khatri_rao = standard_inputs_khatri_rao
+
+ # Benchmark tests for dot and batch_dot operators
dot_benchmark_res = run_performance_test(
[getattr(MX_OP_MODULE, "dot")], run_backward=True,
dtype=dtype, ctx=ctx,
- inputs=[{"lhs": (1024, 1024),
- "rhs": (1024, 1024)},
- {"lhs": (1000, 10),
- "rhs": (1000, 10),
- "transpose_b": True},
- {"lhs": (1000, 1),
- "rhs": (100, 1000),
- "transpose_a": True,
- "transpose_b": True}],
+ inputs=inputs_dot,
warmup=warmup, runs=runs, profiler=profiler)
- # Benchmark tests for batch_dot operator
+
batch_dot_benchmark_res = run_performance_test(
[getattr(MX_OP_MODULE, "batch_dot")], run_backward=True,
dtype=dtype, ctx=ctx,
- inputs=[{"lhs": (32, 1024, 1024),
- "rhs": (32, 1024, 1024)},
- {"lhs": (32, 1000, 10),
- "rhs": (32, 1000, 10),
- "transpose_b": True},
- {"lhs": (32, 1000, 1),
- "rhs": (32, 100, 1000),
- "transpose_a": True,
- "transpose_b": True}],
+ inputs=inputs_batch_dot,
warmup=warmup, runs=runs, profiler=profiler)
- # Operator khatri_rao is not yet implemented for GPU
+ # Operator khatri_rao is not yet implemented for GPU
khatri_rao_benchmark_res = []
if ctx != mx.gpu():
# Benchmark tests for khatri_rao operator
khatri_rao_benchmark_res = run_performance_test(
[getattr(MX_OP_MODULE, "khatri_rao")], run_backward=False,
dtype=dtype, ctx=ctx,
- inputs=[{"args": [(32, 32), (32, 32)]},
- {"args": [(64, 64), (64, 64)]}],
+ inputs=inputs_khatri_rao,
warmup=warmup, runs=runs, profiler=profiler)
# Prepare combined results for GEMM operators
diff --git a/benchmark/opperf/nd_operations/indexing_routines.py
b/benchmark/opperf/nd_operations/indexing_routines.py
index a957785..ee99de2 100644
--- a/benchmark/opperf/nd_operations/indexing_routines.py
+++ b/benchmark/opperf/nd_operations/indexing_routines.py
@@ -35,8 +35,8 @@ from benchmark.opperf.utils.op_registry_utils import
get_all_indexing_routines
"""
-def run_indexing_routines_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', warmup=25, runs=100):
- """Runs benchmarks with the given context and precision (dtype) for all
the indexing routines
+def run_indexing_routines_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', int64_tensor='off', warmup=25, runs=100):
+ """Runs benchmarks with the given context, precision (dtype), and data
size (int64_tensor) for all the indexing routines
in MXNet.
Parameters
@@ -47,6 +47,8 @@ def run_indexing_routines_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='na
Precision to use for benchmarks
profiler: str, default 'native'
Type of Profiler to use (native/python)
+ int64_tensor: str, default 'off'
+ Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
@@ -61,5 +63,5 @@ def run_indexing_routines_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='na
mx_indexing_ops = get_all_indexing_routines()
# Run benchmarks
- mx_indexing_op_results = run_op_benchmarks(mx_indexing_ops, dtype, ctx,
profiler, warmup, runs)
+ mx_indexing_op_results = run_op_benchmarks(mx_indexing_ops, dtype, ctx,
profiler, int64_tensor, warmup, runs)
return mx_indexing_op_results
diff --git a/benchmark/opperf/nd_operations/linalg_operators.py
b/benchmark/opperf/nd_operations/linalg_operators.py
index d2c1cee..1d35ef1 100644
--- a/benchmark/opperf/nd_operations/linalg_operators.py
+++ b/benchmark/opperf/nd_operations/linalg_operators.py
@@ -34,8 +34,8 @@ from benchmark.opperf.utils.benchmark_utils import
run_performance_test
from benchmark.opperf.utils.common_utils import merge_map_list
from benchmark.opperf.rules.default_params import MX_OP_MODULE
-def run_linalg_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', warmup=25, runs=100):
- """Runs benchmarks with the given context and precision (dtype) for all
the linear algebra
+def run_linalg_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', int64_tensor='off', warmup=25, runs=100):
+ """Runs benchmarks with the given context, precision (dtype), and data
size (int64_tensor) for all the linear algebra
operators in MXNet.
Parameters
@@ -46,6 +46,8 @@ def run_linalg_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='nat
Precision to use for benchmarks
profiler: str, default 'native'
Type of Profiler to use (native/python)
+ int64_tensor: str, default 'off'
+ Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
@@ -74,5 +76,5 @@ def run_linalg_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='nat
# Fetch all Linear Algebra Operators
mx_linalg_ops = get_all_linalg_operators()
# Run benchmarks
- mx_linalg_op_results = run_op_benchmarks(mx_linalg_ops, dtype, ctx,
profiler, warmup, runs)
+ mx_linalg_op_results = run_op_benchmarks(mx_linalg_ops, dtype, ctx,
profiler, int64_tensor, warmup, runs)
return merge_map_list(linalg_potrf_benchmark + [mx_linalg_op_results])
diff --git a/benchmark/opperf/nd_operations/misc_operators.py
b/benchmark/opperf/nd_operations/misc_operators.py
index 5a0efc5..fb8535a 100644
--- a/benchmark/opperf/nd_operations/misc_operators.py
+++ b/benchmark/opperf/nd_operations/misc_operators.py
@@ -37,7 +37,7 @@ from benchmark.opperf.rules.default_params import MX_OP_MODULE
from benchmark.opperf.custom_operations.custom_operations import
CustomAddOneProp
-def run_mx_misc_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', warmup=25, runs=100):
+def run_mx_misc_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', int64_tensor='off', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype) for all
the miscellaneous
operators in MXNet.
@@ -49,6 +49,8 @@ def run_mx_misc_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='na
Precision to use for benchmarks
profiler: str, default 'native'
Type of Profiler to use (native/python)
+ int64_tensor: str, default 'off'
+ Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
@@ -59,6 +61,48 @@ def run_mx_misc_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='na
Dictionary of results. Key -> Name of the operator, Value -> Benchmark
results.
"""
+
+ standard_inputs_array_ops = [{"args": [(1024, 1024)],
+ "num_arrays": 1},
+ {"args": [(10000, 1)],
+ "num_arrays": 1},
+ {"args": [(10000, 10)],
+ "num_arrays": 1}]
+ int64_tensor_inputs_array_ops = [{"args": [(2**32, 1)],
+ "num_arrays":1}]
+ standard_inputs_add_n = [{"args": [(1024, 1024)]},
+ {"args": [(10000, 1)]},
+ {"args": [(10000, 10)]}]
+ int64_tensor_inputs_add_n = [{"args": [(2**16, 2**16)]}]
+ standard_inputs_upsampling = [{"args": (32, 3, 256, 256),
+ "scale": 2,
+ "sample_type": "nearest"},
+ {"args": (32, 3, 10000, 1),
+ "scale": 4,
+ "sample_type": "nearest"}]
+ int64_tensor_inputs_upsampling = [{"args": (2**32 + 1, 1, 1, 1),
+ "scale": 2,
+ "sample_type": "nearest"}]
+ standard_inputs_custom = [{"args": [(1024, 1024)],
+ "op_type": "CustomAddOne"},
+ {"args": [(10000, 1)],
+ "op_type": "CustomAddOne"},
+ {"args": [(10000, 10)],
+ "op_type": "CustomAddOne"}]
+ int64_tensor_inputs_custom = [{"args": [(2**32 + 1, 1)],
+ "op_type": "CustomAddOne"}]
+
+ if int64_tensor == 'on':
+ inputs_array_ops = int64_tensor_inputs_array_ops
+ inputs_add_n = int64_tensor_inputs_add_n
+ inputs_upsampling = int64_tensor_inputs_upsampling
+ inputs_custom = int64_tensor_inputs_custom
+ else:
+ inputs_array_ops = standard_inputs_array_ops
+ inputs_add_n = standard_inputs_add_n
+ inputs_upsampling = standard_inputs_upsampling
+ inputs_custom = standard_inputs_custom
+
# Individual tests for ops with positional args
array_ops_benchmark = run_performance_test([getattr(MX_OP_MODULE,
"reset_arrays"),
getattr(MX_OP_MODULE,
"multi_all_finite"),
@@ -67,12 +111,7 @@ def run_mx_misc_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='na
dtype=dtype,
ctx=ctx,
profiler=profiler,
- inputs=[{"args": [(1024, 1024)],
- "num_arrays": 1},
- {"args": [(10000, 1)],
- "num_arrays": 1},
- {"args": [(10000, 10)],
- "num_arrays": 1}],
+ inputs=inputs_array_ops,
warmup=warmup,
runs=runs)
add_n_benchmark = run_performance_test([getattr(MX_OP_MODULE, "add_n")],
@@ -80,9 +119,7 @@ def run_mx_misc_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='na
dtype=dtype,
ctx=ctx,
profiler=profiler,
- inputs=[{"args": [(1024, 1024)]},
- {"args": [(10000, 1)]},
- {"args": [(10000, 10)]}],
+ inputs=inputs_add_n,
warmup=warmup,
runs=runs)
# There are currently issus with UpSampling with bilinear interpolation.
@@ -92,12 +129,7 @@ def run_mx_misc_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='na
dtype=dtype,
ctx=ctx,
profiler=profiler,
- inputs=[{"args": (32, 3, 256,
256),
- "scale": 2,
- "sample_type":
"nearest"},
- {"args": (32, 3,
10000, 1),
- "scale": 4,
- "sample_type":
"nearest"}],
+ inputs=inputs_upsampling,
warmup=warmup,
runs=runs)
# Create and register CustomAddOne operator for use in Custom op testing
@@ -108,17 +140,12 @@ def run_mx_misc_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='na
dtype=dtype,
ctx=ctx,
profiler=profiler,
- inputs=[{"args": [(1024, 1024)],
- "op_type":
"CustomAddOne"},
- {"args": [(10000, 1)],
- "op_type":
"CustomAddOne"},
- {"args": [(10000, 10)],
- "op_type":
"CustomAddOne"}],
+ inputs=inputs_custom,
warmup=warmup,
runs=runs)
# Fetch remaining Miscellaneous Operators
mx_misc_ops = get_remaining_miscellaneous_operators()
# Run benchmarks
- mx_misc_op_results = run_op_benchmarks(mx_misc_ops, dtype, ctx, profiler,
warmup, runs)
+ mx_misc_op_results = run_op_benchmarks(mx_misc_ops, dtype, ctx, profiler,
int64_tensor, warmup, runs)
return merge_map_list(array_ops_benchmark + add_n_benchmark +
upsampling_benchmark + custom_benchmark + [mx_misc_op_results])
diff --git a/benchmark/opperf/nd_operations/nn_activation_operators.py
b/benchmark/opperf/nd_operations/nn_activation_operators.py
index b77777c..161dfe7 100644
--- a/benchmark/opperf/nd_operations/nn_activation_operators.py
+++ b/benchmark/opperf/nd_operations/nn_activation_operators.py
@@ -43,9 +43,9 @@ from benchmark.opperf.utils.benchmark_utils import
run_op_benchmarks
"""
-def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', warmup=25, runs=100):
- """Runs benchmarks with the given context and precision (dtype)for all the
activation
- operators in MXNet.
+def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', int64_tensor='off', warmup=25, runs=100):
+ """Runs benchmarks with the given context, precision (dtype), and input
data size (int64_tensor) for all the activation
+ operators (relu, sigmoid, softmax) in MXNet.
Parameters
----------
@@ -55,6 +55,8 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler=
Precision to use for benchmarks
profiler: str, default 'native'
Module to use for tracking benchmark excecution time
+ int64_tensor: str, default 'off'
+ Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
@@ -70,6 +72,6 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler=
mx_activation_ops = get_all_nn_activation_operators()
# Run benchmarks
- mx_activation_op_results = run_op_benchmarks(mx_activation_ops, dtype,
ctx, profiler, warmup, runs)
+ mx_activation_op_results = run_op_benchmarks(mx_activation_ops, dtype,
ctx, profiler, int64_tensor, warmup, runs)
return mx_activation_op_results
\ No newline at end of file
diff --git a/benchmark/opperf/nd_operations/nn_basic_operators.py
b/benchmark/opperf/nd_operations/nn_basic_operators.py
index a8273d4..f3007ba 100644
--- a/benchmark/opperf/nd_operations/nn_basic_operators.py
+++ b/benchmark/opperf/nd_operations/nn_basic_operators.py
@@ -20,6 +20,10 @@ import mxnet as mx
from benchmark.opperf.utils.op_registry_utils import get_all_nn_basic_operators
from benchmark.opperf.utils.benchmark_utils import run_op_benchmarks
+from benchmark.opperf.utils.benchmark_utils import run_performance_test
+from benchmark.opperf.utils.common_utils import merge_map_list
+from benchmark.opperf.rules.default_params import MX_OP_MODULE
+
"""Performance benchmark tests for MXNet NDArray basic NN Operators.
1. FullyConnected
@@ -45,8 +49,8 @@ from benchmark.opperf.utils.benchmark_utils import
run_op_benchmarks
"""
-def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', warmup=25, runs=100):
- """Runs benchmarks with the given context and precision (dtype)for all the
NN basic
+def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', int64_tensor='off', warmup=25, runs=100):
+ """Runs benchmarks with the given context, precision (dtype), and data
size (int64_tensor) for all the basic neural network
operators in MXNet.
Parameters
@@ -56,7 +60,9 @@ def run_nn_basic_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='n
dtype: str, default 'float32'
Precision to use for benchmarks
profiler: str, default 'native'
- Module to use for tracking benchmark excecution time
+ Type of Profiler to use (native/python)
+ int64_tensor: str, default 'off'
+ Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
@@ -68,9 +74,71 @@ def run_nn_basic_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='n
"""
+ standard_data_list = [(1024, 4, 4)]
+ int64_tensor_data_list = [(2**28, 4, 4)]
+
+ if int64_tensor == 'on':
+ data_list = int64_tensor_data_list
+ else:
+ data_list = standard_data_list
+
+ for data in data_list:
+ rnn_relu_benchmark = run_performance_test([getattr(MX_OP_MODULE,
"RNN")],
+ run_backward=True,
+ dtype=dtype,
+ ctx=ctx,
+ profiler=profiler,
+ inputs=[{"data": data,
+ "parameters": (7,),
+ "state": (1, 4, 1),
+ "mode": "rnn_relu",
+ "state_size": 1,
+ "num_layers": 1}],
+ warmup=warmup,
+ runs=runs)
+ rnn_tanh_benchmark = run_performance_test([getattr(MX_OP_MODULE,
"RNN")],
+ run_backward=True,
+ dtype=dtype,
+ ctx=ctx,
+ profiler=profiler,
+ inputs=[{"data": data,
+ "parameters": (7,),
+ "state": (1, 4, 1),
+ "mode": "rnn_tanh",
+ "state_size": 1,
+ "num_layers": 1}],
+ warmup=warmup,
+ runs=runs)
+ rnn_lstm_benchmark = run_performance_test([getattr(MX_OP_MODULE,
"RNN")],
+ run_backward=True,
+ dtype=dtype,
+ ctx=ctx,
+ profiler=profiler,
+ inputs=[{"data": data,
+ "parameters": (28,),
+ "state": (1, 4, 1),
+ "state_cell": (1,
4, 1),
+ "mode": "lstm",
+ "state_size": 1,
+ "num_layers": 1}],
+ warmup=warmup,
+ runs=runs)
+ rnn_gru_benchmark = run_performance_test([getattr(MX_OP_MODULE,
"RNN")],
+ run_backward=True,
+ dtype=dtype,
+ ctx=ctx,
+ profiler=profiler,
+ inputs=[{"data": data,
+ "parameters": (21,),
+ "state": (1, 4, 1),
+ "mode": "gru",
+ "state_size": 1,
+ "num_layers": 1}],
+ warmup=warmup,
+ runs=runs)
# Fetch all NN Basic Operators
mx_nn_basic_ops = get_all_nn_basic_operators()
# Run benchmarks
- mx_nn_basic_op_results = run_op_benchmarks(mx_nn_basic_ops, dtype, ctx,
profiler, warmup, runs)
- return mx_nn_basic_op_results
+ mx_nn_basic_op_results = run_op_benchmarks(mx_nn_basic_ops, dtype, ctx,
profiler, int64_tensor, warmup, runs)
+ return merge_map_list(rnn_relu_benchmark + rnn_tanh_benchmark +
rnn_lstm_benchmark + rnn_gru_benchmark + [mx_nn_basic_op_results])
diff --git a/benchmark/opperf/nd_operations/nn_conv_operators.py
b/benchmark/opperf/nd_operations/nn_conv_operators.py
index 9c80f00..d44b891 100644
--- a/benchmark/opperf/nd_operations/nn_conv_operators.py
+++ b/benchmark/opperf/nd_operations/nn_conv_operators.py
@@ -52,16 +52,55 @@ MXNet NDArray NN Convolution Operators
"""
-def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', warmup=25, runs=100):
+def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', int64_tensor='off', warmup=25, runs=100):
+ """Runs benchmarks with the given context, precision (dtype), and input
data size (int64_tensor) for all the pooling
+ operators in MXNet.
+
+ Parameters
+ ----------
+ ctx: mx.ctx
+ Context to run benchmarks
+ dtype: str, default 'float32'
+ Precision to use for benchmarks
+ profiler: str, default 'native'
+ Type of Profiler to use (native/python)
+ int64_tensor: str, default 'off'
+ Input tensor size to use for tests (if on, dimensions >= 2**32)
+ warmup: int, default 25
+ Number of times to run for warmup
+ runs: int, default 100
+ Number of runs to capture benchmark results
+
+ Returns
+ -------
+ Dictionary of results. Key -> Name of the operator, Value -> Benchmark
results.
+
+ """
pool_types = ['avg', 'max', 'sum']
global_pool_types = [0, 1]
+ standard_data_list_pool1d = [(32, 3, 256), (32, 3, 64)]
+ int64_tensor_data_list_pool1d = [(1, 1, 2**32)]
+ standard_data_list_pool2d = [(32, 3, 256, 256), (32, 3, 64, 64)]
+ int64_tensor_data_list_pool2d = [(2**28, 1, 4, 4)]
+ standard_data_list_roipool = [(32, 3, 256, 256), (32, 3, 64, 64)]
+ int64_tensor_data_list_roipool = [(32, 3, 2**13, 2**13)]
+
+ if int64_tensor == 'on':
+ data_list_pool1d = int64_tensor_data_list_pool1d
+ data_list_pool2d = int64_tensor_data_list_pool2d
+ data_list_roipool = int64_tensor_data_list_roipool
+ else:
+ data_list_pool1d = standard_data_list_pool1d
+ data_list_pool2d = standard_data_list_pool2d
+ data_list_roipool = standard_data_list_roipool
+
# Run 1D and 2D Pooling performance runs
pool1d_benchmark_res = []
pool2d_benchmark_res = []
for pool_type in pool_types:
for global_pool in global_pool_types:
- for pool1d_data in [(32, 3, 256), (32, 3, 64)]:
+ for pool1d_data in data_list_pool1d:
pool1d_benchmark_res +=
run_performance_test([getattr(MX_OP_MODULE, "Pooling")],
run_backward=True,
dtype=dtype,
@@ -73,10 +112,10 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='na
"global_pool": global_pool,
"stride": 1,
"pad": 1}
- ],
+ ],
warmup=warmup,
runs=runs)
- for pool2d_data in [(32, 3, 256, 256), (32, 3, 64, 64)]:
+ for pool2d_data in data_list_pool2d:
pool2d_benchmark_res +=
run_performance_test([getattr(MX_OP_MODULE, "Pooling")],
run_backward=True,
dtype=dtype,
@@ -88,68 +127,118 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='na
"global_pool": global_pool,
"stride": (1, 1),
"pad":
(0, 0)}
- ],
+ ],
warmup=warmup,
runs=runs)
- # Run ROI Pooling performance runs
- roipool_benchmark_res = []
- for roipool_data in [(32, 3, 256, 256), (32, 3, 64, 64)]:
- roipool_benchmark_res += run_performance_test([getattr(MX_OP_MODULE,
"ROIPooling")],
- run_backward=True,
- dtype=dtype,
- ctx=ctx,
- profiler=profiler,
- inputs=[{"data":
roipool_data,
- "rois": (32, 5),
- "pooled_size":
(2, 2),
-
"spatial_scale": .5}
- ],
- warmup=warmup,
- runs=runs)
+ # Run ROI Pooling performance runs
+ roipool_benchmark_res = []
+ for roipool_data in data_list_roipool:
+ roipool_benchmark_res +=
run_performance_test([getattr(MX_OP_MODULE, "ROIPooling")],
+
run_backward=True,
+ dtype=dtype,
+ ctx=ctx,
+
profiler=profiler,
+ inputs=[{"data":
roipool_data,
+ "rois":
(32, 5),
+
"pooled_size": (2, 2),
+
"spatial_scale": .5}
+ ],
+ warmup=warmup,
+ runs=runs)
# Prepare combined results
mx_pooling_op_results = merge_map_list(pool1d_benchmark_res +
pool2d_benchmark_res + roipool_benchmark_res)
return mx_pooling_op_results
-def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', warmup=25, runs=100):
- # Conv1D Benchmarks
+def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', int64_tensor='off', warmup=25, runs=100):
+ """Runs benchmarks with the given context, precision (dtype), and input
data size (int64_tensor) for all the convolution
+ operators in MXNet.
+
+ Parameters
+ ----------
+ ctx: mx.ctx
+ Context to run benchmarks
+ dtype: str, default 'float32'
+ Precision to use for benchmarks
+ profiler: str, default 'native'
+ Type of Profiler to use (native/python)
+ int64_tensor: str, default 'off'
+ Input tensor size to use for tests (if on, dimensions >= 2**32)
+ warmup: int, default 25
+ Number of times to run for warmup
+ runs: int, default 100
+ Number of runs to capture benchmark results
+
+ Returns
+ -------
+ Dictionary of results. Key -> Name of the operator, Value -> Benchmark
results.
+
+ """
+
+ standard_data_list_conv1d = [(32, 3, 256), (32, 3, 64)]
+ int64_tensor_data_list_conv1d = [(2**30, 1, 4)]
+ standard_weight_conv1d = (1, 3, 3)
+ int64_tensor_weight_conv1d = (1, 1, 1)
+ standard_kernel_conv1d = (3,)
+ int64_tensor_kernel_conv1d = (1,)
+ standard_data_list_conv2d = [(32, 3, 256, 256), (32, 3, 64, 64)]
+ int64_tensor_data_list_conv2d = [(2**28, 1, 4, 4)]
+ standard_weight_conv2d = (1, 3, 3, 3)
+ int64_tensor_weight_conv2d = (1, 1, 1, 1)
+ standard_kernel_conv2d = (3, 3)
+ int64_tensor_kernel_conv2d = (1, 1)
+
+ if int64_tensor == 'on':
+ data_list_conv1d = int64_tensor_data_list_conv1d
+ weight_conv1d = int64_tensor_weight_conv1d
+ kernel_conv1d = int64_tensor_kernel_conv1d
+ data_list_conv2d = int64_tensor_data_list_conv2d
+ weight_conv2d = int64_tensor_weight_conv2d
+ kernel_conv2d = int64_tensor_kernel_conv2d
+ else:
+ data_list_conv1d = standard_data_list_conv1d
+ weight_conv1d = standard_weight_conv1d
+ kernel_conv1d = standard_kernel_conv1d
+ data_list_conv2d = standard_data_list_conv2d
+ weight_conv2d = standard_weight_conv2d
+ kernel_conv2d = standard_kernel_conv2d
+
conv1d_benchmark_res = []
- for conv_data in [(32, 3, 256), (32, 3, 64)]:
+ conv2d_benchmark_res = []
+ # Conv1D Benchmarks
+ for conv_data in data_list_conv1d:
conv1d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE,
"Convolution")],
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=[{"data":
conv_data,
- "weight": (64,
3, 3),
- "bias": (64,),
- "kernel": (3,),
+ "weight":
weight_conv1d,
+ "bias": (1,),
+ "kernel":
kernel_conv1d,
"stride": (1,),
"dilate": (1,),
"pad": (0,),
- "num_filter": 64,
- "layout": 'NCW'}
- ],
+ "num_filter": 1,
+ "layout":
'NCW'}],
warmup=warmup,
runs=runs)
# Conv2D Benchmarks
- conv2d_benchmark_res = []
- for conv_data in [(32, 3, 256, 256), (32, 3, 64, 64)]:
+ for conv_data in data_list_conv2d:
conv2d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE,
"Convolution")],
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=[{"data":
conv_data,
- "weight": (64,
3, 3, 3),
- "bias": (64,),
- "kernel": (3, 3),
+ "weight":
weight_conv2d,
+ "bias": (1,),
+ "kernel":
kernel_conv2d,
"stride": (1, 1),
"dilate": (1, 1),
"pad": (0, 0),
- "num_filter": 64,
- "layout": 'NCHW'}
- ],
+ "num_filter": 1,
+ "layout":
'NCHW'}],
warmup=warmup,
runs=runs)
# Prepare combined results
@@ -157,50 +246,98 @@ def run_convolution_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler
return mx_conv_op_results
-def run_transpose_convolution_operators_benchmarks(ctx=mx.cpu(),
profiler='native', dtype='float32', warmup=10, runs=50):
+def run_transpose_convolution_operators_benchmarks(ctx=mx.cpu(),
profiler='native', int64_tensor='off', dtype='float32', warmup=25, runs=100):
+ """Runs benchmarks with the given context, precision (dtype), and input
data size (int64_tensor) for all the transpose convolution
+ operators in MXNet.
+
+ Parameters
+ ----------
+ ctx: mx.ctx
+ Context to run benchmarks
+ dtype: str, default 'float32'
+ Precision to use for benchmarks
+ profiler: str, default 'native'
+ Type of Profiler to use (native/python)
+ int64_tensor: str, default 'off'
+ Input tensor size to use for tests (if on, dimensions >= 2**32)
+ warmup: int, default 25
+ Number of times to run for warmup
+ runs: int, default 100
+ Number of runs to capture benchmark results
+
+ Returns
+ -------
+ Dictionary of results. Key -> Name of the operator, Value -> Benchmark
results.
+
+ """
+
+ standard_data_list_conv1d_transpose = [(32, 3, 256), (32, 3, 64)]
+ int64_tensor_data_list_conv1d_transpose = [(2**30, 1, 4)]
+ standard_weight_conv1d_transpose = (3, 1, 3)
+ int64_tensor_weight_conv1d_transpose = (1, 1, 1)
+ standard_kernel_conv1d_transpose = (3,)
+ int64_tensor_kernel_conv1d_transpose = (1,)
+ standard_data_list_conv2d_transpose = [(32, 3, 256, 256), (32, 3, 64, 64)]
+ int64_tensor_data_list_conv2d_transpose = [(2**28, 1, 4, 4)]
+ standard_weight_conv2d_transpose = (3, 1, 3, 3)
+ int64_tensor_weight_conv2d_transpose = (1, 1, 1, 1)
+ standard_kernel_conv2d_transpose = (3, 3)
+ int64_tensor_kernel_conv2d_transpose = (1, 1)
+
+ if int64_tensor == 'on':
+ data_list_conv1d_transpose = int64_tensor_data_list_conv1d_transpose
+ weight_conv1d_transpose = int64_tensor_weight_conv1d_transpose
+ kernel_conv1d_transpose = int64_tensor_kernel_conv1d_transpose
+ data_list_conv2d_transpose = int64_tensor_data_list_conv2d_transpose
+ weight_conv2d_transpose = int64_tensor_weight_conv2d_transpose
+ kernel_conv2d_transpose = int64_tensor_kernel_conv2d_transpose
+ else:
+ data_list_conv1d_transpose = standard_data_list_conv1d_transpose
+ weight_conv1d_transpose = standard_weight_conv1d_transpose
+ kernel_conv1d_transpose = standard_kernel_conv1d_transpose
+ data_list_conv2d_transpose = standard_data_list_conv2d_transpose
+ weight_conv2d_transpose = standard_weight_conv2d_transpose
+ kernel_conv2d_transpose = standard_kernel_conv2d_transpose
+
# Conv1DTranspose Benchmarks
conv1d_transpose_benchmark_res = []
- for conv_data in [(32, 3, 256), (32, 3, 64)]:
+ for conv_data in data_list_conv1d_transpose:
conv1d_transpose_benchmark_res +=
run_performance_test([getattr(MX_OP_MODULE, "Deconvolution")],
-
run_backward=True,
- dtype=dtype,
- ctx=ctx,
-
profiler=profiler,
-
inputs=[{"data": conv_data,
-
"weight": (3, 64, 3),
-
"bias": (64,),
-
"kernel": (3,),
-
"stride": (1,),
-
"dilate": (1,),
- "pad":
(0,),
- "adj":
(0,),
-
"num_filter": 64,
-
"no_bias": False,
-
"layout": 'NCW'}
- ],
- warmup=warmup,
- runs=runs)
+
run_backward=True,
+ dtype=dtype,
+ ctx=ctx,
+
profiler=profiler,
+
inputs=[{"data": conv_data,
+
"weight": weight_conv1d_transpose,
+
"bias": (1,),
+
"kernel": kernel_conv1d_transpose,
+
"stride": (1,),
+
"dilate": (1,),
+
"pad": (0,),
+
"num_filter": 1,
+
"no_bias": False,
+
"layout": 'NCW'}],
+
warmup=warmup,
+ runs=runs)
# Conv2DTranspose Benchmarks
conv2d_transpose_benchmark_res = []
- for conv_data in [(32, 3, 256, 256), (32, 3, 64, 64)]:
+ for conv_data in data_list_conv2d_transpose:
conv2d_transpose_benchmark_res +=
run_performance_test([getattr(MX_OP_MODULE, "Deconvolution")],
-
run_backward=True,
- dtype=dtype,
- ctx=ctx,
-
profiler=profiler,
-
inputs=[{"data": conv_data,
-
"weight": (3, 64, 3, 3),
-
"bias": (64,),
-
"kernel": (3, 3),
-
"stride": (1, 1),
-
"dilate": (1, 1),
- "pad":
(0, 0),
-
"num_filter": 64,
-
"no_bias": False,
-
"layout": 'NCHW'}
- ],
- warmup=warmup,
- runs=runs)
+
run_backward=True,
+ dtype=dtype,
+ ctx=ctx,
+
profiler=profiler,
+
inputs=[{"data": conv_data,
+
"weight": weight_conv2d_transpose,
+
"bias": (1,),
+
"kernel": kernel_conv2d_transpose,
+
"stride": (1, 1),
+
"pad": (0, 0),
+
"num_filter": 1,
+
"no_bias": False,
+
"layout": 'NCHW'}],
+
warmup=warmup,
+ runs=runs)
# Prepare combined results
mx_transpose_conv_op_results =
merge_map_list(conv1d_transpose_benchmark_res + conv2d_transpose_benchmark_res)
return mx_transpose_conv_op_results
diff --git a/benchmark/opperf/nd_operations/nn_loss_operators.py
b/benchmark/opperf/nd_operations/nn_loss_operators.py
index 9d89408..dea19f1 100644
--- a/benchmark/opperf/nd_operations/nn_loss_operators.py
+++ b/benchmark/opperf/nd_operations/nn_loss_operators.py
@@ -28,8 +28,8 @@ from benchmark.opperf.utils.op_registry_utils import
get_all_loss_operators
"""
-def run_loss_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', warmup=25, runs=100):
- """Runs benchmarks with the given context and precision (dtype) for all the
+def run_loss_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', int64_tensor='off', warmup=25, runs=100):
+ """Runs benchmarks with the given context, precision (dtype), and data
size (int64_tensor) for all the
Neural Network loss operators in MXNet.
Parameters
@@ -40,6 +40,8 @@ def run_loss_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='nativ
Precision to use for benchmarks
profiler: str, default 'native'
Type of Profiler to use (native/python)
+ int64_tensor: str, default 'off'
+ Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
@@ -54,5 +56,5 @@ def run_loss_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='nativ
mx_loss_ops = get_all_loss_operators()
# Run benchmarks
- mx_loss_op_results = run_op_benchmarks(mx_loss_ops, dtype, ctx, profiler,
warmup, runs)
+ mx_loss_op_results = run_op_benchmarks(mx_loss_ops, dtype, ctx, profiler,
int64_tensor, warmup, runs)
return mx_loss_op_results
diff --git a/benchmark/opperf/nd_operations/nn_optimizer_operators.py
b/benchmark/opperf/nd_operations/nn_optimizer_operators.py
index ac38065..db18b30 100644
--- a/benchmark/opperf/nd_operations/nn_optimizer_operators.py
+++ b/benchmark/opperf/nd_operations/nn_optimizer_operators.py
@@ -54,8 +54,8 @@ from benchmark.opperf.rules.default_params import MX_OP_MODULE
"""
-def run_optimizer_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', warmup=25, runs=100):
- """Runs benchmarks with the given context and precision (dtype) for all
the neural network
+def run_optimizer_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', int64_tensor='off', warmup=25, runs=100):
+ """Runs benchmarks with the given context, precision (dtype), and input
data size (int64_tensor) for all the neural network
optimizer update operators in MXNet.
Parameters
@@ -66,6 +66,8 @@ def run_optimizer_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='
Precision to use for benchmarks
profiler: str, default 'native'
Type of Profiler to use (native/python)
+ int64_tensor: str, default 'off'
+ Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
@@ -76,60 +78,68 @@ def run_optimizer_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='
Dictionary of results. Key -> Name of the operator, Value -> Benchmark
results.
"""
+ standard_shape = (5, 5)
+ int64_tensor_shape = (2**16, 2**16)
+
+ if int64_tensor == 'on':
+ arg_shape = int64_tensor_shape
+ else:
+ arg_shape = standard_shape
+
# Run independent tests for ops that need specific input data
multi_mp_sgd_mom_res = run_performance_test([getattr(MX_OP_MODULE,
"multi_mp_sgd_mom_update")],
- inputs=[{"args0":
nd.random_normal(shape=(5,5)),
- "args1":
nd.random_normal(shape=(5,5)), "args2": nd.random_normal(shape=(5,5)),
- "args3":
nd.random_normal(shape=(5,5)), "lrs": 0.1, "wds": 0.2,
- "out":
nd.random_normal(shape=(5,5))}],run_backward=False)
+ inputs=[{"args0":
nd.random_normal(shape=arg_shape),
+ "args1":
nd.random_normal(shape=arg_shape), "args2": nd.random_normal(shape=arg_shape),
+ "args3":
nd.random_normal(shape=arg_shape), "lrs": 0.1, "wds": 0.2,
+ "out":
nd.random_normal(shape=arg_shape)}],run_backward=False)
multi_sgd_mom_res = run_performance_test([getattr(MX_OP_MODULE,
"multi_sgd_mom_update")],
- inputs=[{"args0":
nd.random_normal(shape=(5,5)),
- "args1":
nd.random_normal(shape=(5,5)),"args2": nd.random_normal(shape=(5,5)),
- "lrs": 0.1, "wds": 0.2, "out":
nd.random_normal(shape=(5,5))}], run_backward=False)
+ inputs=[{"args0":
nd.random_normal(shape=arg_shape),
+ "args1":
nd.random_normal(shape=arg_shape),"args2": nd.random_normal(shape=arg_shape),
+ "lrs": 0.1, "wds": 0.2, "out":
nd.random_normal(shape=arg_shape)}], run_backward=False)
multi_sgd_res = run_performance_test([getattr(MX_OP_MODULE,
"multi_sgd_update")],
- inputs=[{"args0":
nd.random_normal(shape=(5,5)),
- "args1":
nd.random_normal(shape=(5,5)), "lrs": 0.1, "wds": 0.2,
- "out":
nd.random_normal(shape=(5,5))}], run_backward=False)
+ inputs=[{"args0":
nd.random_normal(shape=arg_shape),
+ "args1":
nd.random_normal(shape=arg_shape), "lrs": 0.1, "wds": 0.2,
+ "out":
nd.random_normal(shape=arg_shape)}], run_backward=False)
multi_mp_sgd_res = run_performance_test([getattr(MX_OP_MODULE,
"multi_mp_sgd_update")],
- inputs=[{"args0":
nd.random_normal(shape=(5,5)),
- "args1":
nd.random_normal(shape=(5,5)),"args2": nd.random_normal(shape=(5,5)),
- "lrs": 0.1, "wds": 0.2, "out":
nd.random_normal(shape=(5,5))}], run_backward=False)
+ inputs=[{"args0":
nd.random_normal(shape=arg_shape),
+ "args1":
nd.random_normal(shape=arg_shape),"args2": nd.random_normal(shape=arg_shape),
+ "lrs": 0.1, "wds": 0.2, "out":
nd.random_normal(shape=arg_shape)}], run_backward=False)
preloaded_multi_mp_sgd_res = run_performance_test(
[getattr(MX_OP_MODULE,
"preloaded_multi_mp_sgd_update")],
- inputs=[{"args0":
nd.random_normal(shape=(5,5)),
- "args1":
nd.random_normal(shape=(5,5)), "args2": nd.random_normal(shape=(5,5)),
+ inputs=[{"args0":
nd.random_normal(shape=arg_shape),
+ "args1":
nd.random_normal(shape=arg_shape), "args2": nd.random_normal(shape=arg_shape),
"args3":
nd.random_normal(shape=(1)), "args4": nd.random_normal(shape=(1)),
- "out":
nd.random_normal(shape=(5,5))}], run_backward=False)
+ "out":
nd.random_normal(shape=arg_shape)}], run_backward=False)
preloaded_multi_sgd_mom_res = run_performance_test(
[getattr(MX_OP_MODULE,
"preloaded_multi_sgd_mom_update")],
- inputs=[{"args0":
nd.random_normal(shape=(5,5)),
- "args1":
nd.random_normal(shape=(5,5)), "args2": nd.random_normal(shape=(5,5)),
+ inputs=[{"args0":
nd.random_normal(shape=arg_shape),
+ "args1":
nd.random_normal(shape=arg_shape), "args2": nd.random_normal(shape=arg_shape),
"args3":
nd.random_normal(shape=(1)), "args4": nd.random_normal(shape=(1)),
- "out":
nd.random_normal(shape=(5,5))}], run_backward=False)
+ "out":
nd.random_normal(shape=arg_shape)}], run_backward=False)
preloaded_multi_sgd_res = run_performance_test(
[getattr(MX_OP_MODULE,
"preloaded_multi_sgd_update")],
- inputs=[{"args0": nd.random_normal(shape=(5,5)),
"args1": nd.random_normal(shape=(5,5)),
+ inputs=[{"args0":
nd.random_normal(shape=arg_shape), "args1": nd.random_normal(shape=arg_shape),
"args4": nd.random_normal(shape=(1)),
"args5": nd.random_normal(shape=(1)),
- "out": nd.random_normal(shape=(5,5))}],
run_backward=False)
+ "out":
nd.random_normal(shape=arg_shape)}], run_backward=False)
preloaded_multi_mp_sgd_mom_res = run_performance_test(
[getattr(MX_OP_MODULE,
"preloaded_multi_mp_sgd_mom_update")],
- inputs=[{"args0":
nd.random_normal(shape=(5,5)), "args1": nd.random_normal(shape=(5,5)),
- "args2":
nd.random_normal(shape=(5,5)), "args3": nd.random_normal(shape=(5,5)),
+ inputs=[{"args0":
nd.random_normal(shape=arg_shape), "args1": nd.random_normal(shape=arg_shape),
+ "args2":
nd.random_normal(shape=arg_shape), "args3": nd.random_normal(shape=arg_shape),
"args4":
nd.random_normal(shape=(1)), "args5": nd.random_normal(shape=(1)),
- "out":
nd.random_normal(shape=(5,5))}], run_backward=False)
+ "out":
nd.random_normal(shape=arg_shape)}], run_backward=False)
# Fetch remaining optimizer operators
mx_optimizer_ops = get_all_optimizer_operators()
# Run benchmarks
- mx_optimizer_op_results = run_op_benchmarks(mx_optimizer_ops, dtype, ctx,
profiler, warmup, runs)
+ mx_optimizer_op_results = run_op_benchmarks(mx_optimizer_ops, dtype, ctx,
profiler, int64_tensor, warmup, runs)
return merge_map_list(multi_sgd_mom_res + multi_sgd_mom_res +
multi_sgd_res + multi_mp_sgd_res + preloaded_multi_mp_sgd_res +\
preloaded_multi_sgd_mom_res +
preloaded_multi_mp_sgd_res + preloaded_multi_mp_sgd_mom_res +\
- [mx_optimizer_op_results])
+ multi_mp_sgd_mom_res + preloaded_multi_sgd_res +
[mx_optimizer_op_results])
diff --git a/benchmark/opperf/nd_operations/random_sampling_operators.py
b/benchmark/opperf/nd_operations/random_sampling_operators.py
index b6a1f44..777f26a 100644
--- a/benchmark/opperf/nd_operations/random_sampling_operators.py
+++ b/benchmark/opperf/nd_operations/random_sampling_operators.py
@@ -34,8 +34,8 @@ from benchmark.opperf.utils.benchmark_utils import
run_op_benchmarks
from benchmark.opperf.utils.op_registry_utils import
get_all_random_sampling_operators
-def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', warmup=25, runs=100):
- """Runs benchmarks with the given context and precision (dtype)for all the
random sampling
+def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', int64_tensor='off', warmup=25, runs=100):
+ """Runs benchmarks with the given context, precision (dtype), and input
data size (int64_tensor) for all the random sampling
operators in MXNet.
Parameters
@@ -46,6 +46,8 @@ def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', p
Precision to use for benchmarks
profiler: str, default 'native'
Type of Profiler to use (native/python)
+ int64_tensor: str, default 'off'
+ Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
@@ -59,5 +61,5 @@ def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', p
# Fetch all Random Sampling Operators
mx_random_sample_ops = get_all_random_sampling_operators()
# Run benchmarks
- mx_random_sample_op_results = run_op_benchmarks(mx_random_sample_ops,
dtype, ctx, profiler, warmup, runs)
+ mx_random_sample_op_results = run_op_benchmarks(mx_random_sample_ops,
dtype, ctx, profiler, int64_tensor, warmup, runs)
return mx_random_sample_op_results
diff --git a/benchmark/opperf/nd_operations/reduction_operators.py
b/benchmark/opperf/nd_operations/reduction_operators.py
index 6cc0d49..d6e4b6d 100644
--- a/benchmark/opperf/nd_operations/reduction_operators.py
+++ b/benchmark/opperf/nd_operations/reduction_operators.py
@@ -31,8 +31,8 @@ from benchmark.opperf.utils.op_registry_utils import
get_all_reduction_operators
from benchmark.opperf.utils.benchmark_utils import run_op_benchmarks
-def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', warmup=25, runs=100):
- """Runs benchmarks with the given context and precision (dtype)for all the
reduction
+def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', int64_tensor='off', warmup=25, runs=100):
+ """Runs benchmarks with the given context, precision (dtype), and input
data size (int64_tensor) for all the reduction
operators in MXNet.
Parameters
@@ -43,6 +43,8 @@ def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profile
Precision to use for benchmarks
profiler: str, default 'native'
Type of Profiler to use (native/python)
+ int64_tensor: str, default 'off'
+ Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
@@ -56,5 +58,5 @@ def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profile
# Fetch all Reduction Operators
mx_reduction_broadcast_ops = get_all_reduction_operators()
# Run benchmarks
- mx_reduction_op_results = run_op_benchmarks(mx_reduction_broadcast_ops,
dtype, ctx, profiler, warmup, runs)
+ mx_reduction_op_results = run_op_benchmarks(mx_reduction_broadcast_ops,
dtype, ctx, profiler, int64_tensor, warmup, runs)
return mx_reduction_op_results
diff --git a/benchmark/opperf/nd_operations/sorting_searching_operators.py
b/benchmark/opperf/nd_operations/sorting_searching_operators.py
index 2d936cd..d0d9fc0 100644
--- a/benchmark/opperf/nd_operations/sorting_searching_operators.py
+++ b/benchmark/opperf/nd_operations/sorting_searching_operators.py
@@ -29,8 +29,8 @@ from benchmark.opperf.utils.op_registry_utils import
get_all_sorting_searching_o
"""
-def run_sorting_searching_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', warmup=25, runs=100):
- """Runs benchmarks with the given context and precision (dtype)for all the
sorting and searching
+def run_sorting_searching_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', int64_tensor='off', warmup=25, runs=100):
+ """Runs benchmarks with the given context, precision (dtype), and input
data size (int64_tensor) for all the sorting and searching
operators in MXNet.
Parameters
@@ -41,6 +41,8 @@ def run_sorting_searching_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', pr
Precision to use for benchmarks
profiler: str, default 'native'
Type of Profiler to use (native/python)
+ int64_tensor: str, default 'off'
+ Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
@@ -54,5 +56,5 @@ def run_sorting_searching_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', pr
# Fetch all Random Sampling Operators
mx_sort_search_ops = get_all_sorting_searching_operators()
# Run benchmarks
- mx_sort_search_op_results = run_op_benchmarks(mx_sort_search_ops, dtype,
ctx, profiler, warmup, runs)
+ mx_sort_search_op_results = run_op_benchmarks(mx_sort_search_ops, dtype,
ctx, profiler, int64_tensor, warmup, runs)
return mx_sort_search_op_results
diff --git a/benchmark/opperf/nd_operations/unary_operators.py
b/benchmark/opperf/nd_operations/unary_operators.py
index 0807590..53cab57 100644
--- a/benchmark/opperf/nd_operations/unary_operators.py
+++ b/benchmark/opperf/nd_operations/unary_operators.py
@@ -38,8 +38,8 @@ from benchmark.opperf.utils.benchmark_utils import
run_performance_test
from benchmark.opperf.utils.common_utils import merge_map_list
from benchmark.opperf.rules.default_params import MX_OP_MODULE
-def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', warmup=25, runs=100):
- """Runs benchmarks with the given context and precision (dtype)for all the
unary
+def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', int64_tensor='off', warmup=25, runs=100):
+ """Runs benchmarks with the given context, precision (dtype), and input
data size (int64_tensor) for all the unary
operators in MXNet.
Parameters
@@ -50,6 +50,8 @@ def run_mx_unary_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='n
Precision to use for benchmarks
profiler: str, default 'native'
Type of Profiler to use (native/python)
+ int64_tensor: str, default 'off'
+ Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
@@ -60,16 +62,26 @@ def run_mx_unary_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='n
Dictionary of results. Key -> Name of the operator, Value -> Benchmark
results.
"""
+
+ standard_inputs = [{"args": [(1024, 1024)],
+ "num_outputs":1},
+ {"args": [(10000, 1)],
+ "num_outputs":1}]
+ int64_tensor_inputs = [{"args": [(2**32, 1)],
+ "num_outputs":1}]
+
+ if int64_tensor == 'on':
+ inputs = int64_tensor_inputs
+ else:
+ inputs = standard_inputs
+
# Run amp_multicast as it needs data as positional argument
amp_multicast_benchmark = run_performance_test([getattr(MX_OP_MODULE,
"amp_multicast")],
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
- inputs=[{"args": [(1024,
1024)],
- "num_outputs":1},
- {"args": [(10000,
1)],
- "num_outputs":1}],
+ inputs=inputs,
warmup=warmup,
runs=runs)
@@ -77,5 +89,5 @@ def run_mx_unary_operators_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='n
mx_unary_broadcast_ops = get_all_unary_operators()
# Run benchmarks
- mx_unary_op_results = run_op_benchmarks(mx_unary_broadcast_ops, dtype,
ctx, profiler, warmup, runs)
+ mx_unary_op_results = run_op_benchmarks(mx_unary_broadcast_ops, dtype,
ctx, profiler, int64_tensor, warmup, runs)
return merge_map_list(amp_multicast_benchmark + [mx_unary_op_results])
diff --git a/benchmark/opperf/opperf.py b/benchmark/opperf/opperf.py
index 5b8c43f..c0ac7b7 100755
--- a/benchmark/opperf/opperf.py
+++ b/benchmark/opperf/opperf.py
@@ -51,7 +51,7 @@ from benchmark.opperf.utils.op_registry_utils import
get_operators_with_no_bench
get_current_runtime_features
-def run_all_mxnet_operator_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', warmup=25, runs=100):
+def run_all_mxnet_operator_benchmarks(ctx=mx.cpu(), dtype='float32',
profiler='native', int64_tensor='off', warmup=25, runs=100):
"""Run all the MXNet operators (NDArray) benchmarks.
Returns
@@ -63,64 +63,66 @@ def run_all_mxnet_operator_benchmarks(ctx=mx.cpu(),
dtype='float32', profiler='n
# *************************MXNET TENSOR OPERATOR
BENCHMARKS*****************************
# Run all Unary operations benchmarks with default input values
-
mxnet_operator_benchmark_results.append(run_mx_unary_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
mxnet_operator_benchmark_results.append(run_mx_unary_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, int64_tensor=int64_tensor, warmup=warmup,
runs=runs))
# Run all Binary Broadcast, element_wise, and miscellaneous operations
benchmarks with default input values
mxnet_operator_benchmark_results.append(run_mx_binary_broadcast_operators_benchmarks(ctx=ctx,
-
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
dtype=dtype, profiler=profiler, int64_tensor=int64_tensor,
warmup=warmup, runs=runs))
mxnet_operator_benchmark_results.append(run_mx_binary_element_wise_operators_benchmarks(ctx=ctx,
-
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
dtype=dtype, profiler=profiler, int64_tensor=int64_tensor,
warmup=warmup, runs=runs))
mxnet_operator_benchmark_results.append(run_mx_binary_misc_operators_benchmarks(ctx=ctx,
-
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
dtype=dtype, profiler=profiler, int64_tensor=int64_tensor,
warmup=warmup, runs=runs))
# Run all GEMM operations benchmarks with default input values
mxnet_operator_benchmark_results.append(run_gemm_operators_benchmarks(ctx=ctx,
-
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
dtype=dtype, profiler=profiler, int64_tensor=int64_tensor, warmup=warmup,
runs=runs))
# Run all Random sampling operations benchmarks with default input values
-
mxnet_operator_benchmark_results.append(run_mx_random_sampling_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
mxnet_operator_benchmark_results.append(run_mx_random_sampling_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, int64_tensor=int64_tensor, warmup=warmup,
runs=runs))
# Run all Reduction operations benchmarks with default input values
-
mxnet_operator_benchmark_results.append(run_mx_reduction_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
mxnet_operator_benchmark_results.append(run_mx_reduction_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, int64_tensor=int64_tensor, warmup=warmup,
runs=runs))
# Run all Sorting and Searching operations benchmarks with default input
values
-
mxnet_operator_benchmark_results.append(run_sorting_searching_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
mxnet_operator_benchmark_results.append(run_sorting_searching_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, int64_tensor=int64_tensor, warmup=warmup,
runs=runs))
# Run all Array Rearrange operations benchmarks with default input values
-
mxnet_operator_benchmark_results.append(run_rearrange_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
mxnet_operator_benchmark_results.append(run_rearrange_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, int64_tensor=int64_tensor, warmup=warmup,
runs=runs))
# Run all Indexing routines benchmarks with default input values
-
mxnet_operator_benchmark_results.append(run_indexing_routines_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
mxnet_operator_benchmark_results.append(run_indexing_routines_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, int64_tensor=int64_tensor, warmup=warmup,
runs=runs))
# ************************ MXNET NN OPERATOR BENCHMARKS
****************************
# Run all basic NN operations benchmarks with default input values
-
mxnet_operator_benchmark_results.append(run_nn_basic_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
mxnet_operator_benchmark_results.append(run_nn_basic_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, int64_tensor=int64_tensor, warmup=warmup,
runs=runs))
# Run all Activation operations benchmarks with default input values
-
mxnet_operator_benchmark_results.append(run_activation_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
mxnet_operator_benchmark_results.append(run_activation_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, int64_tensor=int64_tensor, warmup=warmup,
runs=runs))
# Run all Pooling operations benchmarks with default input values
-
mxnet_operator_benchmark_results.append(run_pooling_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
mxnet_operator_benchmark_results.append(run_pooling_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, int64_tensor=int64_tensor, warmup=warmup,
runs=runs))
# Run all Convolution operations benchmarks with default input values
-
mxnet_operator_benchmark_results.append(run_convolution_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
mxnet_operator_benchmark_results.append(run_convolution_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, int64_tensor=int64_tensor, warmup=warmup,
runs=runs))
# Run all Optimizer operations benchmarks with default input values
-
mxnet_operator_benchmark_results.append(run_optimizer_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
-
+
mxnet_operator_benchmark_results.append(run_optimizer_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, int64_tensor=int64_tensor, warmup=warmup,
runs=runs))
+
# Run all Transpose Convolution operations benchmarks with default input
values
-
mxnet_operator_benchmark_results.append(run_transpose_convolution_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
mxnet_operator_benchmark_results.append(run_transpose_convolution_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, int64_tensor=int64_tensor, warmup=warmup,
runs=runs))
# Run all NN loss operations benchmarks with default input values
-
mxnet_operator_benchmark_results.append(run_loss_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
mxnet_operator_benchmark_results.append(run_loss_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, int64_tensor=int64_tensor, warmup=warmup,
runs=runs))
# Run all Miscellaneous operations benchmarks with default input values
-
mxnet_operator_benchmark_results.append(run_mx_misc_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
mxnet_operator_benchmark_results.append(run_mx_misc_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, int64_tensor=int64_tensor, warmup=warmup,
runs=runs))
- # Run all Linear Algebra operations benchmarks with default input values
-
mxnet_operator_benchmark_results.append(run_linalg_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+ # Linear Algebra operators do not work with int64 tensor data. Issue
tracked here: https://github.com/apache/incubator-mxnet/issues/17716
+ if int64_tensor == 'off':
+ # Run all Linear Algebra operations benchmarks with default input
values
+
mxnet_operator_benchmark_results.append(run_linalg_operators_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, int64_tensor=int64_tensor, warmup=warmup,
runs=runs))
# ****************************** PREPARE FINAL RESULTS
********************************
final_benchmark_result_map =
merge_map_list(mxnet_operator_benchmark_results)
@@ -162,6 +164,11 @@ def main():
help='Use built-in CPP profiler (native) or Python'
'time module.'
'Valid Inputs - native, python')
+
+    parser.add_argument('--int64-tensor', type=str, default='off',
+                        help='Run performance tests with large tensor input '
+                             'data (dimension >= 2**32) or standard input data. '
+                             'Valid Inputs - on, off')
parser.add_argument('-w', '--warmup', type=int, default=25,
help='Number of times to run for warmup.'
@@ -169,7 +176,7 @@ def main():
parser.add_argument('-r', '--runs', type=int, default=100,
help='Number of runs to capture benchmark results.'
- 'Valid Inputs - positive integers')
+ 'Valid Inputs - positive integers')
args = parser.parse_args()
logging.info("Running MXNet operator benchmarks with the following
options: {args}".format(args=args))
@@ -180,9 +187,10 @@ def main():
ctx = _parse_mxnet_context(args.ctx)
dtype = args.dtype
profiler = args.profiler
+ int64_tensor = args.int64_tensor
warmup = args.warmup
runs = args.runs
- benchmark_results = run_all_mxnet_operator_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, warmup=warmup, runs=runs)
+ benchmark_results = run_all_mxnet_operator_benchmarks(ctx=ctx,
dtype=dtype, profiler=profiler, int64_tensor=int64_tensor, warmup=warmup,
runs=runs)
# Sort benchmark results alphabetically by op name
final_benchmark_results = dict()
diff --git a/benchmark/opperf/rules/default_params.py
b/benchmark/opperf/rules/default_params.py
index 15bcd72..a4362fa 100644
--- a/benchmark/opperf/rules/default_params.py
+++ b/benchmark/opperf/rules/default_params.py
@@ -35,15 +35,22 @@ DEFAULT_DTYPE = ['float32', 'int32', 'float32'] # required
parameter for amp_ca
DEFAULT_DTYPE_INT = ['int32', 'int64', 'int32'] # randint works for int*
types only
DEFAULT_DTYPE_FLOAT = ['float16', 'float32', 'float64'] # random_exp works
for float* types only
+DEFAULT_DATA_LARGE_TENSOR = [(2**16, 2**16)]
+
# For Binary miscellaneous operators like choose_element0_index
# argument data must be indexed via an NDArray.
# NOTE: Data used is DEFAULT_DATA
DEFAULT_INDEX = [(1, 1024), (1, 1), (1, 100)]
+DEFAULT_INDEX_LARGE_TENSOR = [(1, 2**16)]
+
# For Binary broadcast operators like - broadcast_add/sub/mod/logical_and etc..
DEFAULT_LHS = [(1024, 1024), (10000, 10), (10000, 1)]
DEFAULT_RHS = [(1024, 1024), (10000, 10), (10000, 1)]
+DEFAULT_LHS_LARGE_TENSOR = [(2**16, 2**16), (2**28, 2**4), (2**32, 1)]
+DEFAULT_RHS_LARGE_TENSOR = [(2**16, 2**16), (2**28, 2**4), (2**32, 1)]
+
# For operators like - random_uniform, random_normal etc..
DEFAULT_SHAPE = [(1024, 1024), (10000, 1), (10000, 100)]
DEFAULT_SAMPLE = [(2,)]
@@ -52,6 +59,15 @@ DEFAULT_HIGH = [5]
DEFAULT_K = [1]
DEFAULT_P = [1]
+DEFAULT_SHAPE_LARGE_TENSOR = [(2**16, 2**16)]  # NOTE(review): shapes (2**32, 1) and (2**25, 2**7) disabled here — confirm whether they should be re-enabled
+DEFAULT_SAMPLE_LARGE_TENSOR = [(2**32,)]
+DEFAULT_DATA_RPD_LARGE_TENSOR = [(2**32 + 1, 5)]
+DEFAULT_ALPHA_RPD_LARGE_TENSOR = [(2**32,)]
+DEFAULT_SAMPLE_RPE_LARGE_TENSOR = [(1, 2**32)]
+DEFAULT_LAM_RPE_LARGE_TENSOR = [(1,)]
+DEFAULT_SAMPLE_RPG_LARGE_TENSOR = [(1, 2**32 + 1)]
+DEFAULT_ALPHA_RPG_LARGE_TENSOR = [(1,)]
+
# For operators like - sample_uniform, sample_normal etc..
# NOTE: There are many overlapping operators in random_* and sample_*,
# Ex: random_uniform, sample_uniform. Parameter names are same, but, for
@@ -73,6 +89,24 @@ DEFAULT_DATA_GRIDGEN = [(32, 2, 256, 256), (256, 6)]
DEFAULT_TARGET_SHAPE = [(256, 6)]
DEFAULT_DATA_SM = [(32, 32), (64, 64)]
+DEFAULT_LOW_ND_LARGE_TENSOR = [[0.0] * 2**16 + [2.5] * 2**16]
+DEFAULT_HIGH_ND_LARGE_TENSOR = [[1.0] * 2**16 + [3.7] * 2**16]
+DEFAULT_MU_ND_LARGE_TENSOR = [[2.0] * 2**16 + [2.5] * 2**16]
+DEFAULT_SIGMA_LARGE_TENSOR = [[1.0] * 2**16 + [3.7] * 2**16]
+DEFAULT_ALPHA_ND_LARGE_TENSOR = [[0.0] * 2**16 + [2.5] * 2**16]
+DEFAULT_BETA_ND_LARGE_TENSOR = [[1.0] * 2**16 + [0.7] * 2**16]
+DEFAULT_LAM_ND_LARGE_TENSOR = [[1.0] * 2**16 + [8.5] * 2**16]
+DEFAULT_K_ND_LARGE_TENSOR = [[20] * 2**16 + [49] * 2**16]
+DEFAULT_P_ND_LARGE_TENSOR = [[0.4] * 2**16 + [0.77] * 2**16]
+DEFAULT_DATA_BILINEAR_LARGE_TENSOR = [(2**32, 1, 1, 1)]
+DEFAULT_GRID_LARGE_TENSOR = [(2**32, 2, 1, 1)]
+DEFAULT_DATA_GRIDGEN_LARGE_TENSOR = [(2**31, 2, 1, 1), (1, 6)]
+DEFAULT_TARGET_SHAPE_LARGE_TENSOR = [(1, 6)]
+DEFAULT_DATA_SM_LARGE_TENSOR = [(2**32,)]
+DEFAULT_SHAPE_SE_LARGE_TENSOR = [(1,)]
+DEFAULT_LAM_SE_LARGE_TENSOR = [(2**32 + 1,)]
+DEFAULT_SHAPE_SU_LARGE_TENSOR = [(2**32,)]
+
# For reduction operators
# NOTE: Data used is DEFAULT_DATA
DEFAULT_AXIS_SHAPE = [(), 0, (0, 1)]
@@ -107,7 +141,6 @@ DEFAULT_ALPHA = [.001]
DEFAULT_NSIZE = [3]
DEFAULT_PARAMETERS = [(7,), (104,)]
DEFAULT_STATE = [(1, 4, 1), (2, 10000, 4)]
-DEFAULT_MODE = ["rnn_relu", "rnn_tanh"]
DEFAULT_STATE_SIZE = [1, 4]
DEFAULT_NUM_LAYERS = [1, 2]
DEFAULT_NUM_GROUPS = [1, 10]
@@ -119,6 +152,30 @@ DEFAULT_OUTPUT_SIZE = [(64, 16, 1), (32, 8, 1)]
DEFAULT_KERNEL = [(1, 1, 1), (1, 1, 1)]
DEFAULT_STRIDE = [(2, 2, 2), (1, 1, 1)]
+DEFAULT_DATA_NN_BASIC_LARGE_TENSOR = [(2**32 + 1, 1)]
+DEFAULT_NUM_HIDDEN_LARGE_TENSOR = [(1,)]
+DEFAULT_BIAS_LARGE_TENSOR = [(1,)]
+DEFAULT_FLATTEN_LARGE_TENSOR = [False]
+DEFAULT_GAMMA_LARGE_TENSOR = [(1,)]
+DEFAULT_BETA_LARGE_TENSOR = [(1,)]
+DEFAULT_MOVING_MEAN_LARGE_TENSOR = [(2**32 + 1,)]
+DEFAULT_MOVING_VAR_LARGE_TENSOR = [(2**32 + 1,)]
+DEFAULT_INPUT_DIM_LARGE_TENSOR = [2**32]
+DEFAULT_OUTPUT_DIM_LARGE_TENSOR = [1]
+DEFAULT_KERNEL_SIZE_LARGE_TENSOR = [1]
+DEFAULT_MAX_DISPLACEMENT_LARGE_TENSOR = [1]
+DEFAULT_STRIDE_1_LARGE_TENSOR = [1]
+DEFAULT_STRIDE_2_LARGE_TENSOR = [1]
+DEFAULT_DILATE_LARGE_TENSOR = [[]]
+DEFAULT_PAD_LARGE_TENSOR = [[]]
+DEFAULT_OUTPUT_SIZE_LARGE_TENSOR = [(2, 2, 1)]
+DEFAULT_KERNEL_LARGE_TENSOR = [(1, 1, 1)]
+DEFAULT_STRIDE_LARGE_TENSOR = [[]]
+DEFAULT_PARAMETERS_LARGE_TENSOR = [(7,)]
+DEFAULT_STATE_LARGE_TENSOR = [(1, 4, 1)]
+DEFAULT_STATE_SIZE_LARGE_TENSOR = [1]
+DEFAULT_NUM_LAYERS_LARGE_TENSOR = [1]
+
# BatchNorm
DEFAULT_AXIS_BN = [1]
@@ -132,41 +189,81 @@ DEFAULT_MODE_L2 = ['channel', 'instance', 'spatial']
# SVMOutput
DEFAULT_LABEL_SVM = [(32, 3, 256), (32, 3, 10000)]
+DEFAULT_DATA_SVM_LARGE_TENSOR = [(2**29, 2, 2, 2)]
+DEFAULT_LABEL_SVM_LARGE_TENSOR = [(2**29, 2, 2)]
+
# SoftmaxOutput
DEFAULT_LABEL_SM = [(32, 3, 256), (32, 3, 10000)]
+DEFAULT_DATA_SO_LARGE_TENSOR = [(2**29, 2, 2, 2)]
+DEFAULT_LABEL_SO_LARGE_TENSOR = [(2**29, 2, 2)]
+
# FullyConnected
DEFAULT_WEIGHT_FC = [(64, 3 * 256 * 256), (64, 10)]
+DEFAULT_DATA_FC_LARGE_TENSOR = [(2**32, 1)]
+DEFAULT_WEIGHT_FC_LARGE_TENSOR = [(1, 1)]
+DEFAULT_NUM_HIDDEN_FC_LARGE_TENSOR = [1]
+
# Embedding
DEFAULT_WEIGHT_EMBEDDING = [(3, 4), (16, 9)]
+DEFAULT_WEIGHT_EMBEDDING_LARGE_TENSOR = [(2**32, 1)]
+
# GroupNorm
DEFAULT_DATA_GN = [(32, 3, 256, 256), (32, 10, 10000, 10)]
DEFAULT_BETA_GAMMA_GN = [(1,), (10,)]
+DEFAULT_DATA_GN_LARGE_TENSOR = [(2**27, 4, 4, 2)]
+DEFAULT_BETA_GAMMA_GN_LARGE_TENSOR = [(1,)]
+
# Dropout
DEFAULT_DATA_DROPOUT = [(32, 3, 256, 256), (10000, 10)]
DEFAULT_MODE_DROPOUT = ["always"]
+DEFAULT_DATA_DROPOUT_LARGE_TENSOR = [(2**32 + 1,)]
+DEFAULT_P_DROPOUT_LARGE_TENSOR = [.5]
+DEFAULT_AXES_DROPOUT_LARGE_TENSOR = [[]]
+
# SpatialTransformer
DEFAULT_DATA_ST = [(32, 3, 256, 6), (256, 3, 10000, 6)]
DEFAULT_LOC_TAR_ST = [(32, 6), (256, 6)]
+DEFAULT_DATA_ST_LARGE_TENSOR = [(2, 2**29, 1, 6)]
+DEFAULT_LOC_TAR_ST_LARGE_TENSOR = [(2, 6)]
+
# im2col
DEFAULT_KERNEL_I2C = [(3,), (3, 3)]
DEFAULT_STRIDE_I2C = [(1,), (1, 1)]
+DEFAULT_DATA_I2C_LARGE_TENSOR = [(2**29, 2, 2, 6)]
+DEFAULT_KERNEL_I2C_LARGE_TENSOR = [(1,)]
+DEFAULT_STRIDE_I2C_LARGE_TENSOR = [[]]
+
# col2im
DEFAULT_DATA_C2I = [(32, 64, 256), (32, 64, 256)]
-# RNN
-DEFAULT_DATA_RNN = [(32, 4, 4), (512, 10000, 10)]
-DEFAULT_P_RNN = [.5]
+DEFAULT_DATA_C2I_LARGE_TENSOR = [(1, 2**30, 4)]
# LRN
DEFAULT_BETA_LRN = [.2]
+DEFAULT_DATA_LRN_LARGE_TENSOR = [(2**27, 4, 4, 2)]
+
+# Correlation
+DEFAULT_DATA1_LARGE_TENSOR = [(2**23, 8, 8, 8)]
+DEFAULT_DATA2_LARGE_TENSOR = [(2**23, 8, 8, 8)]
+
+# For regression operators
+DEFAULT_DATA_REG_LARGE_TENSOR = [(2**29, 2, 2, 2)]
+DEFAULT_LABEL_REG_LARGE_TENSOR = [(2**29, 2, 2, 2)]
+
+# For normalization operators
+DEFAULT_DATA_NORM_LARGE_TENSOR = [(2**29, 2, 2, 2)]
+DEFAULT_GAMMA_NORM_LARGE_TENSOR = [(2,)]
+DEFAULT_BETA_NORM_LARGE_TENSOR = [(2,)]
+DEFAULT_AXIS_LARGE_TENSOR = [-1]
+
# For optimizer operators
DEFAULT_WEIGHT = [(1024, 1024), (10000, 1), (10000, 100)]
DEFAULT_GRAD = [(1024, 1024), (10000, 1), (10000, 100)]
@@ -194,6 +291,20 @@ DEFAULT_CLIP_GRADIENT = [-1.0, 0.8]
DEFAULT_CLIP_WEIGHTS = [-1.0, 0.8]
DEFAULT_LAZY_UPDATE = [0, 1]
+DEFAULT_WEIGHT_LARGE_TENSOR = [(2**16, 2**16), (2**32, 1), (2**25, 2**7)]
+DEFAULT_GRAD_LARGE_TENSOR = [(2**16, 2**16), (2**32, 1), (2**25, 2**7)]
+DEFAULT_MOM_LARGE_TENSOR = [(2**16, 2**16), (2**32, 1), (2**25, 2**7)]
+DEFAULT_MEAN_LARGE_TENSOR = [(2**16, 2**16), (2**32, 1), (2**25, 2**7)]
+DEFAULT_VAR_LARGE_TENSOR = [(2**16, 2**16), (2**32, 1), (2**25, 2**7)]
+DEFAULT_N_LARGE_TENSOR = [(2**16, 2**16), (2**32, 1), (2**25, 2**7)]
+DEFAULT_D_LARGE_TENSOR = [(2**16, 2**16), (2**32, 1), (2**25, 2**7)]
+DEFAULT_V_LARGE_TENSOR = [(2**16, 2**16), (2**32, 1), (2**25, 2**7)]
+DEFAULT_Z_LARGE_TENSOR = [(2**16, 2**16), (2**32, 1), (2**25, 2**7)]
+DEFAULT_G_LARGE_TENSOR = [(2**16, 2**16), (2**32, 1), (2**25, 2**7)]
+DEFAULT_R1_LARGE_TENSOR = [(1,)]
+DEFAULT_R2_LARGE_TENSOR = [(1,)]
+DEFAULT_DELTA_LARGE_TENSOR = [(2**16, 2**16), (2**32, 1), (2**25, 2**7)]
+
# For rearrange operators
# NOTE: Data needs to be a 4D tensor for operators like space_to_depth and
depth_to_space
# Hence below we append 4d to mark the difference.
@@ -201,6 +312,9 @@ DEFAULT_LAZY_UPDATE = [0, 1]
DEFAULT_DATA_4d = [(1, 4, 2, 4), (10, 25, 10, 100)]
DEFAULT_BLOCK_SIZE = [2, 5]
+DEFAULT_DATA_4d_LARGE_TENSOR = [(1, 4, 2, 2**29), (1,2**4,2**4,2**24)]
+DEFAULT_BLOCK_SIZE_LARGE_TENSOR = [2, 4]
+
# For miscellaneous operators
DEFAULT_DATA_SQUEEZE = [(1, 1024, 1024), (32, 1, 256, 256)]
DEFAULT_AXIS_SQUEEZE = [0, 1]
@@ -217,6 +331,15 @@ DEFAULT_LHS_FEI = [(1024, 1024), (10000, 1), (10000, 100)]
DEFAULT_MHS = [(1024,), (10000,), (10000,)]
DEFAULT_RHS_FEI = [(1024,), (10000,), (10000,)]
+DEFAULT_DATA_SQUEEZE_LARGE_TENSOR = [(2**32, 1)]
+DEFAULT_AXIS_SQUEEZE_LARGE_TENSOR = [1]
+DEFAULT_WSS_LARGE_TENSOR = [(2**32, 1)]
+DEFAULT_GSS_LARGE_TENSOR = [(2**32, 1)]
+DEFAULT_WDS_LARGE_TENSOR = [(2**32, 1)]
+DEFAULT_LHS_FEI_LARGE_TENSOR = [(2, 2**32 + 1)]
+DEFAULT_RHS_FEI_LARGE_TENSOR = [(2,)]
+DEFAULT_MHS_LARGE_TENSOR = [(2,)]
+
# For swapaxis operator
DEFAULT_DIM_1 = [0]
DEFAULT_DIM_2 = [1]
@@ -231,21 +354,33 @@ DEFAULT_X = [(1024, 1024), (10000, 1), (10000, 100)]
DEFAULT_Y = [(1024, 1024), (10000, 1), (10000, 100)]
DEFAULT_COND = [(1024,), (10000,), (10000,)]
DEFAULT_DEPTH = [0]
+
# For ravel_multi_index op, ndim(shape) = 2; hence data NDArray's first dim = 2
# First dimension of input of ravel operator should match shape parameter
dimension
# DEFAULT_SHAPE is reused for ravel_multi_index op
RAVEL_DATA = [(2, 1024)]
+RAVEL_DATA_LARGE_TENSOR = [(2, 2**32)]
+DEFAULT_X_LARGE_TENSOR = [(2**32, 1)]
+
# For loss operators
DEFAULT_DATA_3d = [(1024, 100, 100)]
DEFAULT_LABEL = [(100,100)]
DEFAULT_DATA_SMCE = [(1024, 1024)]
DEFAULT_LABEL_SMCE = [(1024,)]
+
+DEFAULT_LABEL_LARGE_TENSOR = [(1, 1)]
+DEFAULT_DATA_CTCLOSS = [(2**32, 1, 1)]
+DEFAULT_DATA_SMCE_LARGE_TENSOR = [(2**32 + 1, 1)]
+DEFAULT_LABEL_SMCE_LARGE_TENSOR = [(2**32 + 1,)]
+
# For NN operators
DEFAULT_ACT_TYPE_LR = ['leaky', 'elu', 'selu', 'gelu']
DEFAULT_ACT_TYPE_ACTIVATION = ['relu', 'sigmoid', 'softrelu', 'softsign',
'tanh']
DEFAULT_LABEL_SOFTMAX = [(1024, 1024), (10000, 1), (10000, 100)]
+DEFAULT_LABEL_SOFTMAX_LARGE_TENSOR = [(2**32, 1)]
+
# For linalg operators
DEFAULT_A = [(1024, 1024)]
DEFAULT_B = [(1024, 1024)]
@@ -253,6 +388,11 @@ DEFAULT_C = [(1024, 1024)]
DEFAULT_A_MT = [(1024, 1035)]
DEFAULT_AXES = [[0, 1]]
+DEFAULT_A_LARGE_TENSOR = [(2**16, 2**16)]
+DEFAULT_B_LARGE_TENSOR = [(2**16, 2**16)]
+DEFAULT_C_LARGE_TENSOR = [(2**16, 2**16)]
+DEFAULT_A_MT_LARGE_TENSOR = [(2**32 + 1, 1)]
+
# Default Inputs. MXNet Op Param Name to Default Input mapping
DEFAULTS_INPUTS = {"data": DEFAULT_DATA,
"dtype": DEFAULT_DTYPE,
@@ -363,13 +503,10 @@ DEFAULTS_INPUTS = {"data": DEFAULT_DATA,
"output_size": DEFAULT_OUTPUT_SIZE,
"kernel_col2im": DEFAULT_KERNEL,
"stride_col2im": DEFAULT_STRIDE,
- "data_rnn": DEFAULT_DATA_RNN,
- "p_rnn": DEFAULT_P_RNN,
"parameters": DEFAULT_PARAMETERS,
"state": DEFAULT_STATE,
"state_size": DEFAULT_STATE_SIZE,
"num_layers": DEFAULT_NUM_LAYERS,
- "mode_rnn": DEFAULT_MODE,
"data_groupnorm": DEFAULT_DATA_GN,
"gamma_groupnorm": DEFAULT_BETA_GAMMA_GN,
"beta_groupnorm": DEFAULT_BETA_GAMMA_GN,
@@ -433,6 +570,222 @@ DEFAULTS_INPUTS = {"data": DEFAULT_DATA,
"data_layernorm": DEFAULT_DATA_NN_BASIC,
"axis_layernorm": DEFAULT_AXIS}
+# Default Inputs for Large Tensor. MXNet Op Param Name to Default Input mapping
+DEFAULTS_INPUTS_LARGE_TENSOR = {"data": DEFAULT_DATA_LARGE_TENSOR,
+ "dtype": DEFAULT_DTYPE,
+ "dtype_int": DEFAULT_DTYPE_INT,
+ "dtype_float": DEFAULT_DTYPE_FLOAT,
+ "sample": DEFAULT_SAMPLE_LARGE_TENSOR,
+ "lhs": DEFAULT_LHS_LARGE_TENSOR,
+ "rhs": DEFAULT_RHS_LARGE_TENSOR,
+ "shape": DEFAULT_SHAPE_LARGE_TENSOR,
+ "low": DEFAULT_LOW,
+ "high": DEFAULT_HIGH,
+ "low_nd": DEFAULT_LOW_ND_LARGE_TENSOR,
+ "high_nd": DEFAULT_HIGH_ND_LARGE_TENSOR,
+ "mu_nd": DEFAULT_MU_ND_LARGE_TENSOR,
+ "sigma": DEFAULT_SIGMA_LARGE_TENSOR,
+ "alpha_nd": DEFAULT_ALPHA_ND_LARGE_TENSOR,
+ "beta_nd": DEFAULT_BETA_ND_LARGE_TENSOR,
+ "lam_nd": DEFAULT_LAM_ND_LARGE_TENSOR,
+ "lam_random_pdf_exponential":
DEFAULT_LAM_RPE_LARGE_TENSOR,
+ "sample_random_pdf_exponential":
DEFAULT_SAMPLE_RPE_LARGE_TENSOR,
+ "k": DEFAULT_K,
+ "p": DEFAULT_P,
+ "k_nd": DEFAULT_K_ND_LARGE_TENSOR,
+ "p_nd": DEFAULT_P_ND_LARGE_TENSOR,
+ "axis_shape": DEFAULT_AXIS_SHAPE,
+ "axis": DEFAULT_AXIS,
+ "weight" : DEFAULT_WEIGHT_LARGE_TENSOR,
+ "weight32" : DEFAULT_WEIGHT_LARGE_TENSOR,
+ "grad" : DEFAULT_GRAD_LARGE_TENSOR,
+ "mean" : DEFAULT_MEAN_LARGE_TENSOR,
+ "var" : DEFAULT_VAR_LARGE_TENSOR,
+ "mom" : DEFAULT_MOM_LARGE_TENSOR,
+ "r1": DEFAULT_R1_LARGE_TENSOR,
+ "r2": DEFAULT_R2_LARGE_TENSOR,
+ "n" : DEFAULT_N_LARGE_TENSOR,
+ "d" : DEFAULT_D_LARGE_TENSOR,
+ "v" : DEFAULT_V_LARGE_TENSOR,
+ "z" : DEFAULT_Z_LARGE_TENSOR,
+ "g" : DEFAULT_G_LARGE_TENSOR,
+ "delta" : DEFAULT_DELTA_LARGE_TENSOR,
+ "lr" : DEFAULT_LR,
+ "lrs" : DEFAULT_LRS,
+ "wds" : DEFAULT_LRS,
+ "wd": DEFAULT_LR,
+ "gamma1" : DEFAULT_GAMMA_1,
+ "gamma2" : DEFAULT_GAMMA_2,
+ "epsilon" : DEFAULT_EPSILON,
+ "beta1" : DEFAULT_BETA_1,
+ "beta2" : DEFAULT_BETA_2,
+ "t" : DEFAULT_T,
+ "rescale_grad" : DEFAULT_RESCALE_GRAD,
+ "clip_grad" : DEFAULT_CLIP_GRADIENT,
+ "lazy_update" : DEFAULT_LAZY_UPDATE,
+ "data_4d": DEFAULT_DATA_4d_LARGE_TENSOR,
+ "dim1": DEFAULT_DIM_1,
+ "dim2": DEFAULT_DIM_2,
+ "block_size": DEFAULT_BLOCK_SIZE_LARGE_TENSOR,
+ "args": DEFAULT_ARGS,
+ "index": DEFAULT_INDEX_LARGE_TENSOR,
+ "data_smce": DEFAULT_DATA_SMCE_LARGE_TENSOR,
+ "label_smce": DEFAULT_LABEL_SMCE_LARGE_TENSOR,
+ "grid": DEFAULT_GRID_LARGE_TENSOR,
+ "data_bilinearsampler":
DEFAULT_DATA_BILINEAR_LARGE_TENSOR,
+ "transform_type": DEFAULT_TRANSFORM_TYPE,
+ "data_gridgenerator":
DEFAULT_DATA_GRIDGEN_LARGE_TENSOR,
+ "target_shape_gridgenerator":
DEFAULT_TARGET_SHAPE_LARGE_TENSOR,
+ "data_sample_multinomial":
DEFAULT_DATA_SM_LARGE_TENSOR,
+ "data_random_pdf_dirichlet":
DEFAULT_DATA_RPD_LARGE_TENSOR,
+ "alpha_random_pdf_dirichlet":
DEFAULT_ALPHA_RPD_LARGE_TENSOR,
+ "sample_random_pdf_gamma":
DEFAULT_SAMPLE_RPG_LARGE_TENSOR,
+ "alpha_random_pdf_gamma":
DEFAULT_ALPHA_RPG_LARGE_TENSOR,
+ "beta_random_pdf_gamma":
DEFAULT_BETA_LARGE_TENSOR,
+
"sample_random_pdf_generalized_negative_binomial":
DEFAULT_SAMPLE_RPG_LARGE_TENSOR,
+ "mu_random_pdf_generalized_negative_binomial":
DEFAULT_ALPHA_RPG_LARGE_TENSOR,
+
"alpha_random_pdf_generalized_negative_binomial":
DEFAULT_ALPHA_RPG_LARGE_TENSOR,
+ "sample_random_pdf_negative_binomial":
DEFAULT_SAMPLE_RPG_LARGE_TENSOR,
+ "k_random_pdf_negative_binomial":
DEFAULT_ALPHA_RPG_LARGE_TENSOR,
+ "p_random_pdf_negative_binomial":
DEFAULT_ALPHA_RPG_LARGE_TENSOR,
+ "sample_random_pdf_normal":
DEFAULT_SAMPLE_RPG_LARGE_TENSOR,
+ "mu_random_pdf_normal":
DEFAULT_ALPHA_RPG_LARGE_TENSOR,
+ "sigma_random_pdf_normal":
DEFAULT_ALPHA_RPG_LARGE_TENSOR,
+ "sample_random_pdf_poisson":
DEFAULT_SAMPLE_RPG_LARGE_TENSOR,
+ "lam_random_pdf_poisson":
DEFAULT_ALPHA_RPG_LARGE_TENSOR,
+ "sample_random_pdf_uniform":
DEFAULT_SAMPLE_RPG_LARGE_TENSOR,
+ "low_random_pdf_uniform":
DEFAULT_ALPHA_RPG_LARGE_TENSOR,
+ "high_random_pdf_uniform":
DEFAULT_ALPHA_RPG_LARGE_TENSOR,
+ "shape_sample_exponential":
DEFAULT_SHAPE_SE_LARGE_TENSOR,
+ "lam_sample_exponential":
DEFAULT_LAM_SE_LARGE_TENSOR,
+ "mu_sample_normal":
DEFAULT_LAM_SE_LARGE_TENSOR,
+ "sigma_sample_normal":
DEFAULT_LAM_SE_LARGE_TENSOR,
+ "shape_sample_poisson":
DEFAULT_LAM_SE_LARGE_TENSOR,
+ "lam_sample_poisson":
DEFAULT_SHAPE_SE_LARGE_TENSOR,
+ "shape_sample_uniform":
DEFAULT_SHAPE_SU_LARGE_TENSOR,
+ "low_sample_uniform":
DEFAULT_LAM_SE_LARGE_TENSOR,
+ "high_sample_uniform":
DEFAULT_LAM_SE_LARGE_TENSOR,
+ "alpha_sample_gamma":
DEFAULT_SHAPE_SU_LARGE_TENSOR,
+ "beta_sample_gamma":
DEFAULT_SHAPE_SU_LARGE_TENSOR,
+ "mu_sample_generalized_negative_binomial":
DEFAULT_SHAPE_SU_LARGE_TENSOR,
+ "shape_sample_generalized_negative_binomial":
DEFAULT_SHAPE_SU_LARGE_TENSOR,
+ "alpha_sample_generalized_negative_binomial":
DEFAULT_SHAPE_SU_LARGE_TENSOR,
+ "shape_sample_negative_binomial":
DEFAULT_SHAPE_SU_LARGE_TENSOR,
+ "k_sample_negative_binomial":
DEFAULT_SHAPE_SU_LARGE_TENSOR,
+ "p_sample_negative_binomial":
DEFAULT_SHAPE_SU_LARGE_TENSOR,
+ "A": DEFAULT_A_LARGE_TENSOR,
+ "B": DEFAULT_B_LARGE_TENSOR,
+ "C": DEFAULT_C_LARGE_TENSOR,
+ "A_linalg_maketrian":
DEFAULT_A_MT_LARGE_TENSOR,
+ "axes": DEFAULT_AXES,
+ "act_type_leakyrelu": DEFAULT_ACT_TYPE_LR,
+ "label_softmax":
DEFAULT_LABEL_SOFTMAX_LARGE_TENSOR,
+ "act_type_activation":
DEFAULT_ACT_TYPE_ACTIVATION,
+ "data_squeeze":
DEFAULT_DATA_SQUEEZE_LARGE_TENSOR,
+ "axis_squeeze":
DEFAULT_AXIS_SQUEEZE_LARGE_TENSOR,
+ "a_min": DEFAULT_A_MIN,
+ "a_max": DEFAULT_A_MAX,
+ "weights_sum_sq": DEFAULT_WSS_LARGE_TENSOR,
+ "grads_sum_sq": DEFAULT_GSS_LARGE_TENSOR,
+ "wds": DEFAULT_WDS_LARGE_TENSOR,
+ "eta": DEFAULT_ETA,
+ "eps": DEFAULT_EPSILON,
+ "stype": DEFAULT_STYPE,
+ "indices": DEFAULT_INDICES,
+ "begin": DEFAULT_BEGIN,
+ "end": DEFAULT_END,
+ "shape_like": DEFAULT_DATA_LARGE_TENSOR,
+ "depth": DEFAULT_DEPTH,
+ "condition": DEFAULT_X_LARGE_TENSOR,
+ "x": DEFAULT_X_LARGE_TENSOR,
+ "y": DEFAULT_X_LARGE_TENSOR,
+ "ravel_data": RAVEL_DATA_LARGE_TENSOR,
+ "a": DEFAULT_A_LARGE_TENSOR,
+ "lhs_fill_element_0index":
DEFAULT_LHS_FEI_LARGE_TENSOR,
+ "rhs_fill_element_0index":
DEFAULT_RHS_FEI_LARGE_TENSOR,
+ "mhs": DEFAULT_MHS_LARGE_TENSOR,
+ "lrs_multi_lars": DEFAULT_WSS_LARGE_TENSOR,
+ "data_softmax":
DEFAULT_LABEL_SOFTMAX_LARGE_TENSOR,
+ "data_spatialtransformer":
DEFAULT_DATA_ST_LARGE_TENSOR,
+ "loc_spatialtransformer":
DEFAULT_LOC_TAR_ST_LARGE_TENSOR,
+ "target_shape":
DEFAULT_LOC_TAR_ST_LARGE_TENSOR,
+ "transform_type_spatialtransformer":
DEFAULT_TRANSFORM,
+ "sampler_type": DEFAULT_SAMPLER,
+ "data_col2im": DEFAULT_DATA_C2I_LARGE_TENSOR,
+ "output_size":
DEFAULT_OUTPUT_SIZE_LARGE_TENSOR,
+ "kernel_col2im": DEFAULT_KERNEL_LARGE_TENSOR,
+ "stride_col2im": DEFAULT_STRIDE_LARGE_TENSOR,
+ "data_ctcloss": DEFAULT_DATA_CTCLOSS,
+ "label_ctcloss": DEFAULT_LABEL_LARGE_TENSOR,
+ "data_ctc_loss": DEFAULT_DATA_CTCLOSS,
+ "label_ctc_loss": DEFAULT_LABEL_LARGE_TENSOR,
+ "parameters": DEFAULT_PARAMETERS_LARGE_TENSOR,
+ "state": DEFAULT_STATE_LARGE_TENSOR,
+ "state_size": DEFAULT_STATE_SIZE_LARGE_TENSOR,
+ "num_layers": DEFAULT_NUM_LAYERS_LARGE_TENSOR,
+ "data_groupnorm": DEFAULT_DATA_GN_LARGE_TENSOR,
+ "gamma_groupnorm":
DEFAULT_BETA_GAMMA_GN_LARGE_TENSOR,
+ "beta_groupnorm":
DEFAULT_BETA_GAMMA_GN_LARGE_TENSOR,
+ "eps": DEFAULT_EPSILON,
+ "data_dropout":
DEFAULT_DATA_DROPOUT_LARGE_TENSOR,
+ "mode_dropout": DEFAULT_MODE_DROPOUT,
+ "p_dropout": DEFAULT_P_DROPOUT_LARGE_TENSOR,
+ "axes_dropout":
DEFAULT_AXES_DROPOUT_LARGE_TENSOR,
+ "data_nn_basic":
DEFAULT_DATA_NN_BASIC_LARGE_TENSOR,
+ "num_hidden": DEFAULT_NUM_HIDDEN_LARGE_TENSOR,
+ "data_fullyconnected":
DEFAULT_DATA_FC_LARGE_TENSOR,
+ "weight_fullyconnected":
DEFAULT_WEIGHT_FC_LARGE_TENSOR,
+ "num_hidden_fullyconnected":
DEFAULT_NUM_HIDDEN_FC_LARGE_TENSOR,
+ "weight_embedding":
DEFAULT_WEIGHT_EMBEDDING_LARGE_TENSOR,
+ "bias": DEFAULT_BIAS_LARGE_TENSOR,
+ "flatten": DEFAULT_FLATTEN_LARGE_TENSOR,
+ "data_batchnorm":
DEFAULT_DATA_NN_BASIC_LARGE_TENSOR,
+ "gamma_batchnorm": DEFAULT_GAMMA_LARGE_TENSOR,
+ "beta_batchnorm": DEFAULT_BETA_LARGE_TENSOR,
+ "moving_mean_batchnorm":
DEFAULT_MOVING_MEAN_LARGE_TENSOR,
+ "moving_var_batchnorm":
DEFAULT_MOVING_VAR_LARGE_TENSOR,
+ "axis_batchnorm": DEFAULT_AXIS_BN,
+ "data_softmaxoutput":
DEFAULT_DATA_SO_LARGE_TENSOR,
+ "label_softmaxoutput":
DEFAULT_LABEL_SO_LARGE_TENSOR,
+ "data_maeregressionoutput":
DEFAULT_DATA_REG_LARGE_TENSOR,
+ "label_maeregressionoutput":
DEFAULT_LABEL_REG_LARGE_TENSOR,
+ "data_logisticregressionoutput":
DEFAULT_DATA_REG_LARGE_TENSOR,
+ "label_logisticregressionoutput":
DEFAULT_LABEL_REG_LARGE_TENSOR,
+ "data_linearregressionoutput":
DEFAULT_DATA_REG_LARGE_TENSOR,
+ "label_linearregressionoutput":
DEFAULT_LABEL_REG_LARGE_TENSOR,
+ "data_svmoutput":
DEFAULT_DATA_SVM_LARGE_TENSOR,
+ "label_svmoutput":
DEFAULT_LABEL_SVM_LARGE_TENSOR,
+ "grad_scale": DEFAULT_GRAD_SCALE,
+ "normalization": DEFAULT_NORMALIZATION,
+ "margin": DEFAULT_MARGIN,
+ "regularization_coefficient":
DEFAULT_REG_COEFF,
+ "data_l2normalization":
DEFAULT_DATA_NORM_LARGE_TENSOR,
+ "mode_l2normalization": DEFAULT_MODE_L2,
+ "gamma_layernorm":
DEFAULT_GAMMA_NORM_LARGE_TENSOR,
+ "beta_layernorm":
DEFAULT_BETA_NORM_LARGE_TENSOR,
+ "data_instancenorm":
DEFAULT_DATA_NORM_LARGE_TENSOR,
+ "gamma_instancenorm":
DEFAULT_GAMMA_NORM_LARGE_TENSOR,
+ "beta_instancenorm":
DEFAULT_GAMMA_NORM_LARGE_TENSOR,
+ "input_dim": DEFAULT_INPUT_DIM_LARGE_TENSOR,
+ "output_dim": DEFAULT_OUTPUT_DIM_LARGE_TENSOR,
+ "sparse_grad": DEFAULT_SPARSE_GRAD,
+ "data1": DEFAULT_DATA1_LARGE_TENSOR,
+ "data2": DEFAULT_DATA2_LARGE_TENSOR,
+ "kernel_size":
DEFAULT_KERNEL_SIZE_LARGE_TENSOR,
+ "max_displacement":
DEFAULT_MAX_DISPLACEMENT_LARGE_TENSOR,
+ "stride1": DEFAULT_STRIDE_1_LARGE_TENSOR,
+ "stride2": DEFAULT_STRIDE_2_LARGE_TENSOR,
+ "data_im2col": DEFAULT_DATA_I2C_LARGE_TENSOR,
+ "kernel_im2col":
DEFAULT_KERNEL_I2C_LARGE_TENSOR,
+ "stride_im2col":
DEFAULT_STRIDE_I2C_LARGE_TENSOR,
+ "dilate_im2col": DEFAULT_DILATE_LARGE_TENSOR,
+ "pad_im2col": DEFAULT_PAD_LARGE_TENSOR,
+ "data_lrn": DEFAULT_DATA_LRN_LARGE_TENSOR,
+ "alpha_lrn": DEFAULT_ALPHA,
+ "beta_lrn": DEFAULT_BETA_LRN,
+ "nsize": DEFAULT_NSIZE,
+ "data_layernorm":
DEFAULT_DATA_NORM_LARGE_TENSOR,
+ "axis_layernorm": DEFAULT_AXIS_LARGE_TENSOR}
# These are names of MXNet operator parameters that is of type NDArray.
# We maintain this list to automatically recognize these parameters are to be
@@ -446,4 +799,6 @@ PARAMS_OF_TYPE_NDARRAY = ["lhs", "rhs", "data", "base",
"exp", "sample",
"v", "z", "g", "delta", "args", "indices",
"shape_like", "y",
"x", "condition", "a", "index", "raveL_data",
"label", "grid",
"A", "B", "C", "r1", "r2", "rois", "lrs", "wds",
"weights_sum_sq",
- "grads_sum_sq", "mhs", "data1", "data2", "loc",
"parameters", "state"]
+ "grads_sum_sq", "mhs", "data1", "data2", "loc",
"parameters", "state",
+ "state_cell"]
+
diff --git a/benchmark/opperf/utils/benchmark_utils.py
b/benchmark/opperf/utils/benchmark_utils.py
index f6cdfe0..f2cce0a 100644
--- a/benchmark/opperf/utils/benchmark_utils.py
+++ b/benchmark/opperf/utils/benchmark_utils.py
@@ -181,7 +181,7 @@ def run_performance_test(ops, inputs, run_backward=True,
return op_benchmark_result
-def run_op_benchmarks(ops, dtype, ctx, profiler, warmup, runs):
+def run_op_benchmarks(ops, dtype, ctx, profiler, int64_tensor, warmup, runs):
# Running SoftmaxOutput backwards on GPU results in errors
# track issue here: https://github.com/apache/incubator-mxnet/issues/880
gpu_backwards_disabled_ops = ['SoftmaxOutput']
@@ -195,7 +195,7 @@ def run_op_benchmarks(ops, dtype, ctx, profiler, warmup,
runs):
for op, op_params in ops.items():
if ctx == mx.cpu() or op not in gpu_disabled_ops:
# Prepare inputs for the operator
- inputs = prepare_op_inputs(op, op_params)
+ inputs = prepare_op_inputs(op, op_params, int64_tensor)
# setting backward false for ops with known issue
if (ctx == mx.gpu() and op in gpu_backwards_disabled_ops) or op in
no_backward:
diff --git a/benchmark/opperf/utils/op_registry_utils.py
b/benchmark/opperf/utils/op_registry_utils.py
index 99678b8..b27b8e4 100644
--- a/benchmark/opperf/utils/op_registry_utils.py
+++ b/benchmark/opperf/utils/op_registry_utils.py
@@ -20,7 +20,7 @@ from operator import itemgetter
from mxnet import runtime
import mxnet as mx
-from benchmark.opperf.rules.default_params import DEFAULTS_INPUTS, MX_OP_MODULE
+from benchmark.opperf.rules.default_params import DEFAULTS_INPUTS,
DEFAULTS_INPUTS_LARGE_TENSOR, MX_OP_MODULE
def _select_ops(operator_names, filters=("_contrib", "_"),
merge_op_forward_backward=True):
@@ -109,7 +109,7 @@ def prepare_op_inputs(arg_params, arg_values):
return inputs
-def prepare_op_inputs(op, arg_params):
+def prepare_op_inputs(op, arg_params, int64_tensor):
inputs = []
# 4d tensor is needed only by following two ops
@@ -120,14 +120,27 @@ def prepare_op_inputs(op, arg_params):
# For ops with args that need to change shape/value for different ops
custom_data = {'Activation', 'LeakyReLU', 'Softmax', 'BilinearSampler',
'GridGenerator', 'sample_multinomial', 'linalg_maketrian',
- 'SpatialTransformer', 'col2im', 'RNN', 'GroupNorm',
'Dropout', 'FullyConnected',
+ 'SpatialTransformer', 'col2im', 'GroupNorm', 'Dropout',
'FullyConnected',
'SoftmaxOutput', 'LinearRegressionOutput', 'BatchNorm',
'LogisticRegressionOutput',
'MAERegressionOutput', 'SVMOutput', 'L2Normalization',
'LayerNorm', 'InstanceNorm',
'Embedding', 'Correlation', 'im2col', 'LRN', 'squeeze',
'fill_element_0index'}
+ custom_data_int64 = {'random_pdf_dirichlet', 'random_pdf_exponential',
'random_pdf_gamma',
+ 'random_pdf_generalized_negative_binomial',
'random_pdf_negative_binomial',
+ 'random_pdf_normal', 'random_pdf_poisson',
'random_pdf_uniform', 'sample_exponential',
+ 'sample_normal', 'sample_poisson', 'sample_uniform',
'sample_gamma',
+ 'sample_generalized_negative_binomial',
'sample_negative_binomial', 'CTCLoss',
+ 'ctc_loss', 'multi_lars'}
+
int_only = {'random_randint'}
float_only = {'log_softmax', 'softmax', 'softmin'}
+ if int64_tensor == 'on':
+ default_inputs = DEFAULTS_INPUTS_LARGE_TENSOR
+ custom_data |= custom_data_int64
+ else:
+ default_inputs = DEFAULTS_INPUTS
+
# Prepare op to default input mapping
arg_values = {}
for arg_name, arg_type in zip(arg_params["params"]["arg_names"],
@@ -137,29 +150,29 @@ def prepare_op_inputs(op, arg_params):
# same for randint (which is the only op that takes only int as input)
# rest all operators take int as well as float
if op in int_only and arg_name == "dtype":
- arg_values[arg_name] = DEFAULTS_INPUTS["dtype_int"]
+ arg_values[arg_name] = default_inputs["dtype_int"]
elif (op.startswith(('random','sample')) or op in float_only) and
arg_name == "dtype":
- arg_values[arg_name] = DEFAULTS_INPUTS["dtype_float"]
+ arg_values[arg_name] = default_inputs["dtype_float"]
elif "NDArray" in arg_type and op == "ravel_multi_index":
- arg_values[arg_name] = DEFAULTS_INPUTS["ravel_data"]
- elif op in custom_data and arg_name + "_" + op.lower() in
DEFAULTS_INPUTS:
- arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_" + op.lower()]
- elif "NDArray" in arg_type and arg_name + "_nd" in DEFAULTS_INPUTS:
- arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_nd"]
- elif "NDArray" in arg_type and op in ops_4d and arg_name + "_4d" in
DEFAULTS_INPUTS:
- arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_4d"]
- elif "NDArray" in arg_type and op in ops_3d and arg_name + "_3d" in
DEFAULTS_INPUTS:
- arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_3d"]
+ arg_values[arg_name] = default_inputs["ravel_data"]
+ elif op in custom_data and arg_name + "_" + op.lower() in
default_inputs:
+ arg_values[arg_name] = default_inputs[arg_name + "_" + op.lower()]
+ elif "NDArray" in arg_type and arg_name + "_nd" in default_inputs:
+ arg_values[arg_name] = default_inputs[arg_name + "_nd"]
+ elif "NDArray" in arg_type and op in ops_4d and arg_name + "_4d" in
default_inputs:
+ arg_values[arg_name] = default_inputs[arg_name + "_4d"]
+ elif "NDArray" in arg_type and op in ops_3d and arg_name + "_3d" in
default_inputs:
+ arg_values[arg_name] = default_inputs[arg_name + "_3d"]
elif "NDArray" in arg_type and op == 'softmax_cross_entropy':
- arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_smce"]
- elif arg_name in DEFAULTS_INPUTS:
- arg_values[arg_name] = DEFAULTS_INPUTS[arg_name]
- elif "float" in arg_type and arg_name + "_float" in DEFAULTS_INPUTS:
- arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_float"]
- elif "Shape" in arg_type and arg_name + "_shape" in DEFAULTS_INPUTS:
+ arg_values[arg_name] = default_inputs[arg_name + "_smce"]
+ elif arg_name in default_inputs:
+ arg_values[arg_name] = default_inputs[arg_name]
+ elif "float" in arg_type and arg_name + "_float" in default_inputs:
+ arg_values[arg_name] = default_inputs[arg_name + "_float"]
+ elif "Shape" in arg_type and arg_name + "_shape" in default_inputs:
# This is for cases where in some ops 'axis' is Int in some ops a
shape tuple.
# Ex: axis in sum is shape, axis in sort is int.
- arg_values[arg_name] = DEFAULTS_INPUTS[arg_name + "_shape"]
+ arg_values[arg_name] = default_inputs[arg_name + "_shape"]
# Number of different inputs we want to use to test
# the operator
@@ -340,7 +353,7 @@ def get_all_nn_basic_operators():
nn_basic_ops = ['FullyConnected', 'Dropout', 'BatchNorm', 'SoftmaxOutput',
'LinearRegressionOutput',
'LogisticRegressionOutput', 'MAERegressionOutput',
'SVMOutput', 'L2Normalization',
'LayerNorm', 'InstanceNorm', 'Embedding', 'Correlation',
'SpatialTransformer', 'im2col',
- 'col2im', 'GroupNorm', 'RNN', 'LRN']
+ 'col2im', 'GroupNorm', 'LRN']
# Get all mxnet operators
mx_operators = _get_all_mxnet_operators()