This is an automated email from the ASF dual-hosted git repository.
csullivan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 2ae20882d3 [hexagon][testing] add TIRScript elemwise-add (#11490)
2ae20882d3 is described below
commit 2ae20882d3e34cc6e5acef992c23c17a585c25aa
Author: Christian Convey <[email protected]>
AuthorDate: Fri Jun 3 11:58:30 2022 -0400
[hexagon][testing] add TIRScript elemwise-add (#11490)
Replace TE-based elementwise-add benchmark with
a TVMScript-based one.
Update Hexagon target architecture from v68 to v69.
As a result, the benchmark now requires a version of
Hexagon SDK newer than 4.4.0.1. Version 4.5.0.3 is
known to work.
---
.../contrib/test_hexagon/benchmark_elemwise_add.py | 434 +++++++++++++++++++++
.../contrib/test_hexagon/benchmark_hexagon.py | 245 ------------
.../python/contrib/test_hexagon/benchmark_util.py | 34 ++
3 files changed, 468 insertions(+), 245 deletions(-)
diff --git a/tests/python/contrib/test_hexagon/benchmark_elemwise_add.py
b/tests/python/contrib/test_hexagon/benchmark_elemwise_add.py
new file mode 100644
index 0000000000..70266d7939
--- /dev/null
+++ b/tests/python/contrib/test_hexagon/benchmark_elemwise_add.py
@@ -0,0 +1,434 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import os.path
+import sys
+import pytest
+import numpy as np
+import logging
+import tempfile
+
+import tvm.testing
+import tvm.script
+from tvm.script import tir as T
+from tvm import te
+from tvm.contrib.hexagon.build import HexagonLauncherRPC
+from . import benchmark_util
+
+# This is a fixed detail of the v68 architecture.
+HVX_VECTOR_BYTES = 128
+
+_HEXAGON_TARGET = tvm.target.hexagon("v69", link_params=True)
+
+_SUPER_TARGET = tvm.target.Target(_HEXAGON_TARGET, host=_HEXAGON_TARGET)
+
+# NOTE on server ports:
+# These tests use different port numbers for the RPC server (7070 + ...).
+# The reason is that an RPC session cannot be gracefully closed without
+# triggering TIME_WAIT state on the server socket. This prevents another
+# server from binding to the same port until the wait time elapses.
+
+_BT = benchmark_util.BenchmarksTable()
+
+_CSV_COLUMN_ORDER = [
+ # Identifies which TE-compute / TIRScript is used as the basis for the
+ # benchmarked primfunc. Only needs to be meaningful to humans.
+ "basic_kernel",
+ # The tensors' element type
+ "dtype",
+ # When applicable, indicates the particular variation of schedules
+ # applied by the Python code. Decoding this may require looking at this
+ # script's source code.
+ "sched_type",
+ # The memory location of the tensors used during the execution of
+ # the primfunc. We currently assume just one location.
+ # This will likely need to be generalized as we add more sophisticated
+ # primfuncs.
+ "mem_scope",
+ # For primfuncs that treat tensor buffers as collections of 1D vectors,
+ # this is the number of vectors in each tensor.
+ # This will likely need to be generalized as we add more sophisticated
+ # primfuncs.
+ "num_vectors_per_tensor",
+ # Reserved columns defined by the BenchmarksTable class.
+ "row_status",
+ "timings_min_usecs",
+ "timings_max_usecs",
+ "timings_median_usecs",
+ "timings_mean_usecs",
+ "timings_stddev_usecs",
+ # For benchmarks that produce files on the host file system, this indicates
+ # their location. Useful for post-mortem investigation of benchmark
results.
+ "host_files_dir_path",
+ # Miscellaneous comments about the benchmark.
+ "comments",
+]
+
+_HOST_OUTPUT_DIR = tempfile.mkdtemp()
+
+_PRIMFUNC_NAME = "elemwise_add"
+
+print("-" * 80)
+print("OUTPUT DIRECTORY: {}".format(_HOST_OUTPUT_DIR))
+print("-" * 80)
+print()
+
+
+class UnsupportedException(Exception):
+ """
+ Indicates that the specified benchmarking configuration is known to
+ currently be unsupported. The Exception message may provide more detail.
+ """
+
+
+class NumericalAccuracyException(Exception):
+ """
+ Indicates that the benchmarking configuration appeared to run successfully,
+ but the output data didn't have the expected accuracy.
+ """
+
+
+from typing import Tuple
+
+
+def _get_irmod_elemwise_add(
+ _PRIMFUNC_NAME: str, shape: list, dtype: str, mem_scope: str
+) -> tvm.ir.module.IRModule:
+ """
+ Return an IRModule containing a single primfunc, expressed as NS-TIR.
+
+ The primfunc implements elementwise-add. Its signature is (A,B,C), where
+ A and B are the input tensors, and C is the output tensor.
+ All three tensors have the specified shape, dtype, and mem_scope.
+
+ If the specified primfunc is known to be unsupported, raise an
UnsupportedException.
+ """
+ assert len(shape) == 2
+
+ # TVMScript can reference simple Python variables, but it doesn't
+ # currently support more complex Python expressions...
+ (
+ dim0_size,
+ dim1_size,
+ ) = shape
+ dtype_str = str(dtype)
+
+ if mem_scope == "global.vtcm":
+ raise UnsupportedException("This benchmark kernel does not yet support
VTCM buffers.")
+
+ # This check is currently elided by the one above, but it should
become relevant as soon
+ # as we add VTCM support to this kernel generator.
+ #
+ # Also: The VTCM budget is a very rough estimate, based only on
experience.
+ # Assuming that it's even reasonable to use a hard-coded estimate AT
ALL, this number
+ # may need tweaking.
+ estimated_vtcm_budget_bytes = HVX_VECTOR_BYTES * 1024
+
+ dtype_bits = tvm._ffi.runtime_ctypes.DataType(dtype).bits
+ assert dtype_bits % 8 == 0
+ dtype_bytes = dtype_bits // 8
+
+ num_vtcm_tensors = 3
+ estimated_vtcm_needed_bytes = shape[0] * shape[1] * dtype_bytes *
num_vtcm_tensors
+
+ if estimated_vtcm_needed_bytes > estimated_vtcm_budget_bytes:
+ raise UnsupportedException("Expect to exceed VTCM budget.")
+
+ @tvm.script.ir_module
+ class BenchmarkModule:
+ @T.prim_func
+ def main(a: T.handle, b: T.handle, c: T.handle):
+ # We exchange data between functions by handles, which are similar
to pointers.
+ T.func_attr({"global_symbol": "main", "tir.noalias": True})
+
+ A = T.match_buffer(a, shape, dtype=dtype)
+ B = T.match_buffer(b, shape, dtype=dtype)
+ C = T.match_buffer(c, shape, dtype=dtype)
+
+ for i in range(dim0_size):
+ for j in range(dim1_size):
+ C[i, j] = A[i, j] + B[i, j]
+
+ return BenchmarkModule
+
+
+def _benchmark_hexagon_elementwise_add_kernel(
+ hexagon_launcher: HexagonLauncherRPC, shape: list, dtype: str, mem_scope:
str
+):
+ """
+ Generate and benchmark a single elementwise-add kernel for Hexagon.
+
+ Produce these outputs:
+ - Printed status updates / results to stdout and/or stderr.
+
+ - Create a new subdirectory under _HOST_OUTPUT_DIR, and populate it with
+ various logs and intermediate files.
+
+ - Add to _BT a row describing this benchmark run.
+ """
+ # Represent the benchmark details in a form required by the benchmark table
+ # and for other logging...
+ keys_dict = {
+ "basic_kernel": "ewise-add",
+ "dtype": dtype,
+ "shape": shape,
+ "mem_scope": mem_scope,
+ }
+
+ desc = benchmark_util.get_benchmark_decription(keys_dict)
+
+ # Create the host-side directory for this benchmark run's files / logs...
+ host_files_dir_name = benchmark_util.get_benchmark_id(keys_dict)
+ host_files_dir_path = os.path.join(_HOST_OUTPUT_DIR, host_files_dir_name)
+ os.mkdir(host_files_dir_path)
+
+ keys_dict["host_files_dir_path"] = host_files_dir_path
+
+ log_file_path = os.path.join(host_files_dir_path, "out.txt")
+ with open(log_file_path, "w") as log_file:
+ print(f"CONFIGURATION: {desc}")
+ log_file.write(f"CONFIGURATION: {desc}\n")
+
+ try:
+ ns_tir_module = _get_irmod_elemwise_add(_PRIMFUNC_NAME, shape,
dtype, mem_scope)
+
+ # Dump the primfunc NS-TIR (as text) to the log file...
+ lowered_mod = tvm.lower(ns_tir_module, _PRIMFUNC_NAME)
+ log_file.write("LOWERED IR MODULE:\n")
+ log_file.write(str(lowered_mod))
+ log_file.write("\n")
+
+ # Lower the primfunc's IRModule to Hexagon object code...
+ A = tvm.te.placeholder(shape, dtype=dtype)
+ B = tvm.te.placeholder(shape, dtype=dtype)
+ C = tvm.te.placeholder(shape, dtype=dtype)
+
+ built_module: tvm.driver.build_module.OperatorModule = tvm.build(
+ ns_tir_module,
+ [
+ A,
+ B,
+ C,
+ ],
+ _SUPER_TARGET,
+ name=_PRIMFUNC_NAME,
+ )
+
+ # Create an actual Hexagon-native shared object file, initially
stored on the
+ # host's file system...
+ host_dso_binary_path = os.path.join(host_files_dir_path,
"test_binary.so")
+ built_module.save(host_dso_binary_path)
+ print(f"SAVED BINARY TO HOST PATH: {host_dso_binary_path}")
+
+ # Upload the .so to the Android device's file system (or wherever
is appropriate
+ # when using the Hexagon simulator)...
+ target_dso_binary_filename = "test_binary.so"
+ hexagon_launcher.upload(host_dso_binary_path,
target_dso_binary_filename)
+
+ # Generate our testing / validation data...
+ (
+ host_numpy_A_data,
+ host_numpy_B_data,
+ host_numpy_C_data_expected,
+ ) = _get_elemwise_add_reference_value_tensors(shape, dtype)
+
+ with hexagon_launcher.start_session() as sess:
+ # On the target device / simulator, make our Hexagon-native
shared object
+ # available for use...
+ loaded_hexagon_module: tvm.runtime.module.Module =
hexagon_launcher.load_module(
+ target_dso_binary_filename, sess
+ )
+
+ # Create the target-side tensors to hold the primfunc's inputs
and outputs...
+ A_data = tvm.nd.empty(shape, dtype, sess.device, mem_scope)
+ B_data = tvm.nd.empty(shape, dtype, sess.device, mem_scope)
+ C_data = tvm.nd.empty(shape, dtype, sess.device, mem_scope)
+
+ # Populate the primfunc's input tensors...
+ A_data.copyfrom(host_numpy_A_data)
+ B_data.copyfrom(host_numpy_B_data)
+
+ # Actually benchmark the primfunc...
+ timer = loaded_hexagon_module.time_evaluator(
+ "main", sess.device, number=10, repeat=1
+ )
+ timing_result = timer(A_data, B_data, C_data)
+
+ print(f"TIMING RESULT: {timing_result}")
+ log_file.write(f"TIMING RESULT: {timing_result}\n")
+
+ # Verify that the computation actually happened, and produced
the correct result.
+ result = C_data.numpy()
+
+ if dtype == "float16":
+ # These are the closest tolerance we currently expect /
require for these
+ # kernels. They may be changed in the future.
+ rel_tolerance = 0.005
+ abs_tolerance = 2.0
+ elif dtype == "int8":
+ rel_tolerance = 0
+ abs_tolerance = 0
+ else:
+ raise Exception(f"Unexpected dtype: {dtype}")
+
+ # TODO: We're assuming that *any* assertion thrown by
'assert_allclose' is because
+ # the numerical differences were too large. But ideally this
code would
+ # differentiate between (a) numerical difference errors, which
should simply be
+ # recorded as a failed benchmark run, vs. (b) more serious
errors that should
+ # kill the overall script.
+ try:
+ tvm.testing.assert_allclose(
+ result, host_numpy_C_data_expected, rel_tolerance,
abs_tolerance
+ )
+ except AssertionError as e:
+ raise NumericalAccuracyException(str(e))
+
+ _BT.record_success(timing_result, **keys_dict)
+
+ except NumericalAccuracyException as e:
+ print()
+ print(f"FAIL: Numerical accuracy error. See log file.")
+
+ log_file.write("\n")
+ log_file.write(f"FAIL: {e}\n")
+
+ _BT.record_fail(**keys_dict, comments=f"Numerical accuracy error.
See log file.")
+
+ except UnsupportedException as e:
+ print()
+ print(f"SKIP: {e}")
+
+ log_file.write("\n")
+ log_file.write(f"SKIP: {e}\n")
+
+ _BT.record_skip(**keys_dict, comments=f"Unsupported configuration:
{e}")
+
+
+def _get_elemwise_add_reference_value_tensors(shape: list, dtype: str):
+ """
+ Return [A:np.array, B:np.array, C:np.array]
+
+ `A`, `B`, and `C` are reference data used to exercise and validate
+ an elementwise-add kernel: C = A+B.
+
+ NOTE: These data are primarily meant for performance testing.
+ The values may be helpful in detecting correctness issues, but that's
+ a secondary consideration here.
+ """
+ assert len(shape) == 2
+
+ A = np.ndarray(shape, dtype=dtype)
+ B = np.ndarray(shape, dtype=dtype)
+
+ np_dtype = A.dtype
+
+ if np_dtype.kind in ["i", "u"]:
+ # We allow overflow for integer types because it tends to be
well-behaved
+ # and well-understood...
+ min_value = np.iinfo(np_dtype).min
+ max_value = np.iinfo(np_dtype).max
+
+ next_value = min_value
+
+ for i in range(shape[0]):
+ for j in range(shape[1]):
+ A[i, j] = next_value
+ B[i, j] = next_value * 2
+ next_value += 1
+
+ elif np_dtype.kind == "f":
+ # NOTE: For simplicity, we avoid test data that require
+ # well-defined behavior on floating-point overflow.
+ # But it may be reasonable to test that in the future.
+ min_value = np.finfo(np_dtype).min
+ max_value = np.finfo(np_dtype).max
+
+ min_input_value = min_value / 2.0 + 1
+ max_input_value = max_value / 2.0 - 2
+ delta = (max_input_value - min_input_value) / (shape[0] * shape[1])
+
+ next_value = min_input_value
+
+ for i in range(shape[0]):
+ for j in range(shape[1]):
+ A[i, j] = next_value
+ B[i, j] = next_value + 1
+ next_value += delta
+
+ else:
+ assert False, f"Unexpected data type: {np_dtype}"
+
+ C = A + B
+ return [
+ A,
+ B,
+ C,
+ ]
+
+
[email protected]_hexagon
+def test_elemwise_add(hexagon_launcher: HexagonLauncherRPC):
+ for dtype in [
+ "int8",
+ "float16",
+ ]:
+
+ for mem_scope in [
+ "global",
+ "global.vtcm",
+ ]:
+
+ # These numbers are fairly arbitrary, but they're meant to stress
memory/caches to
+ # various extents.
+ for num_vectors_per_tensor in [
+ 1,
+ 16,
+ 64,
+ 512,
+ 2048,
+ ]:
+
+ dtype_bits = tvm._ffi.runtime_ctypes.DataType(dtype).bits
+ assert dtype_bits % 8 == 0
+ dtype_bytes = dtype_bits // 8
+
+ elem_per_hvx_vector = HVX_VECTOR_BYTES // dtype_bytes
+
+ shape = [
+ num_vectors_per_tensor,
+ elem_per_hvx_vector,
+ ]
+
+ print()
+ _benchmark_hexagon_elementwise_add_kernel(hexagon_launcher,
shape, dtype, mem_scope)
+
+ print("-" * 80)
+ print(f"OUTPUT DIRECTORY: {_HOST_OUTPUT_DIR}")
+ print("-" * 80)
+ print()
+
+ tabular_output_filename = os.path.join(_HOST_OUTPUT_DIR,
"benchmark-results.csv")
+ with open(tabular_output_filename, "w") as csv_file:
+ _BT.print_csv(csv_file, _CSV_COLUMN_ORDER)
+
+ print(f"BENCHMARK RESULTS FILE: {tabular_output_filename}")
+
+ _BT.print_csv(sys.stdout, _CSV_COLUMN_ORDER)
+
+ if _BT.has_fail() > 0:
+ pytest.fail("At least one benchmark configuration failed",
pytrace=False)
diff --git a/tests/python/contrib/test_hexagon/benchmark_hexagon.py
b/tests/python/contrib/test_hexagon/benchmark_hexagon.py
deleted file mode 100644
index 2a1d6796e7..0000000000
--- a/tests/python/contrib/test_hexagon/benchmark_hexagon.py
+++ /dev/null
@@ -1,245 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import os
-import os.path
-import sys
-import pytest
-import numpy as np
-import logging
-import tempfile
-
-import tvm.testing
-from tvm import te
-from tvm.contrib.hexagon.build import HexagonLauncherRPC
-from .benchmark_util import BenchmarksTable
-
-RPC_SERVER_PORT = 7070
-
-# This is a fixed detail of the v68 architecture.
-HVX_VECTOR_BYTES = 128
-
-# NOTE on server ports:
-# These tests use different port numbers for the RPC server (7070 + ...).
-# The reason is that an RPC session cannot be gracefully closed without
-# triggering TIME_WAIT state on the server socket. This prevents another
-# server to bind to the same port until the wait time elapses.
-
-
[email protected]_hexagon
-def test_elemwise_add(hexagon_launcher: HexagonLauncherRPC):
- """
- Starting with an elementwise-add computation, try various schedules /
optimizations to
- see the impact they have on performance.
-
- The main motivation for this test is to explore the relationship between
these
- schedules / optimizations vs. how effectively the primfunc uses the
Hexagon's
- HVX units.
- """
- host_output_dir = tempfile.mkdtemp()
-
- print("-" * 80)
- print("OUTPUT DIRECTORY: {}".format(host_output_dir))
- print("-" * 80)
- print()
-
- bt = BenchmarksTable()
-
- # Create and benchmark a single primfunc.
- # If an unexpected problem occurs, raise an exception. Otherwise add a
row of output to 'bt'.
- def test_one_config(dtype, sched_type, mem_scope, num_vectors_per_tensor):
- version_name =
f"dtype:{dtype}-schedtype:{sched_type}-memscope:{mem_scope}-numvecs:{num_vectors_per_tensor}"
- print()
- print(f"CONFIGURATION: {version_name}")
-
- if num_vectors_per_tensor == 2048 and mem_scope == "global.vtcm":
- bt.record_skip(
- dtype=dtype,
- sched_type=sched_type,
- mem_scope=mem_scope,
- num_vectors_per_tensor=num_vectors_per_tensor,
- comments="Expect to exceed VTCM budget.",
- )
- return
-
- dtype_bits = tvm._ffi.runtime_ctypes.DataType(dtype).bits
- assert dtype_bits % 8 == 0
- dtype_bytes = dtype_bits // 8
-
- elem_per_hvx_vector = HVX_VECTOR_BYTES // dtype_bytes
-
- # Note! We're providing the complete input tensor shapes now,
- # whereas the original code only reveals the exact shape when
- # about to call the kernel.
-
- shape = [
- num_vectors_per_tensor,
- elem_per_hvx_vector,
- ]
-
- A = tvm.te.placeholder(shape, dtype=dtype)
- B = tvm.te.placeholder(shape, dtype=dtype)
- C = tvm.te.compute(A.shape, lambda i, j: A[i, j] + B[i, j], name="C")
-
- sched = tvm.te.create_schedule(C.op)
-
- if sched_type == 1:
- pass
- elif sched_type == 2:
- sched[C].vectorize(C.op.axis[1])
- else:
- raise Exception("Unknown schedule type")
-
- # If we're using VTCM, we *must* add a transform_layout step to the
schedule.
- # Otherwise the generated code will crash.
- # As of 2022-04-12 the crash does not provide a useful error message
to the
- # host Python code.
- if mem_scope == "global.vtcm":
- for tensor in [A, B, C]:
- sched[tensor].transform_layout(lambda i, j: [i,
te.AXIS_SEPARATOR, j])
-
- # This module is only created so humans can inspect its IR.
- module_for_ir_dump = tvm.lower(sched, [A, B, C], "foo")
-
- report_path = os.path.join(host_output_dir, f"{version_name}.txt")
-
- with open(report_path, "w") as f:
- f.write("LOWERED IR MODULE:\n")
- f.write(str(module_for_ir_dump))
- f.write("\n")
-
- target_hexagon = tvm.target.hexagon("v68", link_params=True)
- func = tvm.build(
- sched,
- [A, B, C],
- tvm.target.Target(target_hexagon, host=target_hexagon),
- name="elemwise_add",
- )
-
- host_dso_binary_path = os.path.join(host_output_dir,
f"test_binary-{version_name}.so")
- target_dso_binary_filename = "test_binary.so"
-
- func.save(str(host_dso_binary_path))
- print("SAVED BINARY TO HOST PATH:
{}".format(str(host_dso_binary_path)))
-
- hexagon_launcher.upload(host_dso_binary_path,
target_dso_binary_filename)
-
- try:
- with hexagon_launcher.start_session() as sess:
- mod =
hexagon_launcher.load_module(target_dso_binary_filename, sess)
-
- host_numpy_A_data = np.ndarray(shape, dtype=dtype)
- host_numpy_B_data = np.ndarray(shape, dtype=dtype)
-
- for i in range(shape[0]):
- for j in range(shape[1]):
- host_numpy_A_data[i, j] = i + j
- host_numpy_B_data[i, j] = (i + 1) * (j + 1)
-
- host_numpy_C_data_expected = host_numpy_A_data +
host_numpy_B_data
-
- A_data = tvm.nd.empty(shape, dtype, sess.device, mem_scope)
- A_data.copyfrom(host_numpy_A_data)
-
- B_data = tvm.nd.empty(shape, dtype, sess.device, mem_scope)
- B_data.copyfrom(host_numpy_B_data)
-
- C_data = tvm.nd.empty(shape, dtype, sess.device, mem_scope)
-
- # NOTE: We may want to soften these numbers, depending on
future findings.
- timer = mod.time_evaluator("elemwise_add", sess.device,
number=10, repeat=1)
- timing_result = timer(A_data, B_data, C_data)
-
- # Verify that the computation actually happened, and
produced the correct result.
- result = C_data.numpy()
- tvm.testing.assert_allclose(host_numpy_C_data_expected,
result)
-
- bt.record_success(
- timing_result,
- dtype=dtype,
- sched_type=sched_type,
- mem_scope=mem_scope,
- num_vectors_per_tensor=num_vectors_per_tensor,
- )
-
- except Exception as err:
- f.write("ERROR:\n")
- f.write("{}\n".format(err))
- bt.record_fail(
- dtype=dtype,
- sched_type=sched_type,
- mem_scope=mem_scope,
- num_vectors_per_tensor=num_vectors_per_tensor,
- comments=f"See {report_path}",
- )
-
- #
-----------------------------------------------------------------------------------------------
-
- csv_column_order = [
- "dtype",
- "sched_type",
- "mem_scope",
- "num_vectors_per_tensor",
- "row_status",
- "timings_min_usecs",
- "timings_max_usecs",
- "timings_median_usecs",
- "timings_mean_usecs",
- "timings_stddev_usecs",
- "comments",
- ]
-
- # Hexagon v69 allows more dtypes, but we're sticking with v68 for now.
- for dtype in [
- "int8",
- ]:
-
- # These numbers are only meaningful in the context of this script.
- for sched_type in [
- 1,
- 2,
- ]:
-
- for mem_scope in ["global", "global.vtcm"]:
-
- # These numbers are fairly arbitrary, but they're meant to
stress memory/caches to
- # various extents.
- for num_vectors_per_tensor in [
- 1,
- 16,
- 64,
- 512,
- 2048,
- ]:
-
- test_one_config(dtype, sched_type, mem_scope,
num_vectors_per_tensor)
-
- # Report our progress.
- bt.print_csv(sys.stdout, csv_column_order)
-
- print("-" * 80)
- print(f"OUTPUT DIRECTORY: {host_output_dir}")
- print("-" * 80)
- print()
-
- tabular_output_filename = os.path.join(host_output_dir,
"benchmark-results.csv")
- with open(tabular_output_filename, "w") as csv_file:
- bt.print_csv(csv_file, csv_column_order)
- print(f"BENCHMARK RESULTS FILE: {tabular_output_filename}")
-
- if bt.has_fail() > 0:
- pytest.fail("At least one benchmark configuration failed",
pytrace=False)
diff --git a/tests/python/contrib/test_hexagon/benchmark_util.py
b/tests/python/contrib/test_hexagon/benchmark_util.py
index 5a75e9a6e8..113c7780c1 100644
--- a/tests/python/contrib/test_hexagon/benchmark_util.py
+++ b/tests/python/contrib/test_hexagon/benchmark_util.py
@@ -139,3 +139,37 @@ class BenchmarksTable:
csv_line_dict[col_name] = str_value
writer.writerow(csv_line_dict)
+
+
+def get_benchmark_id(keys_dict):
+ """
+ Given a dictionary with the distinguishing characteristics of a particular
benchmark
+ line item, compute a string that uniquely identifies the benchmark.
+
+ The returned string:
+ - is a valid directory name on the host's file systems, and
+ - should be easy for humans to parse
+
+ Note that the insertion order for `keys_dict` affects the computed name.
+ """
+ # Create a copy, because we might be modifying it.
+ d = dict(keys_dict)
+
+ # Sniff for shape-like lists, because we want them in a form that's both
+ # readable and filesystem-friendly...
+ for k, v in d.items():
+ if isinstance(v, list) or isinstance(v, tuple):
+ v2 = "_".join([str(x) for x in v])
+ d[k] = v2
+
+ return "-".join([f"{k}:{v}" for k, v in d.items()])
+
+
+def get_benchmark_decription(keys_dict):
+ """
+ Similar to `get_benchmark_id`, but the focus is on human-readability.
+
+ The returned string contains no line-breaks, but may contain spaces and
+ other characters that make it unsuitable for use as a filename.
+ """
+ return " ".join([f"{k}={v}" for k, v in keys_dict.items()])