This is an automated email from the ASF dual-hosted git repository.
csullivan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 2ae20882d3 [hexagon][testing] add TIRScript elemwise-add (#11490)
2ae20882d3 is described below
commit 2ae20882d3e34cc6e5acef992c23c17a585c25aa
Author: Christian Convey <[email protected]>
AuthorDate: Fri Jun 3 11:58:30 2022 -0400
[hexagon][testing] add TIRScript elemwise-add (#11490)
Replace TE-based elementwise-add benchmark with
a TVMScript-based one.
Update Hexagon target architecture from v68 to v69.
As a result, the benchmark now requires a version of
Hexagon SDK newer than 4.4.0.1. Version 4.5.0.3 is
known to work.
---
.../contrib/test_hexagon/benchmark_elemwise_add.py | 434 +++++++++++++++++++++
.../contrib/test_hexagon/benchmark_hexagon.py | 245 ------------
.../python/contrib/test_hexagon/benchmark_util.py | 34 ++
3 files changed, 468 insertions(+), 245 deletions(-)
diff --git a/tests/python/contrib/test_hexagon/benchmark_elemwise_add.py
b/tests/python/contrib/test_hexagon/benchmark_elemwise_add.py
new file mode 100644
index 0000000000..70266d7939
--- /dev/null
+++ b/tests/python/contrib/test_hexagon/benchmark_elemwise_add.py
@@ -0,0 +1,434 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import os.path
+import sys
+import pytest
+import numpy as np
+import logging
+import tempfile
+
+import tvm.testing
+import tvm.script
+from tvm.script import tir as T
+from tvm import te
+from tvm.contrib.hexagon.build import HexagonLauncherRPC
+from . import benchmark_util
+
+# This is a fixed detail of the v68 architecture.
+HVX_VECTOR_BYTES = 128
+
+_HEXAGON_TARGET = tvm.target.hexagon("v69", link_params=True)
+
+_SUPER_TARGET = tvm.target.Target(_HEXAGON_TARGET, host=_HEXAGON_TARGET)
+
+# NOTE on server ports:
+# These tests use different port numbers for the RPC server (7070 + ...).
+# The reason is that an RPC session cannot be gracefully closed without
+# triggering TIME_WAIT state on the server socket. This prevents another
+# server from binding to the same port until the wait time elapses.
+
+_BT = benchmark_util.BenchmarksTable()
+
+_CSV_COLUMN_ORDER = [
+ # Identifies which TE-compute / TIRScript is used as the basis for the
+ # benchmarked primfunc. Only needs to be meaningful to humans.
+ "basic_kernel",
+ # The tensors' element type
+ "dtype",
+ # When applicable, indicates the particular variation of schedules
+ # applied by the Python code. Decoding this may require looking at this
+ # script's source code.
+ "sched_type",
+ # The memory location of the tensors used during the execution of
+ # the primfunc. We currently assume just one location.
+ # This will likely need to be generalized as we add more sophisticated
+ # primfuncs.
+ "mem_scope",
+ # For primfuncs that treat tensor buffers as collections of 1D vectors,
+ # this is the number of vectors in each tensor.
+ # This will likely need to be generalized as we add more sophisticated
+ # primfuncs.
+ "num_vectors_per_tensor",
+ # Reserved columns defined by the BenchmarksTable class.
+ "row_status",
+ "timings_min_usecs",
+ "timings_max_usecs",
+ "timings_median_usecs",
+ "timings_mean_usecs",
+ "timings_stddev_usecs",
+ # For benchmarks that produce files on the host file system, this indicates
+ # their location. Useful for post-mortem investigation of benchmark
results.
+ "host_files_dir_path",
+ # Miscellaneous comments about the benchmark.
+ "comments",
+]
+
+_HOST_OUTPUT_DIR = tempfile.mkdtemp()
+
+_PRIMFUNC_NAME = "elemwise_add"
+
+print("-" * 80)
+print("OUTPUT DIRECTORY: {}".format(_HOST_OUTPUT_DIR))
+print("-" * 80)
+print()
+
+
+class UnsupportedException(Exception):
+ """
+ Indicates that the specified benchmarking configuration is known to
+ currently be unsupported. The Exception message may provide more detail.
+ """
+
+
+class NumericalAccuracyException(Exception):
+ """
+ Indicates that the benchmarking configuration appeared to run successfully,
+ but the output data didn't have the expected accuracy.
+ """
+
+
+from typing import Tuple
+
+
+def _get_irmod_elemwise_add(
+ _PRIMFUNC_NAME: str, shape: list, dtype: str, mem_scope: str
+) -> tvm.ir.module.IRModule:
+ """
+ Return an IRModule containing a single primfunc, expressed as NS-TIR.
+
+ The primfunc implements elementwise-add. Its signature is (A,B,C), where
+ A and B are the input tensors, and C is the output tensor.
+ All three tensors have the specified shape, dtype, and mem_scope.
+
+ If the specified primfunc is known to be unsupported, raise an
UnsupportedException.
+ """
+ assert len(shape) == 2
+
+ # TVMScript can reference simple Python variables, but it doesn't
+ # currently support more complex Python expressions...
+ (
+ dim0_size,
+ dim1_size,
+ ) = shape
+ dtype_str = str(dtype)
+
+ if mem_scope == "global.vtcm":
+ raise UnsupportedException("This benchmark kernel does not yet support
VTCM buffers.")
+
+ # This check is currently elided by the one above, but it should
become relevant as soon
+ # as we add VTCM support to this kernel generator.
+ #
+ # Also: The VTCM budget is a very rough estimate, based only on
experience.
+ # Assuming that it's even reasonable to use a hard-coded estimate AT
ALL, this number
+ # may need tweaking.
+ estimated_vtcm_budget_bytes = HVX_VECTOR_BYTES * 1024
+
+ dtype_bits = tvm._ffi.runtime_ctypes.DataType(dtype).bits
+ assert dtype_bits % 8 == 0
+ dtype_bytes = dtype_bits // 8
+
+ num_vtcm_tensors = 3
+ estimated_vtcm_needed_bytes = shape[0] * shape[1] * dtype_bytes *
num_vtcm_tensors
+
+ if estimated_vtcm_needed_bytes > estimated_vtcm_budget_bytes:
+ raise UnsupportedException("Expect to exceed VTCM budget.")
+
+ @tvm.script.ir_module
+ class BenchmarkModule:
+ @T.prim_func
+ def main(a: T.handle, b: T.handle, c: T.handle):
+ # We exchange data between functions by handles, which are similar
to pointers.
+ T.func_attr({"global_symbol": "main", "tir.noalias": True})
+
+ A = T.match_buffer(a, shape, dtype=dtype)
+ B = T.match_buffer(b, shape, dtype=dtype)
+ C = T.match_buffer(c, shape, dtype=dtype)
+
+ for i in range(dim0_size):
+ for j in range(dim1_size):
+ C[i, j] = A[i, j] + B[i, j]
+
+ return BenchmarkModule
+
+
+def _benchmark_hexagon_elementwise_add_kernel(
+ hexagon_launcher: HexagonLauncherRPC, shape: list, dtype: str, mem_scope:
str
+):
+ """
+ Generate and benchmark a single elementwise-add kernel for Hexagon.
+
+ Produce these outputs:
+ - Printed status updates / results to stdout and/or stderr.
+
+ - Create a new subdirectory under _HOST_OUTPUT_DIR, and populate it with
+ various logs and intermediate files.
+
+ - Add to _BT a row describing this benchmark run.
+ """
+ # Represent the benchmark details in a form required by the benchmark table
+ # and for other logging...
+ keys_dict = {
+ "basic_kernel": "ewise-add",
+ "dtype": dtype,
+ "shape": shape,
+ "mem_scope": mem_scope,
+ }
+
+ desc = benchmark_util.get_benchmark_decription(keys_dict)
+
+ # Create the host-side directory for this benchmark run's files / logs...
+ host_files_dir_name = benchmark_util.get_benchmark_id(keys_dict)
+ host_files_dir_path = os.path.join(_HOST_OUTPUT_DIR, host_files_dir_name)
+ os.mkdir(host_files_dir_path)
+
+ keys_dict["host_files_dir_path"] = host_files_dir_path
+
+ log_file_path = os.path.join(host_files_dir_path, "out.txt")
+ with open(log_file_path, "w") as log_file:
+ print(f"CONFIGURATION: {desc}")
+ log_file.write(f"CONFIGURATION: {desc}\n")
+
+ try:
+ ns_tir_module = _get_irmod_elemwise_add(_PRIMFUNC_NAME, shape,
dtype, mem_scope)
+
+ # Dump the primfunc NS-TIR (as text) to the log file...
+ lowered_mod = tvm.lower(ns_tir_module, _PRIMFUNC_NAME)
+ log_file.write("LOWERED IR MODULE:\n")
+ log_file.write(str(lowered_mod))
+ log_file.write("\n")
+
+ # Lower the primfunc's IRModule to Hexagon object code...
+ A = tvm.te.placeholder(shape, dtype=dtype)
+ B = tvm.te.placeholder(shape, dtype=dtype)
+ C = tvm.te.placeholder(shape, dtype=dtype)
+
+ built_module: tvm.driver.build_module.OperatorModule = tvm.build(
+ ns_tir_module,
+ [
+ A,
+ B,
+ C,
+ ],
+ _SUPER_TARGET,
+ name=_PRIMFUNC_NAME,
+ )
+
+ # Create an actual Hexagon-native shared object file, initially
stored on the
+ # host's file system...
+ host_dso_binary_path = os.path.join(host_files_dir_path,
"test_binary.so")
+ built_module.save(host_dso_binary_path)
+ print(f"SAVED BINARY TO HOST PATH: {host_dso_binary_path}")
+
+ # Upload the .so to the Android device's file system (or wherever
is appropriate
+ # when using the Hexagon simulator)...
+ target_dso_binary_filename = "test_binary.so"
+ hexagon_launcher.upload(host_dso_binary_path,
target_dso_binary_filename)
+
+ # Generate our testing / validation data...
+ (
+ host_numpy_A_data,
+ host_numpy_B_data,
+ host_numpy_C_data_expected,
+ ) = _get_elemwise_add_reference_value_tensors(shape, dtype)
+
+ with hexagon_launcher.start_session() as sess:
+ # On the target device / simulator, make our Hexagon-native
shared object
+ # available for use...
+ loaded_hexagon_module: tvm.runtime.module.Module =
hexagon_launcher.load_module(
+ target_dso_binary_filename, sess
+ )
+
+ # Create the target-side tensors to hold the primfunc's inputs
and outputs...
+ A_data = tvm.nd.empty(shape, dtype, sess.device, mem_scope)
+ B_data = tvm.nd.empty(shape, dtype, sess.device, mem_scope)
+ C_data = tvm.nd.empty(shape, dtype, sess.device, mem_scope)
+
+ # Populate the primfunc's input tensors...
+ A_data.copyfrom(host_numpy_A_data)
+ B_data.copyfrom(host_numpy_B_data)
+
+ # Actually benchmark the primfunc...
+ timer = loaded_hexagon_module.time_evaluator(
+ "main", sess.device, number=10, repeat=1
+ )
+ timing_result = timer(A_data, B_data, C_data)
+
+ print(f"TIMING RESULT: {timing_result}")
+ log_file.write(f"TIMING RESULT: {timing_result}\n")
+
+ # Verify that the computation actually happened, and produced
the correct result.
+ result = C_data.numpy()
+
+ if dtype == "float16":
+ # These are the closest tolerance we currently expect /
require for these
+ # kernels. They may be changed in the future.
+ rel_tolerance = 0.005
+ abs_tolerance = 2.0
+ elif dtype == "int8":
+ rel_tolerance = 0
+ abs_tolerance = 0
+ else:
+ raise Exception(f"Unexpected dtype: {dtype}")
+
+ # TODO: We're assuming that *any* assertion thrown by
'assert_allclose' is because
+ # the numerical differences were too large. But ideally this
code would
+ # differentiate between (a) numerical difference errors, which
should simply be
+ # recorded as a failed benchmark run, vs. (b) more serious
errors that should
+ # kill the overall script.
+ try:
+ tvm.testing.assert_allclose(
+ result, host_numpy_C_data_expected, rel_tolerance,
abs_tolerance
+ )
+ except AssertionError as e:
+ raise NumericalAccuracyException(str(e))
+
+ _BT.record_success(timing_result, **keys_dict)
+
+ except NumericalAccuracyException as e:
+ print()
+ print(f"FAIL: Numerical accuracy error. See log file.")
+
+ log_file.write("\n")
+ log_file.write(f"FAIL: {e}\n")
+
+ _BT.record_fail(**keys_dict, comments=f"Numerical accuracy error.
See log file.")
+
+ except UnsupportedException as e:
+ print()
+ print(f"SKIP: {e}")
+
+ log_file.write("\n")
+ log_file.write(f"SKIP: {e}\n")
+
+ _BT.record_skip(**keys_dict, comments=f"Unsupported configuration:
{e}")
+
+
+def _get_elemwise_add_reference_value_tensors(shape: list, dtype: str):
+ """
+ Return [A:np.array, B:np.array, C:np.array]
+
+ `A`, `B`, and `C` are reference data used to exercise and validate
+ an elementwise-add kernel: C = A+B.
+
+ NOTE: These data are primarily meant for performance testing.
+ The values may be helpful in detecting correctness issues, but that's
+ a secondary consideration here.
+ """
+ assert len(shape) == 2
+
+ A = np.ndarray(shape, dtype=dtype)
+ B = np.ndarray(shape, dtype=dtype)
+
+ np_dtype = A.dtype
+
+ if np_dtype.kind in ["i", "u"]:
+ # We allow overflow for integer types because it tends to be
well-behaved
+ # and well-understood...
+ min_value = np.iinfo(np_dtype).min
+ max_value = np.iinfo(np_dtype).max
+
+ next_value = min_value
+
+ for i in range(shape[0]):
+ for j in range(shape[1]):
+ A[i, j] = next_value
+ B[i, j] = next_value * 2
+ next_value += 1
+
+ elif np_dtype.kind == "f":
+ # NOTE: For simplicity, we avoid test data that require
+ # well-defined behavior on floating-point overflow.
+ # But it may be reasonable to test that in the future.
+ min_value = np.finfo(np_dtype).min
+ max_value = np.finfo(np_dtype).max
+
+ min_input_value = min_value / 2.0 + 1
+ max_input_value = max_value / 2.0 - 2
+ delta = (max_input_value - min_input_value) / (shape[0] * shape[1])
+
+ next_value = min_input_value
+
+ for i in range(shape[0]):
+ for j in range(shape[1]):
+ A[i, j] = next_value
+ B[i, j] = next_value + 1
+ next_value += delta
+
+ else:
+ assert False, f"Unexpected data type: {np_dtype}"
+
+ C = A + B
+ return [
+ A,
+ B,
+ C,
+ ]
+
+
[email protected]_hexagon
+def test_elemwise_add(hexagon_launcher: HexagonLauncherRPC):
+ for dtype in [
+ "int8",
+ "float16",
+ ]:
+
+ for mem_scope in [
+ "global",
+ "global.vtcm",
+ ]:
+
+ # These numbers are fairly arbitrary, but they're meant to stress
memory/caches to
+ # various extents.
+ for num_vectors_per_tensor in [
+ 1,
+ 16,
+ 64,
+ 512,
+ 2048,
+ ]:
+
+ dtype_bits = tvm._ffi.runtime_ctypes.DataType(dtype).bits
+ assert dtype_bits % 8 == 0
+ dtype_bytes = dtype_bits // 8
+
+ elem_per_hvx_vector = HVX_VECTOR_BYTES // dtype_bytes
+
+ shape = [
+ num_vectors_per_tensor,
+ elem_per_hvx_vector,
+ ]
+
+ print()
+ _benchmark_hexagon_elementwise_add_kernel(hexagon_launcher,
shape, dtype, mem_scope)
+
+ print("-" * 80)
+ print(f"OUTPUT DIRECTORY: {_HOST_OUTPUT_DIR}")
+ print("-" * 80)
+ print()
+
+ tabular_output_filename = os.path.join(_HOST_OUTPUT_DIR,
"benchmark-results.csv")
+ with open(tabular_output_filename, "w") as csv_file:
+ _BT.print_csv(csv_file, _CSV_COLUMN_ORDER)
+
+ print(f"BENCHMARK RESULTS FILE: {tabular_output_filename}")
+
+ _BT.print_csv(sys.stdout, _CSV_COLUMN_ORDER)
+
+ if _BT.has_fail() > 0:
+ pytest.fail("At least one benchmark configuration failed",
pytrace=False)
diff --git a/tests/python/contrib/test_hexagon/benchmark_hexagon.py
b/tests/python/contrib/test_hexagon/benchmark_hexagon.py
deleted file mode 100644
index 2a1d6796e7..0000000000
--- a/tests/python/contrib/test_hexagon/benchmark_hexagon.py
+++ /dev/null
@@ -1,245 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import os
-import os.path
-import sys
-import pytest
-import numpy as np
-import logging
-import tempfile
-
-import tvm.testing
-from tvm import te
-from tvm.contrib.hexagon.build import HexagonLauncherRPC
-from .benchmark_util import BenchmarksTable
-
-RPC_SERVER_PORT = 7070
-
-# This is a fixed detail of the v68 architecture.
-HVX_VECTOR_BYTES = 128
-
-# NOTE on server ports:
-# These tests use different port numbers for the RPC server (7070 + ...).
-# The reason is that an RPC session cannot be gracefully closed without
-# triggering TIME_WAIT state on the server socket. This prevents another
-# server to bind to the same port until the wait time elapses.
-
-
[email protected]_hexagon
-def test_elemwise_add(hexagon_launcher: HexagonLauncherRPC):
- """
- Starting with an elementwise-add computation, try various schedules /
optimizations to
- see the impact they have on performance.
-
- The main motivation for this test is to explore the relationship between
these
- schedules / optimizations vs. how effectively the primfunc uses the
Hexagon's
- HVX units.
- """
- host_output_dir = tempfile.mkdtemp()
-
- print("-" * 80)
- print("OUTPUT DIRECTORY: {}".format(host_output_dir))
- print("-" * 80)
- print()
-
- bt = BenchmarksTable()
-
- # Create and benchmark a single primfunc.
- # If an unexpected problem occurs, raise an exception. Otherwise add a
row of output to 'bt'.
- def test_one_config(dtype, sched_type, mem_scope, num_vectors_per_tensor):
- version_name =
f"dtype:{dtype}-schedtype:{sched_type}-memscope:{mem_scope}-numvecs:{num_vectors_per_tensor}"
- print()
- print(f"CONFIGURATION: {version_name}")
-
- if num_vectors_per_tensor == 2048 and mem_scope == "global.vtcm":
- bt.record_skip(
- dtype=dtype,
- sched_type=sched_type,
- mem_scope=mem_scope,
- num_vectors_per_tensor=num_vectors_per_tensor,
- comments="Expect to exceed VTCM budget.",
- )
- return
-
- dtype_bits = tvm._ffi.runtime_ctypes.DataType(dtype).bits
- assert dtype_bits % 8 == 0
- dtype_bytes = dtype_bits // 8
-
- elem_per_hvx_vector = HVX_VECTOR_BYTES // dtype_bytes
-
- # Note! We're providing the complete input tensor shapes now,
- # whereas the original code only reveals the exact shape when
- # about to call the kernel.
-
- shape = [
- num_vectors_per_tensor,
- elem_per_hvx_vector,
- ]
-
- A = tvm.te.placeholder(shape, dtype=dtype)
- B = tvm.te.placeholder(shape, dtype=dtype)
- C = tvm.te.compute(A.shape, lambda i, j: A[i, j] + B[i, j], name="C")
-
- sched = tvm.te.create_schedule(C.op)
-
- if sched_type == 1:
- pass
- elif sched_type == 2:
- sched[C].vectorize(C.op.axis[1])
- else:
- raise Exception("Unknown schedule type")
-
- # If we're using VTCM, we *must* add a transform_layout step to the
schedule.
- # Otherwise the generated code will crash.
- # As of 2022-04-12 the crash does not provide a useful error message
to the
- # host Python code.
- if mem_scope == "global.vtcm":
- for tensor in [A, B, C]:
- sched[tensor].transform_layout(lambda i, j: [i,
te.AXIS_SEPARATOR, j])
-
- # This module is only created so humans can inspect its IR.
- module_for_ir_dump = tvm.lower(sched, [A, B, C], "foo")
-
- report_path = os.path.join(host_output_dir, f"{version_name}.txt")
-
- with open(report_path, "w") as f:
- f.write("LOWERED IR MODULE:\n")
- f.write(str(module_for_ir_dump))
- f.write("\n")
-
- target_hexagon = tvm.target.hexagon("v68", link_params=True)
- func = tvm.build(
- sched,
- [A, B, C],
- tvm.target.Target(target_hexagon, host=target_hexagon),
- name="elemwise_add",
- )
-
- host_dso_binary_path = os.path.join(host_output_dir,
f"test_binary-{version_name}.so")
- target_dso_binary_filename = "test_binary.so"
-
- func.save(str(host_dso_binary_path))
- print("SAVED BINARY TO HOST PATH:
{}".format(str(host_dso_binary_path)))
-
- hexagon_launcher.upload(host_dso_binary_path,
target_dso_binary_filename)
-
- try:
- with hexagon_launcher.start_session() as sess:
- mod =
hexagon_launcher.load_module(target_dso_binary_filename, sess)
-
- host_numpy_A_data = np.ndarray(shape, dtype=dtype)
- host_numpy_B_data = np.ndarray(shape, dtype=dtype)
-
- for i in range(shape[0]):
- for j in range(shape[1]):
- host_numpy_A_data[i, j] = i + j
- host_numpy_B_data[i, j] = (i + 1) * (j + 1)
-
- host_numpy_C_data_expected = host_numpy_A_data +
host_numpy_B_data
-
- A_data = tvm.nd.empty(shape, dtype, sess.device, mem_scope)
- A_data.copyfrom(host_numpy_A_data)
-
- B_data = tvm.nd.empty(shape, dtype, sess.device, mem_scope)
- B_data.copyfrom(host_numpy_B_data)
-
- C_data = tvm.nd.empty(shape, dtype, sess.device, mem_scope)
-
- # NOTE: We may want to soften these numbers, depending on
future findings.
- timer = mod.time_evaluator("elemwise_add", sess.device,
number=10, repeat=1)
- timing_result = timer(A_data, B_data, C_data)
-
- # Verify that the computation actually happened, and
produced the correct result.
- result = C_data.numpy()
- tvm.testing.assert_allclose(host_numpy_C_data_expected,
result)
-
- bt.record_success(
- timing_result,
- dtype=dtype,
- sched_type=sched_type,
- mem_scope=mem_scope,
- num_vectors_per_tensor=num_vectors_per_tensor,
- )
-
- except Exception as err:
- f.write("ERROR:\n")
- f.write("{}\n".format(err))
- bt.record_fail(
- dtype=dtype,
- sched_type=sched_type,
- mem_scope=mem_scope,
- num_vectors_per_tensor=num_vectors_per_tensor,
- comments=f"See {report_path}",
- )
-
- #
-----------------------------------------------------------------------------------------------
-
- csv_column_order = [
- "dtype",
- "sched_type",
- "mem_scope",
- "num_vectors_per_tensor",
- "row_status",
- "timings_min_usecs",
- "timings_max_usecs",
- "timings_median_usecs",
- "timings_mean_usecs",
- "timings_stddev_usecs",
- "comments",
- ]
-
- # Hexagon v69 allows more dtypes, but we're sticking with v68 for now.
- for dtype in [
- "int8",
- ]:
-
- # These numbers are only meaningful in the context of this script.
- for sched_type in [
- 1,
- 2,
- ]:
-
- for mem_scope in ["global", "global.vtcm"]:
-
- # These numbers are fairly arbitrary, but they're meant to
stress memory/caches to
- # various extents.
- for num_vectors_per_tensor in [
- 1,
- 16,
- 64,
- 512,
- 2048,
- ]:
-
- test_one_config(dtype, sched_type, mem_scope,
num_vectors_per_tensor)
-
- # Report our progress.
- bt.print_csv(sys.stdout, csv_column_order)
-
- print("-" * 80)
- print(f"OUTPUT DIRECTORY: {host_output_dir}")
- print("-" * 80)
- print()
-
- tabular_output_filename = os.path.join(host_output_dir,
"benchmark-results.csv")
- with open(tabular_output_filename, "w") as csv_file:
- bt.print_csv(csv_file, csv_column_order)
- print(f"BENCHMARK RESULTS FILE: {tabular_output_filename}")
-
- if bt.has_fail() > 0:
- pytest.fail("At least one benchmark configuration failed",
pytrace=False)
diff --git a/tests/python/contrib/test_hexagon/benchmark_util.py
b/tests/python/contrib/test_hexagon/benchmark_util.py
index 5a75e9a6e8..113c7780c1 100644
--- a/tests/python/contrib/test_hexagon/benchmark_util.py
+++ b/tests/python/contrib/test_hexagon/benchmark_util.py
@@ -139,3 +139,37 @@ class BenchmarksTable:
csv_line_dict[col_name] = str_value
writer.writerow(csv_line_dict)
+
+
+def get_benchmark_id(keys_dict):
+ """
+ Given a dictionary with the distinguishing characteristics of a particular
benchmark
+ line item, compute a string that uniquely identifies the benchmark.
+
+ The returned string:
+ - is a valid directory name on the host's file systems, and
+ - should be easy for humans to parse
+
+ Note that the insertion order for `keys_dict` affects the computed name.
+ """
+ # Create a copy, because we might be modifying it.
+ d = dict(keys_dict)
+
+ # Sniff for shape-like lists, because we want them in a form that's both
+ # readable and filesystem-friendly...
+ for k, v in d.items():
+ if isinstance(v, list) or isinstance(v, tuple):
+ v2 = "_".join([str(x) for x in v])
+ d[k] = v2
+
+ return "-".join([f"{k}:{v}" for k, v in d.items()])
+
+
+def get_benchmark_decription(keys_dict):
+ """
+ Similar to `get_benchmark_id`, but the focus is on human-readability.
+
+ The returned string contains no line-breaks, but may contain spaces and
+ other characters that make it unsuitable for use as a filename.
+ """
+ return " ".join([f"{k}={v}" for k, v in keys_dict.items()])