This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a commit to branch unity
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/unity by this push:
new c470e1a922 [Unity][Fix] Fix mismatched intrinsic name (#16418)
c470e1a922 is described below
commit c470e1a922d82d706c1a6a9ea91aa0d83f86fb21
Author: Siyuan Feng <[email protected]>
AuthorDate: Thu Jan 18 06:58:58 2024 +0800
[Unity][Fix] Fix mismatched intrinsic name (#16418)
Since more dp4a intrinsics have been introduced, the intrinsic names
referenced in the test cases need to be updated to match.
---
tests/python/integration/test_auto_tensorize.py | 4 ++--
.../meta_schedule/test_meta_schedule_schedule_rule_mlt_intrin.py | 6 +++---
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/tests/python/integration/test_auto_tensorize.py b/tests/python/integration/test_auto_tensorize.py
index 7831e5c8d7..8b5dd63fc8 100644
--- a/tests/python/integration/test_auto_tensorize.py
+++ b/tests/python/integration/test_auto_tensorize.py
@@ -26,7 +26,7 @@ from tvm import meta_schedule as ms
from tvm import relay
from tvm.meta_schedule.testing import relay_workload
from tvm.meta_schedule.testing.tlcbench import load_quantized_bert_base
-from tvm.tir.tensor_intrin.arm_cpu import DP4A_INTRIN
+from tvm.tir.tensor_intrin.arm_cpu import DP4A_S8S8S32_INTRIN
from tvm.tir.tensor_intrin.rocm import AMDGPU_SDOT4_INTRIN
from tvm.tir.tensor_intrin.x86 import VNNI_DOT_16x4_INTRIN as VNNI_INTRIN
from tvm.tir.tensor_intrin.x86 import AVX512_DOT_16x4_INTRIN as AVX512_INTRIN
@@ -126,7 +126,7 @@ def _get_sch_rules_for_dp4a(intrin):
]
-SCH_RULES_FOR_DP4A = _get_sch_rules_for_dp4a(DP4A_INTRIN)
+SCH_RULES_FOR_DP4A = _get_sch_rules_for_dp4a(DP4A_S8S8S32_INTRIN)
SCH_RULES_FOR_SDOT4 = _get_sch_rules_for_dp4a(AMDGPU_SDOT4_INTRIN)
POSTPROCS_FOR_VNNI = [
diff --git a/tests/python/meta_schedule/test_meta_schedule_schedule_rule_mlt_intrin.py b/tests/python/meta_schedule/test_meta_schedule_schedule_rule_mlt_intrin.py
index 1f682d8018..39675d0531 100644
--- a/tests/python/meta_schedule/test_meta_schedule_schedule_rule_mlt_intrin.py
+++ b/tests/python/meta_schedule/test_meta_schedule_schedule_rule_mlt_intrin.py
@@ -25,7 +25,7 @@ from tvm.meta_schedule.testing.space_generation import (
)
from tvm.script import tir as T
from tvm.target import Target
-from tvm.tir.tensor_intrin.arm_cpu import DP4A_INTRIN
+from tvm.tir.tensor_intrin.arm_cpu import DP4A_S8S8S32_INTRIN
from tvm.tir.tensor_intrin.x86 import AVX512_DOT_16x4_INTRIN as AVX512_INTRIN
from tvm.tir.tensor_intrin.x86 import VNNI_DOT_16x4_INTRIN as VNNI_INTRIN
@@ -281,7 +281,7 @@ def _check_dp4a_dense(m, n, k, in_dtype, out_dtype, expected_mods, expected_deci
types=None,
sch_rules=[
ms.schedule_rule.MultiLevelTilingWithIntrin(
- DP4A_INTRIN,
+ DP4A_S8S8S32_INTRIN,
structure="SSSRRSRS",
tile_binds=["blockIdx.x", "vthread.x", "threadIdx.x"],
max_innermost_factor=64,
@@ -343,7 +343,7 @@ def test_dp4a_dense():
W_shared[v_j, v_k_o * 4 : v_k_o * 4 + 4],
)
T.writes(compute_local[v_i, v_j])
- T.block_attr({"meta_schedule.auto_tensorize": "dp4a"})
+ T.block_attr({"meta_schedule.auto_tensorize": "dp4a_s8s8s32"})
with T.init():
with T.block("compute_init"):
T.reads()