(tvm) branch unity updated: [Unity][Fix] Fix mismatched intrinsic name (#16418)

tqchen Wed, 17 Jan 2024 14:59:15 -0800

This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch unity
in repository https://gitbox.apache.org/repos/asf/tvm.git



The following commit(s) were added to refs/heads/unity by this push:
     new c470e1a922 [Unity][Fix] Fix mismatched intrinsic name (#16418)
c470e1a922 is described below

commit c470e1a922d82d706c1a6a9ea91aa0d83f86fb21
Author: Siyuan Feng <[email protected]>
AuthorDate: Thu Jan 18 06:58:58 2024 +0800

    [Unity][Fix] Fix mismatched intrinsic name (#16418)
    
    As we introduced more dp4a intrinsics, we need to update the name in the
    testcases.
---
 tests/python/integration/test_auto_tensorize.py                     | 4 ++--
 .../meta_schedule/test_meta_schedule_schedule_rule_mlt_intrin.py    | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/python/integration/test_auto_tensorize.py b/tests/python/integration/test_auto_tensorize.py
index 7831e5c8d7..8b5dd63fc8 100644
--- a/tests/python/integration/test_auto_tensorize.py
+++ b/tests/python/integration/test_auto_tensorize.py
@@ -26,7 +26,7 @@ from tvm import meta_schedule as ms
 from tvm import relay
 from tvm.meta_schedule.testing import relay_workload
 from tvm.meta_schedule.testing.tlcbench import load_quantized_bert_base
-from tvm.tir.tensor_intrin.arm_cpu import DP4A_INTRIN
+from tvm.tir.tensor_intrin.arm_cpu import DP4A_S8S8S32_INTRIN
 from tvm.tir.tensor_intrin.rocm import AMDGPU_SDOT4_INTRIN
 from tvm.tir.tensor_intrin.x86 import VNNI_DOT_16x4_INTRIN as VNNI_INTRIN
 from tvm.tir.tensor_intrin.x86 import AVX512_DOT_16x4_INTRIN as AVX512_INTRIN
@@ -126,7 +126,7 @@ def _get_sch_rules_for_dp4a(intrin):
     ]
 
 
-SCH_RULES_FOR_DP4A = _get_sch_rules_for_dp4a(DP4A_INTRIN)
+SCH_RULES_FOR_DP4A = _get_sch_rules_for_dp4a(DP4A_S8S8S32_INTRIN)
 SCH_RULES_FOR_SDOT4 = _get_sch_rules_for_dp4a(AMDGPU_SDOT4_INTRIN)
 
 POSTPROCS_FOR_VNNI = [
diff --git a/tests/python/meta_schedule/test_meta_schedule_schedule_rule_mlt_intrin.py b/tests/python/meta_schedule/test_meta_schedule_schedule_rule_mlt_intrin.py
index 1f682d8018..39675d0531 100644
--- a/tests/python/meta_schedule/test_meta_schedule_schedule_rule_mlt_intrin.py
+++ b/tests/python/meta_schedule/test_meta_schedule_schedule_rule_mlt_intrin.py
@@ -25,7 +25,7 @@ from tvm.meta_schedule.testing.space_generation import (
 )
 from tvm.script import tir as T
 from tvm.target import Target
-from tvm.tir.tensor_intrin.arm_cpu import DP4A_INTRIN
+from tvm.tir.tensor_intrin.arm_cpu import DP4A_S8S8S32_INTRIN
 from tvm.tir.tensor_intrin.x86 import AVX512_DOT_16x4_INTRIN as AVX512_INTRIN
 from tvm.tir.tensor_intrin.x86 import VNNI_DOT_16x4_INTRIN as VNNI_INTRIN
 
@@ -281,7 +281,7 @@ def _check_dp4a_dense(m, n, k, in_dtype, out_dtype, expected_mods, expected_deci
         types=None,
         sch_rules=[
             ms.schedule_rule.MultiLevelTilingWithIntrin(
-                DP4A_INTRIN,
+                DP4A_S8S8S32_INTRIN,
                 structure="SSSRRSRS",
                 tile_binds=["blockIdx.x", "vthread.x", "threadIdx.x"],
                 max_innermost_factor=64,
@@ -343,7 +343,7 @@ def test_dp4a_dense():
                                     W_shared[v_j, v_k_o * 4 : v_k_o * 4 + 4],
                                 )
                                 T.writes(compute_local[v_i, v_j])
-                                T.block_attr({"meta_schedule.auto_tensorize": "dp4a"})
+                                T.block_attr({"meta_schedule.auto_tensorize": "dp4a_s8s8s32"})
                                 with T.init():
                                     with T.block("compute_init"):
                                         T.reads()

(tvm) branch unity updated: [Unity][Fix] Fix mismatched intrinsic name (#16418)

Reply via email to