This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 383c4465c2 [Tests][AArch64] Make SVE codegen assertions robust across LLVM versions (#19752)
383c4465c2 is described below

commit 383c4465c2ba381b6e9f58ff60d982d3c032b0e0
Author: Shushi Hong <[email protected]>
AuthorDate: Sat Jun 13 07:31:05 2026 -0400

    [Tests][AArch64] Make SVE codegen assertions robust across LLVM versions (#19752)
    
    `tests/python/codegen/test_target_codegen_aarch64.py` cross-compiles
    AArch64 SVE kernels and regex-matches the generated assembly for
    specific instruction forms and counts. Several of those assertions
    encode the exact code shape produced by the LLVM versions used in CI
    (15-17). On a TVM built against LLVM 20 the tests fail, even though the
    emitted IR is correct (+sve target-features and vscale_range(1,16) are
    present) -- the difference is entirely inside LLVM's loop vectorizer /
    cost model, not in TVM's codegen.
---
 .../python/codegen/test_target_codegen_aarch64.py  | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/tests/python/codegen/test_target_codegen_aarch64.py b/tests/python/codegen/test_target_codegen_aarch64.py
index 1c5618bb6b..8e16949d2b 100644
--- a/tests/python/codegen/test_target_codegen_aarch64.py
+++ b/tests/python/codegen/test_target_codegen_aarch64.py
@@ -179,9 +179,19 @@ def test_muladd(dtype):
     assembly = f.inspect_source("asm")
     loads = re.findall("ld1[whdb]      { z", assembly)
     matches = re.findall(
-        r"mad|mla\tz[0-9].[shdb],( p[0-9]/[m],)? z[0-9].[shdb], z[0-9].[shdb]", assembly
+        # Group the mad|mla alternation: a top-level alternation would let a bare
+        # "mad" match anywhere in the assembly (e.g. inside scalar "fmadd").
+        r"(?:mad|mla)\tz[0-9].[shdb],( p[0-9]/[m],)? z[0-9].[shdb], z[0-9].[shdb]",
+        assembly,
     )
 
+    if llvm_version_major() >= 18 and dtype in ("float", "float16"):
+        # Newer LLVM cost models (observed with LLVM 20) prefer a fixed-width
+        # NEON main loop over a scalable SVE loop for floating-point fmuladd
+        # on generic AArch64 targets, so also accept the NEON form.
+        loads += re.findall(r"ld[rp]\tq[0-9]", assembly)
+        matches += re.findall(r"fml[as]\tv[0-9]+\.[0-9]+[hs]", assembly)
+
     assert len(loads) > 1
     assert len(matches) > 1
 
@@ -385,7 +395,10 @@ def test_eq(dtype):
     )
 
     assert len(loads) > 1
-    assert len(matches) > 1
+    # The number of SVE compares depends on the LLVM cost model: LLVM <= 17
+    # interleaves the scalable loop by two, while LLVM 20 emits a fixed-width
+    # NEON main loop with a single predicated SVE epilogue.
+    assert len(matches) > 0
 
 
 @pytest.mark.skipif(
@@ -424,7 +437,10 @@ def test_neq(dtype):
     )
 
     assert len(loads) > 1
-    assert len(matches) > 1
+    # The number of SVE compares depends on the LLVM cost model: LLVM <= 17
+    # interleaves the scalable loop by two, while LLVM 20 emits a fixed-width
+    # NEON main loop with a single predicated SVE epilogue.
+    assert len(matches) > 0
 
 
 @pytest.mark.skipif(

Reply via email to