This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 383c4465c2 [Tests][AArch64] Make SVE codegen assertions robust across
LLVM versions (#19752)
383c4465c2 is described below
commit 383c4465c2ba381b6e9f58ff60d982d3c032b0e0
Author: Shushi Hong <[email protected]>
AuthorDate: Sat Jun 13 07:31:05 2026 -0400
[Tests][AArch64] Make SVE codegen assertions robust across LLVM versions
(#19752)
`tests/python/codegen/test_target_codegen_aarch64.py` cross-compiles
AArch64 SVE kernels and regex-matches the generated assembly for
specific instruction forms and counts. Several of those assertions
encode the exact code shape produced by the LLVM versions used in CI
(15-17). On a TVM built against LLVM 20 the tests fail, even though the
emitted IR is correct (+sve target-features and vscale_range(1,16) are
present) -- the difference is entirely inside LLVM's loop vectorizer /
cost model, not in TVM's codegen.
---
.../python/codegen/test_target_codegen_aarch64.py | 22 +++++++++++++++++++---
1 file changed, 19 insertions(+), 3 deletions(-)
diff --git a/tests/python/codegen/test_target_codegen_aarch64.py b/tests/python/codegen/test_target_codegen_aarch64.py
index 1c5618bb6b..8e16949d2b 100644
--- a/tests/python/codegen/test_target_codegen_aarch64.py
+++ b/tests/python/codegen/test_target_codegen_aarch64.py
@@ -179,9 +179,19 @@ def test_muladd(dtype):
assembly = f.inspect_source("asm")
loads = re.findall("ld1[whdb] { z", assembly)
matches = re.findall(
- r"mad|mla\tz[0-9].[shdb],( p[0-9]/[m],)? z[0-9].[shdb], z[0-9].[shdb]", assembly
+ # Group the mad|mla alternation: a top-level alternation would let a bare
+ # "mad" match anywhere in the assembly (e.g. inside scalar "fmadd").
+ r"(?:mad|mla)\tz[0-9].[shdb],( p[0-9]/[m],)? z[0-9].[shdb], z[0-9].[shdb]",
+ assembly,
)
+ if llvm_version_major() >= 18 and dtype in ("float", "float16"):
+ # Newer LLVM cost models (observed with LLVM 20) prefer a fixed-width
+ # NEON main loop over a scalable SVE loop for floating-point fmuladd
+ # on generic AArch64 targets, so also accept the NEON form.
+ loads += re.findall(r"ld[rp]\tq[0-9]", assembly)
+ matches += re.findall(r"fml[as]\tv[0-9]+\.[0-9]+[hs]", assembly)
+
assert len(loads) > 1
assert len(matches) > 1
@@ -385,7 +395,10 @@ def test_eq(dtype):
)
assert len(loads) > 1
- assert len(matches) > 1
+ # The number of SVE compares depends on the LLVM cost model: LLVM <= 17
+ # interleaves the scalable loop by two, while LLVM 20 emits a fixed-width
+ # NEON main loop with a single predicated SVE epilogue.
+ assert len(matches) > 0
@pytest.mark.skipif(
@@ -424,7 +437,10 @@ def test_neq(dtype):
)
assert len(loads) > 1
- assert len(matches) > 1
+ # The number of SVE compares depends on the LLVM cost model: LLVM <= 17
+ # interleaves the scalable loop by two, while LLVM 20 emits a fixed-width
+ # NEON main loop with a single predicated SVE epilogue.
+ assert len(matches) > 0
@pytest.mark.skipif(