Author: Thurston Dang
Date: 2025-10-21T18:14:21-07:00
New Revision: 6b9a9cf040d33ad7f9cd563a907b13e373313255
URL: https://github.com/llvm/llvm-project/commit/6b9a9cf040d33ad7f9cd563a907b13e373313255
DIFF: https://github.com/llvm/llvm-project/commit/6b9a9cf040d33ad7f9cd563a907b13e373313255.diff

LOG: [msan][test] Add another target("aarch64.svcount") test case (#164343)

This shows a crash that happens because MSan tries to check the shadow of a
target("aarch64.svcount")-sized argument. This is the follow-up to
https://github.com/llvm/llvm-project/pull/164315.

This also does a drive-by fix of those test cases to remove FileCheck
(otherwise, even if opt passed, the test would still XFAIL because FileCheck
cannot find any CHECK: assertions).

Forked from llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll

Added: 
    llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll
    llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll

Modified: 
    llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll
    llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll

Removed: 
    
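To illustrate the drive-by fix (a sketch built from the RUN lines in the diff
below, not an excerpt from it): FileCheck exits with an error when its input
contains no CHECK: assertions, so a RUN line that pipes opt into FileCheck
keeps failing even once opt stops crashing, and the XFAIL'd test can never
flip to XPASS. Dropping FileCheck ties the test result to opt alone:

    ; Fails even if opt succeeds: FileCheck finds no CHECK: lines.
    ; RUN: opt -S -passes=msan -mattr=+sme -o - %s | FileCheck %s
    ; XFAIL: *

    ; Fails only while opt itself crashes, so the XFAIL flips to XPASS
    ; once MSan handles target("aarch64.svcount") arguments.
    ; RUN: opt -S -passes=msan -mattr=+sme -o - %s
    ; XFAIL: *
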
################################################################################
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll
index 1ddcd4b56688c..1c869bd41b931 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount-mini.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=msan -mattr=+sme -o - %s | FileCheck %s
+; RUN: opt -S -passes=msan -mattr=+sme -o - %s
 
 ; XFAIL: *

diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll
index 9caa89de63748..00cf3204464d0 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme-aarch64-svcount.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=msan -mattr=+sme -o - %s | FileCheck %s
+; RUN: opt -S -passes=msan -mattr=+sme -o - %s
 
 ; XFAIL: *

diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll
new file mode 100644
index 0000000000000..3f43efa233621
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s
+
+; XFAIL: *
+
+; Forked from llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
+; Manually reduced to show MSan leads to a compiler crash
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-android9001"
+
+define void @multi_vector_add_za_vg1x4_f32_tuple(i64 %stride, ptr %ptr) sanitize_memory {
+  %1 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+  %2 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %1, ptr %ptr)
+  ret void
+}

diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll
new file mode 100644
index 0000000000000..cd04373c11d20
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll
@@ -0,0 +1,340 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=msan -mattr=+sme2 -mattr=+sme-i16i64 -mattr=+sme-f64f64 -o - %s
+
+; XFAIL: *
+
+; Forked from llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-android9001"
+
+define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zm) sanitize_memory {
+  call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 %slice,
+              <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+              <vscale x 4 x i32> %zm)
+  %slice.7 = add i32 %slice, 7
+  call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32 %slice.7,
+              <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+              <vscale x 4 x i32> %zm)
+  ret void
+}
+
+define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zm) sanitize_memory {
+  call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 %slice,
+              <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+              <vscale x 2 x i64> %zm)
+  %slice.7 = add i32 %slice, 7
+  call void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32 %slice.7,
+              <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+              <vscale x 2 x i64> %zm)
+  ret void
+}
+
+
+define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+              <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3,
+              <vscale x 4 x i32> %zm) sanitize_memory {
+  call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 %slice,
+              <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+              <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3,
+              <vscale x 4 x i32> %zm)
+  %slice.7 = add i32 %slice, 7
+  call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32 %slice.7,
+              <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+              <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3,
+              <vscale x 4 x i32> %zm)
+  ret void
+}
+
+define void @multi_vector_add_write_single_za_vg1x4_i64(i32 %slice,
+              <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+              <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3,
+              <vscale x 2 x i64> %zm) sanitize_memory {
+  call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 %slice,
+              <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+              <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3,
+              <vscale x 2 x i64> %zm)
+  %slice.7 = add i32 %slice, 7
+  call void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32 %slice.7,
+              <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+              <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3,
+              <vscale x 2 x i64> %zm)
+  ret void
+}
+
+
+define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+              <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) sanitize_memory {
+  call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 %slice,
+              <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+              <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
+  %slice.7 = add i32 %slice, 7
+  call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32 %slice.7,
+              <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+              <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
+  ret void
+}
+
+
+define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+              <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) sanitize_memory {
+  call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 %slice,
+              <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+              <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
+  %slice.7 = add i32 %slice, 7
+  call void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32 %slice.7,
+              <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+              <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
+  ret void
+}
+
+
+
+define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+              <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3,
+              <vscale x 4 x i32> %zm0, <vscale x 4 x i32> %zm1,
+              <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3) sanitize_memory {
+  call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 %slice,
+              <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+              <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3,
+              <vscale x 4 x i32> %zm0, <vscale x 4 x i32> %zm1,
+              <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3)
+  %slice.7 = add i32 %slice, 7
+  call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32 %slice.7,
+              <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+              <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3,
+              <vscale x 4 x i32> %zm0, <vscale x 4 x i32> %zm1,
+              <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3)
+  ret void
+}
+
+define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+              <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3,
+              <vscale x 2 x i64> %zm0, <vscale x 2 x i64> %zm1,
+              <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3) sanitize_memory {
+  call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 %slice,
+              <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+              <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3,
+              <vscale x 2 x i64> %zm0, <vscale x 2 x i64> %zm1,
+              <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3)
+  %slice.7 = add i32 %slice, 7
+  call void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32 %slice.7,
+              <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+              <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3,
+              <vscale x 2 x i64> %zm0, <vscale x 2 x i64> %zm1,
+              <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3)
+  ret void
+}
+
+define void @multi_vector_add_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) sanitize_memory {
+  call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 %slice,<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1)
+  %slice.7 = add i32 %slice, 7
+  call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32 %slice.7, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1)
+  ret void
+}
+
+define void @multi_vector_add_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1) sanitize_memory {
+  call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1)
+  %slice.7 = add i32 %slice, 7
+  call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32 %slice.7, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1)
+  ret void
+}
+
+define void @multi_vector_add_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) sanitize_memory {
+  call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 %slice,
+              <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1)
+  %slice.7 = add i32 %slice, 7
+  call void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32 %slice.7,
+              <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1)
+  ret void
+}
+
+define void @multi_vector_add_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1) sanitize_memory {
+  call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 %slice,
+              <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1)
+  %slice.7 = add i32 %slice, 7
+  call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 %slice.7,
+              <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1)
+  ret void
+}
+
+define void @multi_vector_add_za_vg1x2_f64_tuple(i64 %stride, ptr %ptr) sanitize_memory {
+entry:
+  %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+  %1 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") %0, ptr %ptr)
+  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %1, 0
+  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %1, 1
+  %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride
+  %4 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") %0, ptr %arrayidx2)
+  %5 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %4, 0
+  %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %4, 1
+  call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 0, <vscale x 2 x double> %2, <vscale x 2 x double> %5)
+  call void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32 0, <vscale x 2 x double> %3, <vscale x 2 x double> %6)
+  ret void
+}
+
+
+define void @multi_vector_add_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) sanitize_memory {
+  call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 %slice,
+              <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+              <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3)
+  %slice.7 = add i32 %slice, 7
+  call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32 %slice.7,
+              <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
+              <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3)
+  ret void
+}
+
+define void @multi_vector_add_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3) sanitize_memory {
+  call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 %slice,
+              <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+              <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3)
+  %slice.7 = add i32 %slice, 7
+  call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32 %slice.7,
+              <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
+              <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3)
+  ret void
+}
+
+define void @multi_vector_add_za_vg1x4_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) sanitize_memory {
+  call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 %slice,
+              <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1,
+              <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3)
+  %slice.7 = add i32 %slice, 7
+  call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 %slice.7,
+              <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1,
+              <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3)
+  ret void
+}
+
+define void @multi_vector_add_za_vg1x4_f32_tuple(i64 %stride, ptr %ptr) sanitize_memory {
+entry:
+  %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
+  %1 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %0, ptr %ptr)
+  %2 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1, 0
+  %3 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1, 1
+  %4 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1, 2
+  %5 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %1, 3
+  %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride
+  %6 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %0, ptr %arrayidx2)
+  %7 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %6, 0
+  %8 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %6, 1
+  %9 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %6, 2
+  %10 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %6, 3
+  %mul3 = shl i64 %stride, 1
+  %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3
+  %11 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %0, ptr %arrayidx4)
+  %12 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %11, 0
+  %13 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %11, 1
+  %14 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %11, 2
+  %15 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %11, 3
+  %mul5 = mul i64 %stride, 3
+  %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5
+  %16 = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") %0, ptr %arrayidx6)
+  %17 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %16, 0
+  %18 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %16, 1
+  %19 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %16, 2
+  %20 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %16, 3
+  call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> %2, <vscale x 4 x float> %7, <vscale x 4 x float> %12, <vscale x 4 x float> %17)
+  call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> %3, <vscale x 4 x float> %8, <vscale x 4 x float> %13, <vscale x 4 x float> %18)
+  call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> %4, <vscale x 4 x float> %9, <vscale x 4 x float> %14, <vscale x 4 x float> %19)
+  call void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32 0, <vscale x 4 x float> %5, <vscale x 4 x float> %10, <vscale x 4 x float> %15, <vscale x 4 x float> %20)
+  ret void
+}
+
+define void @multi_vector_add_za_vg1x4_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3) sanitize_memory {
+  call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 %slice,
+              <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1,
+              <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3)
+  %slice.7 = add i32 %slice, 7
+  call void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32 %slice.7,
+              <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1,
+              <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3)
+  ret void
+}
+
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_add_single_x2_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) sanitize_memory {
+  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> }
+              @llvm.aarch64.sve.add.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2,
+              <vscale x 16 x i8> %zm)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_add_single_x2_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) sanitize_memory {
+  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> }
+              @llvm.aarch64.sve.add.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2,
+              <vscale x 8 x i16> %zm)
+  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_add_single_x2_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) sanitize_memory {
+  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> }
+              @llvm.aarch64.sve.add.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2,
+              <vscale x 4 x i32> %zm)
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
+}
+
+define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_add_single_x2_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) sanitize_memory {
+  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> }
+              @llvm.aarch64.sve.add.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2,
+              <vscale x 2 x i64> %zm)
+  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
+}
+
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_add_single_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8>%zm) sanitize_memory {
+  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
+              @llvm.aarch64.sve.add.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2,
+              <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
+              <vscale x 16 x i8> %zm)
+  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_add_x4_single_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) sanitize_memory {
+  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
+              @llvm.aarch64.sve.add.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2,
+              <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
+              <vscale x 8 x i16> %zm)
+  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
+}
+
+define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_add_x4_single_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) sanitize_memory {
+  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
+              @llvm.aarch64.sve.add.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2,
+              <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
+              <vscale x 4 x i32> %zm)
+  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
+}
+
+define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_add_x4_single_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) sanitize_memory {
+  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
+              @llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2,
+              <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
+              <vscale x 2 x i64> %zm)
+  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
+}
+declare void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.add.write.single.za.vg1x2.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.add.write.single.za.vg1x4.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.add.write.za.vg1x2.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.add.write.za.vg1x2.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.add.write.za.vg1x4.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.add.write.za.vg1x4.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.add.za32.vg1x2.nxv4i32(i32, <vscale x 4 x i32>,<vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.add.za64.vg1x2.nxv2i64(i32, <vscale x 2 x i64>,<vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.add.za32.vg1x4.nxv4i32(i32, <vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.add.za64.vg1x4.nxv2i64(i32, <vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.add.za32.vg1x2.nxv4f32(i32, <vscale x 4 x float>, <vscale x 4 x float>)
+declare void @llvm.aarch64.sme.add.za64.vg1x2.nxv2f64(i32, <vscale x 2 x double>, <vscale x 2 x double>)
+declare void @llvm.aarch64.sme.add.za32.vg1x4.nxv4f32(i32, <vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>)
+declare void @llvm.aarch64.sme.add.za64.vg1x4.nxv2f64(i32, <vscale x 2 x double>, <vscale x 2 x double>,<vscale x 2 x double>, <vscale x 2 x double>)
+declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.add.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.add.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.add.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.add.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.add.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.add.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
