https://github.com/CarolineConcatto updated https://github.com/llvm/llvm-project/pull/154144
>From 7e38391d722a92d28946b07f9425b622d1a92bf6 Mon Sep 17 00:00:00 2001 From: CarolineConcatto <[email protected]> Date: Thu, 4 Jun 2026 14:11:02 +0000 Subject: [PATCH 1/2] [TableGen] Add ArgMem memory location This will allow to use IntrRead/IntrWrite with ArgMem. So this: ''' [IntrWriteMem , IntrInaccessibleMemOrArgMemOnly] ''' could become this: ''' [IntrWriteMem, IntrWrite<[ArgMem, InaccessibleMem]>] ''' --- llvm/include/llvm/IR/Intrinsics.td | 1 + .../TableGen/target-mem-intrinsic-attrs.td | 109 ++++++++---------- .../TableGen/Basic/CodeGenIntrinsics.cpp | 1 + 3 files changed, 49 insertions(+), 62 deletions(-) diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 993ddd7e33701..a196c8a3967ba 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -59,6 +59,7 @@ class IntrinsicMemoryLocation; // TODO: Populate with all IRMemLocation enum values and update // getValueAsIRMemLocation accordingly. +def ArgMem : IntrinsicMemoryLocation; def InaccessibleMem : IntrinsicMemoryLocation; def TargetMem0 : IntrinsicMemoryLocation; def TargetMem1 : IntrinsicMemoryLocation; diff --git a/llvm/test/TableGen/target-mem-intrinsic-attrs.td b/llvm/test/TableGen/target-mem-intrinsic-attrs.td index fc9c3321ad9e9..b3e1b5a8a55e2 100644 --- a/llvm/test/TableGen/target-mem-intrinsic-attrs.td +++ b/llvm/test/TableGen/target-mem-intrinsic-attrs.td @@ -14,65 +14,50 @@ def int_aarch64_set_inaccessible_mem : DefaultAttrsIntrinsic<[], [llvm_i64_ty] def int_aarch64_set_target_mem0 : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrWriteMem, IntrWrite<[TargetMem0]>]>; -// CHECK: static AttributeSet getIntrinsicFnAttributeSet(LLVMContext &C, unsigned ID) { -// CHECK-NEXT: switch (ID) { -// CHECK-NEXT: default: llvm_unreachable("Invalid attribute set number"); -// CHECK-NEXT: case 0: // llvm.aarch64.get.target.mem0.mem1 -// CHECK-NEXT: return AttributeSet::get(C, { -// CHECK-NEXT: Attribute::get(C, Attribute::NoUnwind), -// 
CHECK-NEXT: Attribute::get(C, Attribute::NoCallback), -// CHECK-NEXT: Attribute::get(C, Attribute::NoSync), -// CHECK-NEXT: Attribute::get(C, Attribute::NoFree), -// CHECK-NEXT: Attribute::get(C, Attribute::WillReturn), -// CHECK-NEXT: // ArgMem: NoModRef, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, Other: NoModRef, TargetMem0: Ref, TargetMem1: Ref -// CHECK-NEXT: Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(1280)), -// CHECK-NEXT: }); -// CHECK-NEXT: case 1: // llvm.aarch64.get.target.mem0.set.target.mem1 -// CHECK-NEXT: return AttributeSet::get(C, { -// CHECK-NEXT: Attribute::get(C, Attribute::NoUnwind), -// CHECK-NEXT: Attribute::get(C, Attribute::NoCallback), -// CHECK-NEXT: Attribute::get(C, Attribute::NoSync), -// CHECK-NEXT: Attribute::get(C, Attribute::NoFree), -// CHECK-NEXT: Attribute::get(C, Attribute::WillReturn), -// CHECK-NEXT: // ArgMem: NoModRef, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, Other: NoModRef, TargetMem0: Ref, TargetMem1: Mod -// CHECK-NEXT: Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(2304)), -// CHECK-NEXT: }); -// CHECK-NEXT: case 2: // llvm.aarch64.get.target.mem1 -// CHECK-NEXT: return AttributeSet::get(C, { -// CHECK-NEXT: Attribute::get(C, Attribute::NoUnwind), -// CHECK-NEXT: Attribute::get(C, Attribute::NoCallback), -// CHECK-NEXT: Attribute::get(C, Attribute::NoSync), -// CHECK-NEXT: Attribute::get(C, Attribute::NoFree), -// CHECK-NEXT: Attribute::get(C, Attribute::WillReturn), -// CHECK-NEXT: // ArgMem: NoModRef, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, Other: NoModRef, TargetMem0: NoModRef, TargetMem1: Ref -// CHECK-NEXT: Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(1024)), -// CHECK-NEXT: }); -// CHECK-NEXT: case 3: // llvm.aarch64.get.target.mem1.set.target.mem1 -// CHECK-NEXT: return AttributeSet::get(C, { -// CHECK-NEXT: Attribute::get(C, Attribute::NoUnwind), -// CHECK-NEXT: Attribute::get(C, Attribute::NoCallback), -// CHECK-NEXT: 
Attribute::get(C, Attribute::NoSync), -// CHECK-NEXT: Attribute::get(C, Attribute::NoFree), -// CHECK-NEXT: Attribute::get(C, Attribute::WillReturn), -// CHECK-NEXT: // ArgMem: NoModRef, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, Other: NoModRef, TargetMem0: NoModRef, TargetMem1: ModRef -// CHECK-NEXT: Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(3072)), -// CHECK-NEXT: }); -// CHECK-NEXT: case 4: // llvm.aarch64.set.inaccessible.mem -// CHECK-NEXT: return AttributeSet::get(C, { -// CHECK-NEXT: Attribute::get(C, Attribute::NoUnwind), -// CHECK-NEXT: Attribute::get(C, Attribute::NoCallback), -// CHECK-NEXT: Attribute::get(C, Attribute::NoSync), -// CHECK-NEXT: Attribute::get(C, Attribute::NoFree), -// CHECK-NEXT: Attribute::get(C, Attribute::WillReturn), -// CHECK-NEXT: // ArgMem: NoModRef, InaccessibleMem: Mod, ErrnoMem: NoModRef, Other: NoModRef, TargetMem0: NoModRef, TargetMem1: NoModRef -// CHECK-NEXT: Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(8)), -// CHECK-NEXT: }); -// CHECK-NEXT: case 5: // llvm.aarch64.set.target.mem0 -// CHECK-NEXT: return AttributeSet::get(C, { -// CHECK-NEXT: Attribute::get(C, Attribute::NoUnwind), -// CHECK-NEXT: Attribute::get(C, Attribute::NoCallback), -// CHECK-NEXT: Attribute::get(C, Attribute::NoSync), -// CHECK-NEXT: Attribute::get(C, Attribute::NoFree), -// CHECK-NEXT: Attribute::get(C, Attribute::WillReturn), -// CHECK-NEXT: // ArgMem: NoModRef, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, Other: NoModRef, TargetMem0: Mod, TargetMem1: NoModRef -// CHECK-NEXT: Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(512)), +def int_aarch64_argmem_read_target_mem1_write : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrRead<[ArgMem]>, IntrWrite<[TargetMem1]>]>; + +def int_aarch64_argmem_target_mem1_read_target_mem1_write : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrRead<[ArgMem, TargetMem1]>, IntrWrite<[TargetMem1]>]>; + +def 
int_aarch64_target_mem1_read_argmem_write : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrRead<[TargetMem1]>, IntrWrite<[ArgMem]>]>; + +def int_aarch64_target_mem0_read_argmem_write : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrRead<[TargetMem0]>, IntrWrite<[ArgMem]>]>; + +// CHECK-LABEL: case 0: // llvm.aarch64.argmem.read.target.mem1.write +// CHECK: // ArgMem: Ref, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, Other: NoModRef, TargetMem0: NoModRef, TargetMem1: Mod +// CHECK-NEXT: Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(2049)), + +// CHECK-LABEL: case 1: // llvm.aarch64.argmem.target.mem1.read.target.mem1.write +// CHECK: // ArgMem: Ref, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, Other: NoModRef, TargetMem0: NoModRef, TargetMem1: ModRef +// CHECK-NEXT: Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(3073)), + +// CHECK-LABEL: case 2: // llvm.aarch64.get.target.mem0.mem1 +// CHECK: // ArgMem: NoModRef, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, Other: NoModRef, TargetMem0: Ref, TargetMem1: Ref +// CHECK-NEXT: Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(1280)), + +// CHECK-LABEL: case 3: // llvm.aarch64.get.target.mem0.set.target.mem1 +// CHECK: // ArgMem: NoModRef, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, Other: NoModRef, TargetMem0: Ref, TargetMem1: Mod +// CHECK-NEXT: Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(2304)), + +// CHECK-LABEL: case 4: // llvm.aarch64.get.target.mem1 +// CHECK: // ArgMem: NoModRef, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, Other: NoModRef, TargetMem0: NoModRef, TargetMem1: Ref +// CHECK-NEXT: Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(1024)), + +// CHECK-LABEL: case 5: // llvm.aarch64.get.target.mem1.set.target.mem1 +// CHECK: // ArgMem: NoModRef, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, Other: NoModRef, TargetMem0: NoModRef, TargetMem1: ModRef +// CHECK-NEXT: 
Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(3072)), + +// CHECK-LABEL: case 6: // llvm.aarch64.set.inaccessible.mem +// CHECK: // ArgMem: NoModRef, InaccessibleMem: Mod, ErrnoMem: NoModRef, Other: NoModRef, TargetMem0: NoModRef, TargetMem1: NoModRef +// CHECK-NEXT: Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(8)), + +// CHECK-LABEL: case 7: // llvm.aarch64.set.target.mem0 +// CHECK: // ArgMem: NoModRef, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, Other: NoModRef, TargetMem0: Mod, TargetMem1: NoModRef +// CHECK-NEXT: Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(512)), + +// CHECK-LABEL: case 8: // llvm.aarch64.target.mem0.read.argmem.write +// CHECK: // ArgMem: Mod, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, Other: NoModRef, TargetMem0: Ref, TargetMem1: NoModRef +// CHECK-NEXT: Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(258)), + +// CHECK-LABEL: case 9: // llvm.aarch64.target.mem1.read.argmem.write +// CHECK: // ArgMem: Mod, InaccessibleMem: NoModRef, ErrnoMem: NoModRef, Other: NoModRef, TargetMem0: NoModRef, TargetMem1: Ref +// CHECK-NEXT: Attribute::getWithMemoryEffects(C, MemoryEffects::createFromIntValue(1026)), diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp index 05ce7f22468c3..36898ae014e3a 100644 --- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp +++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp @@ -535,6 +535,7 @@ CodeGenIntrinsic::getValueAsIRMemLocation(const Record *R) const { StringRef Name = R->getName(); IRMemLocation Loc = StringSwitch<IRMemLocation>(Name) + .Case("ArgMem", IRMemLocation::ArgMem) .Case("TargetMem0", IRMemLocation::TargetMem0) .Case("TargetMem1", IRMemLocation::TargetMem1) .Case("InaccessibleMem", IRMemLocation::InaccessibleMem) >From 39ef7d0117b277b210f53aa7ea15df2c43c8a3be Mon Sep 17 00:00:00 2001 From: CarolineConcatto <[email protected]> Date: Mon, 18 Aug 
2025 15:11:25 +0000 Subject: [PATCH 2/2] [NFC][AArch64][TableGen] Define ZA, ZT0 and FPMR memory defvars Introduce TableGen defvars for the inaccessible memory effects used to model accesses to ZA, ZT0 and FPMR in IntrinsicsAArch64.td. This is a preparatory cleanup for a follow-up patch that will replace these uses of InaccessibleMem with target-specific memory locations. Other uses of inaccessible memory in the file are left unchanged because they are unrelated to ZA, ZT0 or FPMR. This preserves the existing memory effects. In particular, intrinsics that currently access both argument memory and inaccessible memory keep the same ArgMem/InaccessibleMem read/write modelling. --- .../fp8-intrinsics/acle_sme2_fp8_fmopa_licm.c | 83 ++++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 272 ++++++++++-------- .../CodeGen/AArch64/sme-intrinsics-stores.ll | 10 +- 3 files changed, 247 insertions(+), 118 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa_licm.c diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa_licm.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa_licm.c new file mode 100644 index 0000000000000..c049879603b34 --- /dev/null +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa_licm.c @@ -0,0 +1,83 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 6 +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme \ +// RUN: -target-feature +sme-f8f32 -O2 -emit-llvm -o - %s | FileCheck %s + +#include <arm_sme.h> + +// FIXME: LICM should be able to hoist llvm.aarch64.set.fpmr into the loop +// preheader. This will be possible once the FP8 FMOPA intrinsics access target +// memory while llvm.aarch64.set.fpmr only writes inaccessible memory, making +// the two operations non-aliasing. 
+ + // CHECK: Function Attrs: nofree norecurse nosync nounwind memory(argmem: read, inaccessiblemem: readwrite) vscale_range(1,16) +// CHECK-LABEL: define dso_local ptr @test_fpmr_hoist( +// CHECK-SAME: ptr noundef readonly captures(ret: address, provenance) [[A:%.*]], ptr noundef readonly captures(none) [[B:%.*]], i64 noundef [[N:%.*]], i64 noundef [[FPMR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*]]: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64() +// CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +// CHECK-NEXT: [[CMP_NOT15:%.*]] = icmp eq i64 [[N]], 0 +// CHECK-NEXT: br i1 [[CMP_NOT15]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_LR_PH:.*]] +// CHECK: [[FOR_BODY_LR_PH]]: +// CHECK-NEXT: [[MUL:%.*]] = shl nuw nsw i64 [[TMP0]], 5 +// CHECK-NEXT: br label %[[FOR_BODY:.*]] +// CHECK: [[FOR_COND_CLEANUP]]: +// CHECK-NEXT: [[A_ADDR_0_LCSSA:%.*]] = phi ptr [ [[A]], %[[ENTRY]] ], [ [[ADD_PTR2:%.*]], %[[FOR_BODY]] ] +// CHECK-NEXT: ret ptr [[A_ADDR_0_LCSSA]] +// CHECK: [[FOR_BODY]]: +// CHECK-NEXT: [[A_ADDR_018:%.*]] = phi ptr [ [[A]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_PTR2]], %[[FOR_BODY]] ] +// CHECK-NEXT: [[B_ADDR_017:%.*]] = phi ptr [ [[B]], %[[FOR_BODY_LR_PH]] ], [ [[ADD_PTR4:%.*]], %[[FOR_BODY]] ] +// CHECK-NEXT: [[I_016:%.*]] = phi i64 [ 0, %[[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i8>, ptr [[A_ADDR_018]], align 16, !tbaa [[__MFP8_TBAA5:![0-9]+]] +// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[A_ADDR_018]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = load <vscale x 16 x i8>, ptr [[ADD_PTR]], align 16, !tbaa [[__MFP8_TBAA5]] +// CHECK-NEXT: [[TMP4:%.*]] = load <vscale x 16 x i8>, ptr [[B_ADDR_017]], align 16, !tbaa [[__MFP8_TBAA5]] +// CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds nuw i8, ptr [[B_ADDR_017]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP5:%.*]] = load <vscale x 16 x i8>, ptr [[ADD_PTR1]], align 16, !tbaa 
[[__MFP8_TBAA5]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmopa.za32(i32 0, <vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP4]]) +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmopa.za32(i32 0, <vscale x 16 x i1> splat (i1 true), <vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP5]]) +// CHECK-NEXT: [[ADD_PTR2]] = getelementptr inbounds nuw i8, ptr [[A_ADDR_018]], i64 [[MUL]] +// CHECK-NEXT: [[ADD_PTR4]] = getelementptr inbounds nuw i8, ptr [[B_ADDR_017]], i64 [[MUL]] +// CHECK-NEXT: [[INC]] = add nuw i64 [[I_016]], 1 +// CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +// CHECK-NEXT: br i1 [[CMP_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +// +mfloat8_t *test_fpmr_hoist(mfloat8_t *a, mfloat8_t *b, unsigned long n, + fpm_t fpmr) __arm_streaming __arm_inout("za") { + const svbool_t pg = svptrue_b8(); + const unsigned long step = svcntb(); + + for (unsigned long i = 0; i != n; ++i) { + svmfloat8_t a0 = svld1_mf8(pg, a); + svmfloat8_t a1 = svld1_mf8(pg, a + step); + svmfloat8_t b0 = svld1_mf8(pg, b); + svmfloat8_t b1 = svld1_mf8(pg, b + step); + + svmopa_za32_mf8_m_fpm(0, pg, pg, a0, b0, fpmr); + svmopa_za32_mf8_m_fpm(0, pg, pg, a1, b1, fpmr); + + a += step * 2; + b += step * 2; + } + + return a; +} +//. 
+// CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind memory(argmem: read, inaccessiblemem: readwrite) vscale_range(1,16) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme,+sme-f8f32" } +// CHECK: attributes #[[ATTR1:[0-9]+]] = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } +// CHECK: attributes #[[ATTR2:[0-9]+]] = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +// CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +//. +// CHECK: [[META0:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +// CHECK: [[META1:![0-9]+]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +// CHECK: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0} +// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"Simple C/C++ TBAA"} +// CHECK: [[__MFP8_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +// CHECK: [[META6]] = !{!"__mfp8", [[META3]], i64 0} +// CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]]} +// CHECK: [[META8]] = !{!"llvm.loop.mustprogress"} +//. 
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 48c38fb2c2c9f..880f0114a0717 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -737,6 +737,11 @@ def int_aarch64_neon_tbx2 : AdvSIMD_Tbx2_Intrinsic; def int_aarch64_neon_tbx3 : AdvSIMD_Tbx3_Intrinsic; def int_aarch64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic; +// Map the FPMR, ZT0 and ZA registers to the memory locations that model them. +defvar FPMR = InaccessibleMem; +defvar ZT0 = InaccessibleMem; +defvar ZA = InaccessibleMem; + let TargetPrefix = "aarch64" in { class FPENV_Get_Intrinsic : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects]>; @@ -745,7 +750,7 @@ let TargetPrefix = "aarch64" in { class RNDR_Intrinsic : DefaultAttrsIntrinsic<[llvm_i64_ty, llvm_i1_ty], [], [IntrNoMem, IntrHasSideEffects]>; class FPMR_Set_Intrinsic - : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrWriteMem, IntrInaccessibleMemOnly]>; + : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrWriteMem, IntrWrite<[FPMR]>]>; } // FP environment registers. 
@@ -970,7 +975,7 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat // Conversions class AdvSIMD_FP8_1VectorArg_Long_Intrinsic - : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrReadMem, IntrInaccessibleMemOnly]>; + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrReadMem, IntrRead<[FPMR]>]>; def int_aarch64_neon_fp8_cvtl1 : AdvSIMD_FP8_1VectorArg_Long_Intrinsic; def int_aarch64_neon_fp8_cvtl2 : AdvSIMD_FP8_1VectorArg_Long_Intrinsic; @@ -979,13 +984,13 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[FPMR]>]>; def int_aarch64_neon_fp8_fcvtn2 : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[FPMR]>]>; // Dot-product class AdvSIMD_FP8_DOT_Intrinsic @@ -993,14 +998,14 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[FPMR]>]>; class AdvSIMD_FP8_DOT_LANE_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, llvm_v16i8_ty, llvm_i32_ty], - [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>; + [IntrReadMem, IntrRead<[FPMR]>, ImmArg<ArgIndex<3>>]>; def int_aarch64_neon_fp8_fdot2 : AdvSIMD_FP8_DOT_Intrinsic; def int_aarch64_neon_fp8_fdot2_lane : AdvSIMD_FP8_DOT_LANE_Intrinsic; @@ -1015,7 +1020,7 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[FPMR]>]>; class AdvSIMD_FP8_FMLA_LANE_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], @@ 
-1023,7 +1028,7 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], - [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>; + [IntrReadMem, IntrRead<[FPMR]>, ImmArg<ArgIndex<3>>]>; def int_aarch64_neon_fp8_fmlalb : AdvSIMD_FP8_FMLA_Intrinsic; def int_aarch64_neon_fp8_fmlalt : AdvSIMD_FP8_FMLA_Intrinsic; @@ -2839,7 +2844,7 @@ def int_aarch64_sve_fmmla def int_aarch64_sve_fp8_fmmla : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[FPMR]>]>; // // SVE ACLE: 7.2. BFloat16 extensions @@ -2940,48 +2945,57 @@ def int_aarch64_sve_whilewr_d : SVE2_CONFLICT_DETECT_Intrinsic<[IntrSpeculatable // Scalable Matrix Extension (SME) Intrinsics let TargetPrefix = "aarch64" in { - class SME_Load_Store_Intrinsic<LLVMType pred_ty> + class SME_Load_Intrinsic<LLVMType pred_ty> + : DefaultAttrsIntrinsic<[], + [pred_ty, llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty], + [IntrRead<[ArgMem, ZA]>, IntrWrite<[ZA]>, + ImmArg<ArgIndex<2>>]>; + + class SME_Store_Intrinsic<LLVMType pred_ty> : DefaultAttrsIntrinsic<[], - [pred_ty, llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrInaccessibleMemOrArgMemOnly, ImmArg<ArgIndex<2>>]>; + [pred_ty, llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty], + [IntrRead<[ZA]>, IntrWrite<[ArgMem]>, ImmArg<ArgIndex<2>>]>; // Loads - def int_aarch64_sme_ld1b_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; - def int_aarch64_sme_ld1h_horiz : SME_Load_Store_Intrinsic<llvm_nxv8i1_ty>; - def int_aarch64_sme_ld1w_horiz : SME_Load_Store_Intrinsic<llvm_nxv4i1_ty>; - def int_aarch64_sme_ld1d_horiz : SME_Load_Store_Intrinsic<llvm_nxv2i1_ty>; - def int_aarch64_sme_ld1q_horiz : SME_Load_Store_Intrinsic<llvm_nxv1i1_ty>; - def int_aarch64_sme_ld1b_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; - def int_aarch64_sme_ld1h_vert : SME_Load_Store_Intrinsic<llvm_nxv8i1_ty>; - def 
int_aarch64_sme_ld1w_vert : SME_Load_Store_Intrinsic<llvm_nxv4i1_ty>; - def int_aarch64_sme_ld1d_vert : SME_Load_Store_Intrinsic<llvm_nxv2i1_ty>; - def int_aarch64_sme_ld1q_vert : SME_Load_Store_Intrinsic<llvm_nxv1i1_ty>; + def int_aarch64_sme_ld1b_horiz : SME_Load_Intrinsic<llvm_nxv16i1_ty>; + def int_aarch64_sme_ld1h_horiz : SME_Load_Intrinsic<llvm_nxv8i1_ty>; + def int_aarch64_sme_ld1w_horiz : SME_Load_Intrinsic<llvm_nxv4i1_ty>; + def int_aarch64_sme_ld1d_horiz : SME_Load_Intrinsic<llvm_nxv2i1_ty>; + def int_aarch64_sme_ld1q_horiz : SME_Load_Intrinsic<llvm_nxv1i1_ty>; + def int_aarch64_sme_ld1b_vert : SME_Load_Intrinsic<llvm_nxv16i1_ty>; + def int_aarch64_sme_ld1h_vert : SME_Load_Intrinsic<llvm_nxv8i1_ty>; + def int_aarch64_sme_ld1w_vert : SME_Load_Intrinsic<llvm_nxv4i1_ty>; + def int_aarch64_sme_ld1d_vert : SME_Load_Intrinsic<llvm_nxv2i1_ty>; + def int_aarch64_sme_ld1q_vert : SME_Load_Intrinsic<llvm_nxv1i1_ty>; // Stores - def int_aarch64_sme_st1b_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; - def int_aarch64_sme_st1h_horiz : SME_Load_Store_Intrinsic<llvm_nxv8i1_ty>; - def int_aarch64_sme_st1w_horiz : SME_Load_Store_Intrinsic<llvm_nxv4i1_ty>; - def int_aarch64_sme_st1d_horiz : SME_Load_Store_Intrinsic<llvm_nxv2i1_ty>; - def int_aarch64_sme_st1q_horiz : SME_Load_Store_Intrinsic<llvm_nxv1i1_ty>; - def int_aarch64_sme_st1b_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>; - def int_aarch64_sme_st1h_vert : SME_Load_Store_Intrinsic<llvm_nxv8i1_ty>; - def int_aarch64_sme_st1w_vert : SME_Load_Store_Intrinsic<llvm_nxv4i1_ty>; - def int_aarch64_sme_st1d_vert : SME_Load_Store_Intrinsic<llvm_nxv2i1_ty>; - def int_aarch64_sme_st1q_vert : SME_Load_Store_Intrinsic<llvm_nxv1i1_ty>; + def int_aarch64_sme_st1b_horiz : SME_Store_Intrinsic<llvm_nxv16i1_ty>; + def int_aarch64_sme_st1h_horiz : SME_Store_Intrinsic<llvm_nxv8i1_ty>; + def int_aarch64_sme_st1w_horiz : SME_Store_Intrinsic<llvm_nxv4i1_ty>; + def int_aarch64_sme_st1d_horiz : SME_Store_Intrinsic<llvm_nxv2i1_ty>; + 
def int_aarch64_sme_st1q_horiz : SME_Store_Intrinsic<llvm_nxv1i1_ty>; + def int_aarch64_sme_st1b_vert : SME_Store_Intrinsic<llvm_nxv16i1_ty>; + def int_aarch64_sme_st1h_vert : SME_Store_Intrinsic<llvm_nxv8i1_ty>; + def int_aarch64_sme_st1w_vert : SME_Store_Intrinsic<llvm_nxv4i1_ty>; + def int_aarch64_sme_st1d_vert : SME_Store_Intrinsic<llvm_nxv2i1_ty>; + def int_aarch64_sme_st1q_vert : SME_Store_Intrinsic<llvm_nxv1i1_ty>; // Spill + fill - class SME_LDR_STR_ZA_Intrinsic - : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyptr_ty, llvm_i32_ty], [IntrInaccessibleMemOrArgMemOnly]>; - def int_aarch64_sme_ldr : SME_LDR_STR_ZA_Intrinsic; - def int_aarch64_sme_str : SME_LDR_STR_ZA_Intrinsic; + def int_aarch64_sme_ldr : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyptr_ty, llvm_i32_ty], + [IntrRead<[ArgMem]>, IntrWrite<[ZA]>]>; + def int_aarch64_sme_str : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyptr_ty, llvm_i32_ty], + [IntrRead<[ZA]>, IntrWrite<[ArgMem]>]>; class SME_TileToVector_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty, llvm_i32_ty], [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<2>>]>; + llvm_i32_ty, llvm_i32_ty], [IntrReadMem, IntrRead<[ZA]>, + ImmArg<ArgIndex<2>>]>; class SME_VectorToTile_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_anyvector_ty], [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>; + llvm_anyvector_ty], [IntrRead<[ZA]>, IntrWrite<[ZA]>, + ImmArg<ArgIndex<0>>]>; def int_aarch64_sme_read_horiz : SME_TileToVector_Intrinsic; def int_aarch64_sme_read_vert : SME_TileToVector_Intrinsic; @@ -2996,13 +3010,13 @@ let TargetPrefix = "aarch64" in { class SME_MOVAZ_TileToVector_X2_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>; + [IntrWrite<[ZA]>, IntrRead<[ZA]>, ImmArg<ArgIndex<0>>]>; 
class SME_MOVAZ_TileToVector_X4_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,LLVMMatchType<0>], [llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>; + [IntrWrite<[ZA]>, IntrRead<[ZA]>, ImmArg<ArgIndex<0>>]>; def int_aarch64_sme_readz_horiz_x2 : SME_MOVAZ_TileToVector_X2_Intrinsic; def int_aarch64_sme_readz_vert_x2 : SME_MOVAZ_TileToVector_X2_Intrinsic; @@ -3013,7 +3027,7 @@ let TargetPrefix = "aarch64" in { class SME_MOVAZ_TileToVector_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>; + [IntrWrite<[ZA]>, IntrRead<[ZA]>, ImmArg<ArgIndex<0>>]>; def int_aarch64_sme_readz_horiz : SME_MOVAZ_TileToVector_Intrinsic; def int_aarch64_sme_readz_vert : SME_MOVAZ_TileToVector_Intrinsic; @@ -3024,23 +3038,24 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_readz_x2 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_i32_ty], - [IntrInaccessibleMemOnly]>; + [IntrWrite<[ZA]>, IntrRead<[ZA]>]>; def int_aarch64_sme_readz_x4 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i32_ty], - [IntrInaccessibleMemOnly]>; + [IntrWrite<[ZA]>, IntrRead<[ZA]>]>; def int_aarch64_sme_write_lane_zt : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, llvm_i32_ty], - [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrInaccessibleMemOnly]>; + [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrRead<[ZT0]>, + IntrWrite<[ZT0]>]>; def int_aarch64_sme_write_zt : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty], - [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrWriteMem]>; + [ImmArg<ArgIndex<0>>, IntrWriteMem, IntrWrite<[ZT0]>]>; - def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [IntrWriteMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>; + def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [IntrWriteMem, IntrWrite<[ZA]>, 
ImmArg<ArgIndex<0>>]>; def int_aarch64_sme_in_streaming_mode : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrNoMem]>, ClangBuiltin<"__builtin_arm_in_streaming_mode">; class SME_OuterProduct_Intrinsic @@ -3049,7 +3064,8 @@ let TargetPrefix = "aarch64" in { LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, - llvm_anyvector_ty], [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>; + llvm_anyvector_ty], [IntrRead<[ZA]>, IntrWrite<[ZA]>, + ImmArg<ArgIndex<0>>]>; def int_aarch64_sme_mopa : SME_OuterProduct_Intrinsic; def int_aarch64_sme_mops : SME_OuterProduct_Intrinsic; @@ -3067,17 +3083,32 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_usmops_wide : SME_OuterProduct_Intrinsic; class SME_OuterProduct_QuarterTile_Single_Single + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, + LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[ZA]>, + IntrWrite<[ZA]>, IntrHasSideEffects]>; + + class SME_FP8_OuterProduct_QuarterTile_Single_Single : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, - LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; + LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, IntrHasSideEffects]>; class SME_OuterProduct_QuarterTile_Single_Multi : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, - LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; + LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[ZA]>, + IntrWrite<[ZA]>, IntrHasSideEffects]>; + + class SME_FP8_OuterProduct_QuarterTile_Single_Multi + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, + LLVMMatchType<0>, + LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, IntrHasSideEffects]>; class SME_OuterProduct_QuarterTile_Multi_Multi : DefaultAttrsIntrinsic<[], @@ -3085,7 +3116,16 @@ let TargetPrefix = "aarch64" in { llvm_anyvector_ty, 
LLVMMatchType<0>, LLVMMatchType<0>, - LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; + LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[ZA]>, + IntrWrite<[ZA]>, IntrHasSideEffects]>; + + class SME_FP8_OuterProduct_QuarterTile_Multi_Multi + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, + LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, IntrHasSideEffects]>; // 2-way and 4-way multi-vector signed/unsigned Quarter Tile Quarter Product A/S foreach mode = ["s", "a"] in { @@ -3117,8 +3157,8 @@ let TargetPrefix = "aarch64" in { LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_i32_ty], - [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<5>>, - IntrInaccessibleMemOnly]>; + [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<5>>, IntrRead<[ZA]>, + IntrWrite<[ZA]>]>; def int_aarch64_sme_ftmopa_za16 : SME_OuterProduct_TMOP_Intrinsic; def int_aarch64_sme_ftmopa_za32 : SME_OuterProduct_TMOP_Intrinsic; @@ -3129,10 +3169,10 @@ let TargetPrefix = "aarch64" in { // 16 and 32 bit multi-vector floating point 8 Quarter Tile Quarter Product foreach za = ["za16", "za32"] in { - def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : SME_OuterProduct_QuarterTile_Single_Multi; - def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : SME_OuterProduct_QuarterTile_Single_Multi; - def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : SME_OuterProduct_QuarterTile_Multi_Multi; + def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : SME_FP8_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : SME_FP8_OuterProduct_QuarterTile_Single_Multi; + def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : SME_FP8_OuterProduct_QuarterTile_Single_Multi; + def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : SME_FP8_OuterProduct_QuarterTile_Multi_Multi; } class SME_AddVectorToTile_Intrinsic @@ -3140,7 +3180,8 @@ let 
TargetPrefix = "aarch64" in { [llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_anyvector_ty], [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>; + llvm_anyvector_ty], [IntrRead<[ZA]>, IntrWrite<[ZA]>, + ImmArg<ArgIndex<0>>]>; def int_aarch64_sme_addha : SME_AddVectorToTile_Intrinsic; def int_aarch64_sme_addva : SME_AddVectorToTile_Intrinsic; @@ -3164,9 +3205,9 @@ let TargetPrefix = "aarch64" in { [IntrNoMem, IntrHasSideEffects]>; def int_aarch64_sme_za_enable - : DefaultAttrsIntrinsic<[], [], [IntrWriteMem, IntrInaccessibleMemOnly]>; + : DefaultAttrsIntrinsic<[], [], [IntrWriteMem, IntrWrite<[ZA]>]>; def int_aarch64_sme_za_disable - : DefaultAttrsIntrinsic<[], [], [IntrWriteMem, IntrInaccessibleMemOnly]>; + : DefaultAttrsIntrinsic<[], [], [IntrWriteMem, IntrWrite<[ZA]>]>; // Clamp // @@ -3255,56 +3296,59 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], - [IntrInaccessibleMemOnly]>; + [IntrRead<[ZA]>, IntrWrite<[ZA]>]>; class SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrInaccessibleMemOnly]>; + [IntrRead<[ZA]>, IntrWrite<[ZA]>]>; class SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrInaccessibleMemOnly]>; + [IntrRead<[ZA]>, IntrWrite<[ZA]>]>; class SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrInaccessibleMemOnly]>; + [IntrRead<[ZA]>, IntrWrite<[ZA]>]>; class SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, 
LLVMMatchType<0>, LLVMMatchType<0>], - [IntrInaccessibleMemOnly]>; + [IntrRead<[ZA]>, IntrWrite<[ZA]>]>; class SME2_Matrix_ArrayVector_Single_Index_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>; + [IntrRead<[ZA]>, IntrWrite<[ZA]>, + ImmArg<ArgIndex<3>>]>; class SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg<ArgIndex<4>>]>; + [IntrRead<[ZA]>, IntrWrite<[ZA]>, + ImmArg<ArgIndex<4>>]>; class SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg<ArgIndex<6>>]>; + [IntrRead<[ZA]>, IntrWrite<[ZA]>, + ImmArg<ArgIndex<6>>]>; class SVE2_VG2_Multi_Imm_Intrinsic : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>], @@ -3323,14 +3367,14 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], - [IntrInaccessibleMemOnly]>; + [IntrRead<[ZA]>, IntrWrite<[ZA]>]>; class SME2_ZA_Write_VG4_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrInaccessibleMemOnly]>; + [IntrRead<[ZA]>, IntrWrite<[ZA]>]>; class SVE2_VG2_Multi_Single_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], @@ -3448,50 +3492,50 @@ let TargetPrefix = "aarch64" in { class SME2_ZA_ArrayVector_Read_VG2_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_i32_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[ZA]>]>; class SME2_ZA_ArrayVector_Read_VG4_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i32_ty], - 
[IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[ZA]>]>; class SME2_Matrix_TileVector_Read_VG2_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_i32_ty, llvm_i32_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[ZA]>]>; class SME2_Matrix_TileVector_Read_VG4_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i32_ty, llvm_i32_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[ZA]>]>; class SME2_ZA_ArrayVector_Write_VG2_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], - [IntrWriteMem, IntrInaccessibleMemOnly]>; + [IntrWriteMem, IntrWrite<[ZA]>]>; class SME2_ZA_ArrayVector_Write_VG4_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrWriteMem, IntrInaccessibleMemOnly]>; + [IntrWriteMem, IntrWrite<[ZA]>]>; class SME2_Matrix_TileVector_Write_VG2_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], - [IntrWriteMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>; + [IntrWriteMem, IntrWrite<[ZA]>, ImmArg<ArgIndex<0>>]>; class SME2_Matrix_TileVector_Write_VG4_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrWriteMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>; + [IntrWriteMem, IntrWrite<[ZA]>, ImmArg<ArgIndex<0>>]>; class SVE2_VG2_Multi_Single_Single_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], @@ -3657,7 +3701,7 @@ let TargetPrefix = "aarch64" in { // Multi-vector zeroing foreach vg = ["vg1x2", "vg1x4", "vg2x1", "vg2x2", "vg2x4", "vg4x1", "vg4x2", "vg4x4"] in { - def int_aarch64_sme_zero_za64_ # vg : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [IntrWriteMem, IntrInaccessibleMemOnly]>; + def int_aarch64_sme_zero_za64_ # vg : 
DefaultAttrsIntrinsic<[], [llvm_i32_ty], [IntrWriteMem, IntrWrite<[ZA]>]>; } // Multi-vector signed saturating doubling multiply high def int_aarch64_sve_sqdmulh_single_vgx2 : SVE2_VG2_Multi_Single_Intrinsic; @@ -3795,14 +3839,14 @@ let TargetPrefix = "aarch64" in { [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrInaccessibleMemOnly, IntrWriteMem]>; + [IntrWriteMem, IntrWrite<[ZA]>]>; class SME2_Add_Sub_Write_VG4_Multi_Single_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrInaccessibleMemOnly, IntrWriteMem]>; + [IntrWriteMem, IntrWrite<[ZA]>]>; def int_aarch64_sme_add_write_single_za_vg1x2 : SME2_Add_Sub_Write_VG2_Multi_Single_Intrinsic; def int_aarch64_sme_sub_write_single_za_vg1x2 : SME2_Add_Sub_Write_VG2_Multi_Single_Intrinsic; @@ -3817,7 +3861,7 @@ let TargetPrefix = "aarch64" in { [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrInaccessibleMemOnly, IntrWriteMem]>; + [IntrWriteMem, IntrWrite<[ZA]>]>; class SME2_Add_Sub_Write_VG4_Multi_Multi_Intrinsic : DefaultAttrsIntrinsic<[], @@ -3825,7 +3869,7 @@ let TargetPrefix = "aarch64" in { llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrInaccessibleMemOnly, IntrWriteMem]>; + [IntrWriteMem, IntrWrite<[ZA]>]>; def int_aarch64_sme_add_write_za_vg1x2 : SME2_Add_Sub_Write_VG2_Multi_Multi_Intrinsic; def int_aarch64_sme_sub_write_za_vg1x2 : SME2_Add_Sub_Write_VG2_Multi_Multi_Intrinsic; @@ -3946,34 +3990,38 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sve_sel_x2 : SVE2_VG2_Sel_Intrinsic; def int_aarch64_sve_sel_x4 : SVE2_VG4_Sel_Intrinsic; - class SME_LDR_STR_ZT_Intrinsic - : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyptr_ty], [IntrInaccessibleMemOrArgMemOnly]>; - def int_aarch64_sme_ldr_zt : SME_LDR_STR_ZT_Intrinsic; - def 
int_aarch64_sme_str_zt : SME_LDR_STR_ZT_Intrinsic; + class SME_LDR_ZT_Intrinsic + : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyptr_ty], + [IntrRead<[ArgMem]>, IntrWrite<[ZT0]>]>; + class SME_STR_ZT_Intrinsic + : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyptr_ty], + [IntrRead<[ZT0]>, IntrWrite<[ArgMem]>]>; + def int_aarch64_sme_ldr_zt : SME_LDR_ZT_Intrinsic; + def int_aarch64_sme_str_zt : SME_STR_ZT_Intrinsic; // // Zero ZT0 // - def int_aarch64_sme_zero_zt : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrWriteMem]>; + def int_aarch64_sme_zero_zt : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrWriteMem, IntrWrite<[ZT0]>]>; // // Lookup table expand one register // def int_aarch64_sme_luti2_lane_zt : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrInaccessibleMemOnly, IntrReadMem]>; + [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrReadMem, IntrRead<[ZT0]>]>; def int_aarch64_sme_luti4_lane_zt : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrInaccessibleMemOnly, IntrReadMem]>; + [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrReadMem, IntrRead<[ZT0]>]>; // Lookup table expand two registers // def int_aarch64_sme_luti2_lane_zt_x2 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrInaccessibleMemOnly, IntrReadMem]>; + [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrReadMem, IntrRead<[ZT0]>]>; def int_aarch64_sme_luti4_lane_zt_x2 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrInaccessibleMemOnly, IntrReadMem]>; + [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrReadMem, IntrRead<[ZT0]>]>; // // Lookup table expand 
four registers @@ -3981,16 +4029,16 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_luti2_lane_zt_x4 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrInaccessibleMemOnly, IntrReadMem]>; + [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrReadMem, IntrRead<[ZT0]>]>; def int_aarch64_sme_luti4_lane_zt_x4 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrInaccessibleMemOnly, IntrReadMem]>; + [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrReadMem, IntrRead<[ZT0]>]>; def int_aarch64_sme_luti4_zt_x4 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrReadMem]>; + [ImmArg<ArgIndex<0>>, IntrReadMem, IntrRead<[ZT0]>]>; // @@ -4070,7 +4118,7 @@ let TargetPrefix = "aarch64" in { class SVE2_FP8_Cvt : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_nxv16i8_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[FPMR]>]>; def int_aarch64_sve_fp8_cvt1 : SVE2_FP8_Cvt; def int_aarch64_sve_fp8_cvt2 : SVE2_FP8_Cvt; @@ -4081,29 +4129,26 @@ let TargetPrefix = "aarch64" in { class SVE2_FP8_Narrow_Cvt : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_anyvector_ty, LLVMMatchType<0>], - [IntrReadMem, IntrInaccessibleMemOnly]>; - + [IntrReadMem, IntrRead<[FPMR]>]>; def int_aarch64_sve_fp8_cvtn : SVE2_FP8_Narrow_Cvt; def int_aarch64_sve_fp8_cvtnb : SVE2_FP8_Narrow_Cvt; def int_aarch64_sve_fp8_cvtnt : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_nxv16i8_ty, llvm_anyvector_ty, LLVMMatchType<0>], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[FPMR]>]>; // Dot product class SVE2_FP8_FMLA_FDOT : 
DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; - + [IntrReadMem, IntrRead<[FPMR]>]>; class SVE2_FP8_FMLA_FDOT_Lane : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>; - + [IntrReadMem, IntrRead<[FPMR]>, ImmArg<ArgIndex<3>>]>; def int_aarch64_sve_fp8_fdot : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fdot_lane : SVE2_FP8_FMLA_FDOT_Lane; @@ -4129,69 +4174,70 @@ let TargetPrefix = "aarch64" in { class SVE2_FP8_CVT_X2_Single_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_nxv16i8_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[FPMR]>]>; class SVE2_FP8_CVT_Single_X4_Intrinsic : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[FPMR]>]>; class SME_FP8_OuterProduct_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i1_ty, llvm_nxv16i1_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly]>; + [ImmArg<ArgIndex<0>>, IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>; class SME_FP8_ZA_LANE_VGx1_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>; + [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, ImmArg<ArgIndex<3>>]>; class SME_FP8_ZA_LANE_VGx2_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg<ArgIndex<4>>]>; + [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, ImmArg<ArgIndex<4>>]>; class SME_FP8_ZA_LANE_VGx4_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], - 
[IntrInaccessibleMemOnly, ImmArg<ArgIndex<6>>]>; + [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, ImmArg<ArgIndex<6>>]>; + class SME_FP8_ZA_VGx1_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [IntrInaccessibleMemOnly]>; + [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>; class SME_FP8_ZA_SINGLE_VGx2_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [IntrInaccessibleMemOnly]>; + [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>; class SME_FP8_ZA_SINGLE_VGx4_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [IntrInaccessibleMemOnly]>; + [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>; class SME_FP8_ZA_MULTI_VGx2_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [IntrInaccessibleMemOnly]>; + [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>; class SME_FP8_ZA_MULTI_VGx4_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [IntrInaccessibleMemOnly]>; + [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>; // // CVT from FP8 to half-precision/BFloat16 multi-vector // @@ -4210,7 +4256,7 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sve_fp8_cvt_x2 : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_anyvector_ty, LLVMMatchType<0>], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[FPMR]>]>; def int_aarch64_sve_fp8_cvt_x4 : SVE2_FP8_CVT_Single_X4_Intrinsic; def int_aarch64_sve_fp8_cvtn_x4 : SVE2_FP8_CVT_Single_X4_Intrinsic; diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll index 03c1f28fbaa18..a9bd476c61691 100644 --- a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll @@ -18,8 +18,8 @@ 
define void @st1b(<vscale x 16 x i1> %pg, ptr %ptr, i32 %sliceidx) { define void @st1b_with_addr_offset(<vscale x 16 x i1> %pg, ptr %ptr, i64 %index, i32 %sliceidx) { ; CHECK-LABEL: st1b_with_addr_offset: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w13, wzr ; CHECK-NEXT: mov w12, w2 +; CHECK-NEXT: mov w13, wzr ; CHECK-NEXT: st1b {za0h.b[w13, 0]}, p0, [x0, x1] ; CHECK-NEXT: st1b {za0v.b[w12, 15]}, p0, [x0, x1] ; CHECK-NEXT: ret @@ -66,8 +66,8 @@ define void @st1h_with_addr_offset(<vscale x 8 x i1> %pg, ptr %ptr, i64 %index, define void @st1w(<vscale x 4 x i1> %pg, ptr %ptr, i32 %sliceidx) { ; CHECK-LABEL: st1w: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w13, wzr ; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: mov w13, wzr ; CHECK-NEXT: st1w {za0h.s[w13, 0]}, p0, [x0] ; CHECK-NEXT: st1w {za1h.s[w13, 0]}, p0, [x0] ; CHECK-NEXT: st1w {za2h.s[w13, 0]}, p0, [x0] @@ -92,8 +92,8 @@ define void @st1w(<vscale x 4 x i1> %pg, ptr %ptr, i32 %sliceidx) { define void @st1w_with_addr_offset(<vscale x 4 x i1> %pg, ptr %ptr, i64 %index, i32 %sliceidx) { ; CHECK-LABEL: st1w_with_addr_offset: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w13, wzr ; CHECK-NEXT: mov w12, w2 +; CHECK-NEXT: mov w13, wzr ; CHECK-NEXT: st1w {za0h.s[w13, 0]}, p0, [x0, x1, lsl #2] ; CHECK-NEXT: st1w {za3v.s[w12, 3]}, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret @@ -107,8 +107,8 @@ define void @st1w_with_addr_offset(<vscale x 4 x i1> %pg, ptr %ptr, i64 %index, define void @st1d(<vscale x 2 x i1> %pg, ptr %ptr, i32 %sliceidx) { ; CHECK-LABEL: st1d: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w13, wzr ; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: mov w13, wzr ; CHECK-NEXT: st1d {za0h.d[w13, 0]}, p0, [x0] ; CHECK-NEXT: st1d {za1h.d[w13, 0]}, p0, [x0] ; CHECK-NEXT: st1d {za2h.d[w13, 0]}, p0, [x0] @@ -490,8 +490,8 @@ define void @test_sink_tile0_offset_operand(<vscale x 4 x i1> %pg, ptr %src, i32 ; CHECK-NEXT: mov w12, w1 ; CHECK-NEXT: .LBB24_1: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: st1w {za0h.s[w12, 0]}, p0, [x0] ; 
CHECK-NEXT: subs w2, w2, #1 +; CHECK-NEXT: st1w {za0h.s[w12, 0]}, p0, [x0] ; CHECK-NEXT: st1w {za0h.s[w12, 1]}, p0, [x0] ; CHECK-NEXT: st1w {za0h.s[w12, 2]}, p0, [x0] ; CHECK-NEXT: b.ne .LBB24_1 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
