https://github.com/CarolineConcatto updated https://github.com/llvm/llvm-project/pull/154144
>From 58a564fdc949f2591c7611e190138418d3d62bdc Mon Sep 17 00:00:00 2001 From: CarolineConcatto <[email protected]> Date: Mon, 18 Aug 2025 15:11:25 +0000 Subject: [PATCH] [NFC] Update attributes for FP8 instructions using new target memory locations --- .../test/CodeGen/AArch64/attr-fp8-function.c | 25 ++++-- llvm/include/llvm/IR/IntrinsicsAArch64.td | 81 ++++++++++++------- 2 files changed, 67 insertions(+), 39 deletions(-) diff --git a/clang/test/CodeGen/AArch64/attr-fp8-function.c b/clang/test/CodeGen/AArch64/attr-fp8-function.c index 54bfd177bd809..8fdf384ed93e5 100644 --- a/clang/test/CodeGen/AArch64/attr-fp8-function.c +++ b/clang/test/CodeGen/AArch64/attr-fp8-function.c @@ -18,20 +18,29 @@ svfloat16_t test_svcvtlt2_f16_mf8(svmfloat8_t zn, fpm_t fpm) __arm_streaming { return svcvtlt2_f16_mf8_fpm(zn, fpm); } -// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR3:#.*]] -// CHECK: declare <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvtlt2.nxv8f16(<vscale x 16 x i8>) [[ATTR4:#.*]] +// CHECK: declare void @llvm.aarch64.set.fpmr(i64) [[ATTR2:#.*]] +// CHECK: declare <vscale x 8 x half> @llvm.aarch64.sve.fp8.cvtlt2.nxv8f16(<vscale x 16 x i8>) [[ATTR3:#.*]] // SME +// With only fpmr as inaccessible memory svfloat32_t test_svmlalltt_lane_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm) __arm_streaming { return svmlalltt_lane_f32_mf8_fpm(zda, zn, zm, 7, fpm); } -// CHECK: declare <vscale x 4 x float> @llvm.aarch64.sve.fp8.fmlalltt.lane.nxv4f32(<vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32 immarg) [[ATTR4]] +// CHECK: declare <vscale x 4 x float> @llvm.aarch64.sve.fp8.fmlalltt.lane.nxv4f32(<vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32 immarg) [[ATTR3:#.*]] -// CHECK: declare <16 x i8> @llvm.aarch64.neon.fp8.fcvtn.v16i8.v8f16(<8 x half>, <8 x half>) [[ATTR4]] +// With fpmr and za as inaccessible memory +void test_svdot_lane_za32_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr) 
__arm_streaming __arm_inout("za") { + svdot_lane_za32_mf8_vg1x2_fpm(slice, zn, zm, 3, fpmr); +} + +// CHECK: declare void @llvm.aarch64.sme.fp8.fdot.lane.za32.vg1x2(i32, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32 immarg) [[ATTR5:#.*]] +// CHECK: declare <16 x i8> @llvm.aarch64.neon.fp8.fcvtn.v16i8.v8f16(<8 x half>, <8 x half>) [[ATTR3]] -// CHECK: attributes [[ATTR1:#.*]] = {{{.*}}} -// CHECK: attributes [[ATTR2:#.*]] = {{{.*}}} -// CHECK: attributes [[ATTR3]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } -// CHECK: attributes [[ATTR4]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: read) } +// CHECK: attributes [[ATTR0:#.*]] = {{{.*}}} +// CHECK: attributes [[ATTR1:#.*]] = {{{.*}}} +// CHECK: attributes [[ATTR2]] = { nocallback nofree nosync nounwind willreturn memory(target_mem0: write) } +// CHECK: attributes [[ATTR3]] = { nocallback nofree nosync nounwind willreturn memory(target_mem0: read) } +// CHECK: attributes [[ATTR4:#.*]] = {{{.*}}} +// CHECK: attributes [[ATTR5:#.*]] = { nocallback nofree nosync nounwind willreturn memory(target_mem0: read, target_mem1: readwrite) } diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index fd56e0e3f9e7b..d0fc1af7a14ce 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -737,7 +737,7 @@ let TargetPrefix = "aarch64" in { class RNDR_Intrinsic : DefaultAttrsIntrinsic<[llvm_i64_ty, llvm_i1_ty], [], [IntrNoMem, IntrHasSideEffects]>; class FPMR_Set_Intrinsic - : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrWriteMem, IntrInaccessibleMemOnly]>; + : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrWriteMem, IntrWrite<[TargetMem0]>]>; } // FP environment registers. 
@@ -962,7 +962,7 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat // Conversions class AdvSIMD_FP8_1VectorArg_Long_Intrinsic - : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrReadMem, IntrInaccessibleMemOnly]>; + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrReadMem, IntrRead<[TargetMem0]>]>; def int_aarch64_neon_fp8_cvtl1 : AdvSIMD_FP8_1VectorArg_Long_Intrinsic; def int_aarch64_neon_fp8_cvtl2 : AdvSIMD_FP8_1VectorArg_Long_Intrinsic; @@ -971,13 +971,13 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[TargetMem0]>]>; def int_aarch64_neon_fp8_fcvtn2 : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[TargetMem0]>]>; // Dot-product class AdvSIMD_FP8_DOT_Intrinsic @@ -985,14 +985,14 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[TargetMem0]>]>; class AdvSIMD_FP8_DOT_LANE_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, llvm_v16i8_ty, llvm_i32_ty], - [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>; + [IntrReadMem, IntrRead<[TargetMem0]>, ImmArg<ArgIndex<3>>]>; def int_aarch64_neon_fp8_fdot2 : AdvSIMD_FP8_DOT_Intrinsic; def int_aarch64_neon_fp8_fdot2_lane : AdvSIMD_FP8_DOT_LANE_Intrinsic; @@ -1007,7 +1007,7 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[TargetMem0]>]>; class AdvSIMD_FP8_FMLA_LANE_Intrinsic : 
DefaultAttrsIntrinsic<[llvm_anyvector_ty], @@ -1015,7 +1015,7 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], - [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>; + [IntrReadMem, IntrRead<[TargetMem0]>, ImmArg<ArgIndex<3>>]>; def int_aarch64_neon_fp8_fmlalb : AdvSIMD_FP8_FMLA_Intrinsic; def int_aarch64_neon_fp8_fmlalt : AdvSIMD_FP8_FMLA_Intrinsic; @@ -3060,6 +3060,12 @@ let TargetPrefix = "aarch64" in { llvm_anyvector_ty, LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; + class SME_FP8_OuterProduct_QuarterTile_Single_Single + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, + LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[TargetMem0, TargetMem1]>, IntrWrite<[TargetMem1]>, IntrHasSideEffects]>; + class SME_OuterProduct_QuarterTile_Single_Multi : DefaultAttrsIntrinsic<[], [llvm_i32_ty, @@ -3067,6 +3073,13 @@ let TargetPrefix = "aarch64" in { LLVMMatchType<0>, LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; + class SME_FP8_OuterProduct_QuarterTile_Single_Multi + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, + LLVMMatchType<0>, + LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[TargetMem0, TargetMem1]>, IntrWrite<[TargetMem1]>, IntrHasSideEffects]>; + class SME_OuterProduct_QuarterTile_Multi_Multi : DefaultAttrsIntrinsic<[], [llvm_i32_ty, @@ -3075,6 +3088,14 @@ let TargetPrefix = "aarch64" in { LLVMMatchType<0>, LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; + class SME_FP8_OuterProduct_QuarterTile_Multi_Multi + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, + LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[TargetMem0, TargetMem1]>, IntrWrite<[TargetMem1]>, IntrHasSideEffects]>; + // 2-way and 4-way multi-vector signed/unsigned Quarter Tile Quarter Product A/S 
foreach mode = ["s", "a"] in { foreach za = ["", "_za64"] in { @@ -3117,10 +3138,10 @@ let TargetPrefix = "aarch64" in { // 16 and 32 bit multi-vector floating point 8 Quarter Tile Quarter Product foreach za = ["za16", "za32"] in { - def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : SME_OuterProduct_QuarterTile_Single_Multi; - def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : SME_OuterProduct_QuarterTile_Single_Multi; - def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : SME_OuterProduct_QuarterTile_Multi_Multi; + def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : SME_FP8_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : SME_FP8_OuterProduct_QuarterTile_Single_Multi; + def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : SME_FP8_OuterProduct_QuarterTile_Single_Multi; + def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : SME_FP8_OuterProduct_QuarterTile_Multi_Multi; } class SME_AddVectorToTile_Intrinsic @@ -4051,7 +4072,7 @@ let TargetPrefix = "aarch64" in { class SVE2_FP8_Cvt : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_nxv16i8_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[TargetMem0]>]>; def int_aarch64_sve_fp8_cvt1 : SVE2_FP8_Cvt; def int_aarch64_sve_fp8_cvt2 : SVE2_FP8_Cvt; @@ -4062,29 +4083,26 @@ let TargetPrefix = "aarch64" in { class SVE2_FP8_Narrow_Cvt : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_anyvector_ty, LLVMMatchType<0>], - [IntrReadMem, IntrInaccessibleMemOnly]>; - + [IntrReadMem, IntrRead<[TargetMem0]>]>; def int_aarch64_sve_fp8_cvtn : SVE2_FP8_Narrow_Cvt; def int_aarch64_sve_fp8_cvtnb : SVE2_FP8_Narrow_Cvt; def int_aarch64_sve_fp8_cvtnt : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_nxv16i8_ty, llvm_anyvector_ty, LLVMMatchType<0>], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[TargetMem0]>]>; // Dot product class SVE2_FP8_FMLA_FDOT : 
DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; - + [IntrReadMem, IntrRead<[TargetMem0]>]>; class SVE2_FP8_FMLA_FDOT_Lane : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>; - + [IntrReadMem, IntrRead<[TargetMem0]>, ImmArg<ArgIndex<3>>]>; def int_aarch64_sve_fp8_fdot : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fdot_lane : SVE2_FP8_FMLA_FDOT_Lane; @@ -4110,33 +4128,33 @@ let TargetPrefix = "aarch64" in { class SVE2_FP8_CVT_X2_Single_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_nxv16i8_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[TargetMem0]>]>; class SVE2_FP8_CVT_Single_X4_Intrinsic : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[TargetMem0]>]>; class SME_FP8_OuterProduct_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i1_ty, llvm_nxv16i1_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly]>; + [ImmArg<ArgIndex<0>>, IntrRead<[TargetMem0, TargetMem1]>, IntrWrite<[TargetMem1]>]>; class SME_FP8_ZA_LANE_VGx1_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>; + [IntrRead<[TargetMem0, TargetMem1]>, IntrWrite<[TargetMem1]>, ImmArg<ArgIndex<3>>]>; class SME_FP8_ZA_LANE_VGx2_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg<ArgIndex<4>>]>; + [IntrRead<[TargetMem0, TargetMem1]>, IntrWrite<[TargetMem1]>, ImmArg<ArgIndex<4>>]>; class SME_FP8_ZA_LANE_VGx4_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, @@ -4144,35 
+4162,36 @@ let TargetPrefix = "aarch64" in { llvm_nxv16i8_ty, llvm_i32_ty], [IntrInaccessibleMemOnly, ImmArg<ArgIndex<6>>]>; + class SME_FP8_ZA_VGx1_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [IntrInaccessibleMemOnly]>; + [IntrRead<[TargetMem0, TargetMem1]>, IntrWrite<[TargetMem1]>]>; class SME_FP8_ZA_SINGLE_VGx2_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [IntrInaccessibleMemOnly]>; + [IntrRead<[TargetMem0, TargetMem1]>, IntrWrite<[TargetMem1]>]>; class SME_FP8_ZA_SINGLE_VGx4_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [IntrInaccessibleMemOnly]>; + [IntrRead<[TargetMem0, TargetMem1]>, IntrWrite<[TargetMem1]>]>; class SME_FP8_ZA_MULTI_VGx2_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [IntrInaccessibleMemOnly]>; + [IntrRead<[TargetMem0, TargetMem1]>, IntrWrite<[TargetMem1]>]>; class SME_FP8_ZA_MULTI_VGx4_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [IntrInaccessibleMemOnly]>; + [IntrRead<[TargetMem0, TargetMem1]>, IntrWrite<[TargetMem1]>]>; // // CVT from FP8 to half-precision/BFloat16 multi-vector // @@ -4191,7 +4210,7 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sve_fp8_cvt_x2 : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_anyvector_ty, LLVMMatchType<0>], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrReadMem, IntrRead<[TargetMem0]>]>; def int_aarch64_sve_fp8_cvt_x4 : SVE2_FP8_CVT_Single_X4_Intrinsic; def int_aarch64_sve_fp8_cvtn_x4 : SVE2_FP8_CVT_Single_X4_Intrinsic; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
