llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-mlir-sme @llvm/pr-subscribers-backend-aarch64 Author: Paul Walker (paulwalker-arm) <details> <summary>Changes</summary> This requirement was not intentional, just the result of convenience. Fixes: https://github.com/llvm/llvm-project/issues/183265 --- Patch is 2.05 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/189992.diff 85 Files Affected: - (modified) clang/include/clang/Basic/arm_sve.td (+2-2) - (modified) clang/lib/CodeGen/TargetBuiltins/ARM.cpp (+24-38) - (modified) clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1.c (+40-40) - (modified) clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1_vnum.c (+40-40) - (modified) clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ldr.c (+59-26) - (modified) clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_st1.c (+40-40) - (modified) clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_st1_vnum.c (+40-40) - (modified) clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_str.c (+59-26) - (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_ldr_str_zt.c (+4-4) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c (+48-48) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ro.c (+26-26) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1rq.c (+26-26) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1sb.c (+16-16) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1sh.c (+32-32) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1sw.c (+16-16) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ub.c (+16-16) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1uh.c (+32-32) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1uw.c (+16-16) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2.c (+52-52) - (modified) 
clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3.c (+52-52) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4.c (+52-52) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1.c (+100-100) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1sb.c (+40-40) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1sh.c (+48-48) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1sw.c (+24-24) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1ub.c (+40-40) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1uh.c (+48-48) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1uw.c (+24-24) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1.c (+52-52) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1sb.c (+24-24) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1sh.c (+16-16) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1sw.c (+8-8) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1ub.c (+24-24) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1uh.c (+16-16) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1uw.c (+8-8) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1.c (+52-52) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_prfb.c (+34-34) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_prfd.c (+34-34) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_prfh.c (+34-34) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_prfw.c (+34-34) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1.c (+48-48) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1b.c (+8-8) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1h.c (+16-16) - (modified) 
clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1w.c (+8-8) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2.c (+52-52) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3.c (+52-52) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4.c (+52-52) - (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1.c (+52-52) - (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_ldnt1.c (+30-30) - (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_ldnt1sb.c (+12-12) - (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_ldnt1sh.c (+20-20) - (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_ldnt1sw.c (+16-16) - (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_ldnt1ub.c (+12-12) - (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_ldnt1uh.c (+20-20) - (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_ldnt1uw.c (+16-16) - (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_stnt1.c (+30-30) - (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_stnt1b.c (+12-12) - (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_stnt1h.c (+20-20) - (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_stnt1w.c (+16-16) - (modified) clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c (+96-96) - (modified) clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1_single.c (+24-24) - (modified) clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c (+96-96) - (modified) clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_load_struct.c (+154-154) - (modified) clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c (+46-46) - (modified) clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c (+96-96) - (modified) clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1_single.c (+24-24) - (modified) 
clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c (+96-96) - (modified) clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c (+92-92) - (modified) clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store_struct.c (+156-156) - (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+29-29) - (modified) llvm/lib/IR/AutoUpgrade.cpp (+11-2) - (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+4-2) - (modified) llvm/test/Bitcode/upgrade-aarch64-sve-intrinsics.ll (+16-5) - (modified) llvm/test/CodeGen/AArch64/sve-coalesce-ptrue-intrinsics.ll (+21-21) - (modified) llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll (+1-1) - (modified) llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll (+6-6) - (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-stores.ll (+1-1) - (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-gatherscatter.ll (+2-2) - (modified) llvm/test/Transforms/InstCombine/scalable-trunc.ll (+1-1) - (modified) llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll (+35-35) - (modified) llvm/test/Transforms/InterleavedAccess/AArch64/sve-deinterleave4.ll (+56-6) - (modified) llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleave4.ll (+6-6) - (modified) llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll (+30-30) - (modified) llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll (+3-3) - (modified) mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td (+6-2) ``````````diff diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index be3cd8a76503b..724802cce24f7 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -238,7 +238,7 @@ def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfdbm", [IsLoad, Verify def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", 
"csilUcUsUiUlhfdbm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; // Load one quadword and replicate (scalar base) -def SVLD1RQ : SInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_ld1rq", [VerifyRuntimeMode]>; +def SVLD1RQ : MInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfdbm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1rq">; // Load N-element structure into N vectors (scalar base) def SVLD2 : SInst<"svld2[_{2}]", "2Pc", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_ld2_sret", [IsStructLoad, VerifyRuntimeMode]>; @@ -252,7 +252,7 @@ def SVLD4_VNUM : SInst<"svld4_vnum[_{2}]", "4Pcl", "csilUcUsUiUlhfdbm", MergeNon // Load one octoword and replicate (scalar base) let SVETargetGuard = "f64mm", SMETargetGuard = InvalidMode in { - def SVLD1RO : SInst<"svld1ro[_{2}]", "dPc", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_ld1ro">; + def SVLD1RO : MInst<"svld1ro[_{2}]", "dPc", "csilUcUsUiUlhfdbm", [IsLoad], MemEltTyDefault, "aarch64_sve_ld1ro">; } let SVETargetGuard = "bf16", SMETargetGuard = InvalidMode in { diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index 8ec2f5b83085c..3e0ec2c143428 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -3440,19 +3440,7 @@ Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, auto *ResultTy = getSVEType(TypeFlags); auto *OverloadedTy = llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy); - - Function *F = nullptr; - if (Ops[1]->getType()->isVectorTy()) - // This is the "vector base, scalar offset" case. In order to uniquely - // map this built-in to an LLVM IR intrinsic, we need both the return type - // and the type of the vector base. - F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()}); - else - // This is the "scalar base, vector offset case". The type of the offset - // is encoded in the name of the intrinsic. 
We only need to specify the - // return type in order to uniquely map this built-in to an LLVM IR - // intrinsic. - F = CGM.getIntrinsic(IntID, OverloadedTy); + Function *F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()}); // At the ACLE level there's only one predicate type, svbool_t, which is // mapped to <n x 16 x i1>. However, this might be incompatible with the @@ -3499,18 +3487,7 @@ Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags, // it's the first argument. Move it accordingly. Ops.insert(Ops.begin(), Ops.pop_back_val()); - Function *F = nullptr; - if (Ops[2]->getType()->isVectorTy()) - // This is the "vector base, scalar offset" case. In order to uniquely - // map this built-in to an LLVM IR intrinsic, we need both the return type - // and the type of the vector base. - F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()}); - else - // This is the "scalar base, vector offset case". The type of the offset - // is encoded in the name of the intrinsic. We only need to specify the - // return type in order to uniquely map this built-in to an LLVM IR - // intrinsic. - F = CGM.getIntrinsic(IntID, OverloadedTy); + Function *F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()}); // Pass 0 when the offset is missing. This can only be applied when using // the "vector base" addressing mode for which ACLE allows no offset. 
The @@ -3572,9 +3549,12 @@ Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, if (BytesPerElt > 1) Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt)); } + + Function *F = CGM.getIntrinsic(IntID, OverloadedTy); + return Builder.CreateCall(F, Ops); } - Function *F = CGM.getIntrinsic(IntID, OverloadedTy); + Function *F = CGM.getIntrinsic(IntID, {Ops[1]->getType(), OverloadedTy}); return Builder.CreateCall(F, Ops); } @@ -3589,7 +3569,7 @@ Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags, if (Ops.size() > 2) BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]); - Function *F = CGM.getIntrinsic(IntID, {VTy}); + Function *F = CGM.getIntrinsic(IntID, {VTy, BasePtr->getType()}); return Builder.CreateCall(F, {Predicate, BasePtr}); } @@ -3633,7 +3613,7 @@ Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags, for (unsigned I = Ops.size() - N; I < Ops.size(); ++I) Operands.push_back(Ops[I]); Operands.append({Predicate, BasePtr}); - Function *F = CGM.getIntrinsic(IntID, { VTy }); + Function *F = CGM.getIntrinsic(IntID, {VTy, BasePtr->getType()}); return Builder.CreateCall(F, Operands); } @@ -3682,7 +3662,8 @@ Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, Value *PrfOp = Ops.back(); - Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType()); + llvm::Type *Tys[2] = {Predicate->getType(), BasePtr->getType()}; + Function *F = CGM.getIntrinsic(BuiltinID, Tys); return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp}); } @@ -3730,9 +3711,9 @@ Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E, if (Ops.size() > 2) BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]); - Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy); - auto *Load = - cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr})); + llvm::Type *Tys[2] = {IsQuadLoad ? 
VectorTy : MemoryTy, BasePtr->getType()}; + Function *F = CGM.getIntrinsic(IntrinsicID, Tys); + auto *Load = Builder.CreateCall(F, {Predicate, BasePtr}); auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType()); CGM.DecorateInstructionWithTBAA(Load, TBAAInfo); @@ -3789,10 +3770,9 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, Value *Val = IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy); - Function *F = - CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy); - auto *Store = - cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr})); + llvm::Type *Tys[2] = {IsQuadStore ? VectorTy : MemoryTy, BasePtr->getType()}; + Function *F = CGM.getIntrinsic(IntrinsicID, Tys); + auto *Store = Builder.CreateCall(F, {Val, Predicate, BasePtr}); auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType()); CGM.DecorateInstructionWithTBAA(Store, TBAAInfo); return Store; @@ -3829,7 +3809,7 @@ Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags, NewOps.push_back(BasePtr); NewOps.push_back(Ops[0]); NewOps.push_back(RealSlice); - Function *F = CGM.getIntrinsic(IntID); + Function *F = CGM.getIntrinsic(IntID, BasePtr->getType()); return Builder.CreateCall(F, NewOps); } @@ -3862,7 +3842,7 @@ Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags, Ops.push_back(Builder.getInt32(0)); else Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true); - Function *F = CGM.getIntrinsic(IntID, {}); + Function *F = CGM.getIntrinsic(IntID, Ops[1]->getType()); return Builder.CreateCall(F, Ops); } @@ -4462,6 +4442,12 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, if (PredTy->getElementType()->isIntegerTy(1)) Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags)); + if (BuiltinID == SME::BI__builtin_sme_svldr_zt || + BuiltinID == SME::BI__builtin_sme_svstr_zt) { + Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic, Ops[1]->getType()); + return Builder.CreateCall(F, Ops); + } + 
Function *F = TypeFlags.isOverloadNone() ? CGM.getIntrinsic(Builtin->LLVMIntrinsic) diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1.c b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1.c index 6f84e7b36b149..598c31aeec489 100644 --- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1.c +++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1.c @@ -9,17 +9,17 @@ // CHECK-C-LABEL: define dso_local void @test_svld1_hor_za8( // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-C-NEXT: entry: -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz.p0(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15 -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]]) +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz.p0(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z18test_svld1_hor_za8ju10__SVBool_tPKv( // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz.p0(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15 -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]]) +// CHECK-CXX-NEXT: tail call void 
@llvm.aarch64.sme.ld1b.horiz.p0(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { @@ -31,18 +31,18 @@ void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-C-NEXT: entry: // CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz.p0(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 7 -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]]) +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz.p0(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z19test_svld1_hor_za16ju10__SVBool_tPKv( // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz.p0(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 7 -// CHECK-CXX-NEXT: tail call void 
@llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz.p0(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { @@ -54,18 +54,18 @@ void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-C-NEXT: entry: // CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz.p0(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 3 -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]]) +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz.p0(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z19test_svld1_hor_za32ju10__SVBool_tPKv( // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz.p0(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 
[[SLICE_BASE]]) // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 3 -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz.p0(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { @@ -77,18 +77,18 @@ void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-C-NEXT: entry: // CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz.p0(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 1 -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]]) +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz.p0(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z19test_svld1_hor_za64ju10__SVBool_tPKv( // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) +// CHECK-CXX-NEXT: 
tail call void @llvm.aarch64.sme.ld1d.horiz.p0(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 1 -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz.p0(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]]) // CHECK-CXX-NEXT: ret void // void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { @@ -100,16 +100,16 @@ void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __ar // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-C-NEXT: entry: // CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG]]) -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]]) +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz.p0(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz.p0(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z20test_svld1_hor_za128ju10__SVBool_tPKv( // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG]]) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], 
ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz.p0(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz.p0(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT: ret void // void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming __arm_out("za") { @@ -120,17 +120,17 @@ void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __a // CHECK-C-LABEL: define dso_local void @test_svld1_ver_za8( // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-C-NEXT: entry: -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert.p0(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15 -// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]]) +// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert.p0(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]]) // CHECK-C-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z18test_svld1_ver_za8ju10__SVBool_tPKv( // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]]) +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert.p0(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 
[[SLICE_BASE]]) // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15 -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]]) +// CHECK-CXX-NEXT: tail call void... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/189992 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
