llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Paul Walker (paulwalker-arm)

<details>
<summary>Changes</summary>

This requirement was not intentional; it was merely the result of implementation convenience.

Fixes: https://github.com/llvm/llvm-project/issues/183265

---

Patch is 2.05 MiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/189992.diff


85 Files Affected:

- (modified) clang/include/clang/Basic/arm_sve.td (+2-2) 
- (modified) clang/lib/CodeGen/TargetBuiltins/ARM.cpp (+24-38) 
- (modified) clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1.c (+40-40) 
- (modified) clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1_vnum.c 
(+40-40) 
- (modified) clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ldr.c (+59-26) 
- (modified) clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_st1.c (+40-40) 
- (modified) clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_st1_vnum.c 
(+40-40) 
- (modified) clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_str.c (+59-26) 
- (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_ldr_str_zt.c 
(+4-4) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1.c (+48-48) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ro.c 
(+26-26) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1rq.c 
(+26-26) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1sb.c 
(+16-16) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1sh.c 
(+32-32) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1sw.c 
(+16-16) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1ub.c 
(+16-16) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1uh.c 
(+32-32) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld1uw.c 
(+16-16) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld2.c (+52-52) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld3.c (+52-52) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ld4.c (+52-52) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1.c 
(+100-100) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1sb.c 
(+40-40) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1sh.c 
(+48-48) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1sw.c 
(+24-24) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1ub.c 
(+40-40) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1uh.c 
(+48-48) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldff1uw.c 
(+24-24) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1.c 
(+52-52) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1sb.c 
(+24-24) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1sh.c 
(+16-16) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1sw.c 
(+8-8) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1ub.c 
(+24-24) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1uh.c 
(+16-16) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnf1uw.c 
(+8-8) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_ldnt1.c 
(+52-52) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_prfb.c (+34-34) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_prfd.c (+34-34) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_prfh.c (+34-34) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_prfw.c (+34-34) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1.c (+48-48) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1b.c (+8-8) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1h.c (+16-16) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st1w.c (+8-8) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st2.c (+52-52) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st3.c (+52-52) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_st4.c (+52-52) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_stnt1.c 
(+52-52) 
- (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_ldnt1.c 
(+30-30) 
- (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_ldnt1sb.c 
(+12-12) 
- (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_ldnt1sh.c 
(+20-20) 
- (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_ldnt1sw.c 
(+16-16) 
- (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_ldnt1ub.c 
(+12-12) 
- (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_ldnt1uh.c 
(+20-20) 
- (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_ldnt1uw.c 
(+16-16) 
- (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_stnt1.c 
(+30-30) 
- (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_stnt1b.c 
(+12-12) 
- (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_stnt1h.c 
(+20-20) 
- (modified) clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_stnt1w.c 
(+16-16) 
- (modified) clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c 
(+96-96) 
- (modified) 
clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1_single.c (+24-24) 
- (modified) clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c 
(+96-96) 
- (modified) 
clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_load_struct.c 
(+154-154) 
- (modified) clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_loads.c 
(+46-46) 
- (modified) clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1.c 
(+96-96) 
- (modified) 
clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_st1_single.c (+24-24) 
- (modified) clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_stnt1.c 
(+96-96) 
- (modified) clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store.c 
(+92-92) 
- (modified) 
clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_store_struct.c 
(+156-156) 
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+29-29) 
- (modified) llvm/lib/IR/AutoUpgrade.cpp (+11-2) 
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+4-2) 
- (modified) llvm/test/Bitcode/upgrade-aarch64-sve-intrinsics.ll (+16-5) 
- (modified) llvm/test/CodeGen/AArch64/sve-coalesce-ptrue-intrinsics.ll 
(+21-21) 
- (modified) 
llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add-mini.ll 
(+1-1) 
- (modified) 
llvm/test/Instrumentation/MemorySanitizer/AArch64/sme2-intrinsics-add.ll (+6-6) 
- (modified) 
llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-stores.ll
 (+1-1) 
- (modified) 
llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-gatherscatter.ll (+2-2) 
- (modified) llvm/test/Transforms/InstCombine/scalable-trunc.ll (+1-1) 
- (modified) 
llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll
 (+35-35) 
- (modified) 
llvm/test/Transforms/InterleavedAccess/AArch64/sve-deinterleave4.ll (+56-6) 
- (modified) llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleave4.ll 
(+6-6) 
- (modified) 
llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll 
(+30-30) 
- (modified) 
llvm/test/Transforms/PhaseOrdering/AArch64/sve-interleave-vectorization.ll 
(+3-3) 
- (modified) mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td (+6-2) 


``````````diff
diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index be3cd8a76503b..724802cce24f7 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -238,7 +238,7 @@ def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", 
"csilUcUsUiUlhfdbm", [IsLoad, Verify
 def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdbm", 
[IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">;
 
 // Load one quadword and replicate (scalar base)
-def SVLD1RQ : SInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfdbm", MergeNone, 
"aarch64_sve_ld1rq", [VerifyRuntimeMode]>;
+def SVLD1RQ : MInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfdbm", [IsLoad, 
VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1rq">;
 
 // Load N-element structure into N vectors (scalar base)
 def SVLD2 : SInst<"svld2[_{2}]", "2Pc", "csilUcUsUiUlhfdbm", MergeNone, 
"aarch64_sve_ld2_sret", [IsStructLoad, VerifyRuntimeMode]>;
@@ -252,7 +252,7 @@ def SVLD4_VNUM : SInst<"svld4_vnum[_{2}]", "4Pcl", 
"csilUcUsUiUlhfdbm", MergeNon
 
 // Load one octoword and replicate (scalar base)
 let SVETargetGuard = "f64mm", SMETargetGuard = InvalidMode in {
-  def SVLD1RO : SInst<"svld1ro[_{2}]", "dPc", "csilUcUsUiUlhfdbm", MergeNone, 
"aarch64_sve_ld1ro">;
+  def SVLD1RO : MInst<"svld1ro[_{2}]", "dPc", "csilUcUsUiUlhfdbm", [IsLoad], 
MemEltTyDefault, "aarch64_sve_ld1ro">;
 }
 
 let SVETargetGuard = "bf16", SMETargetGuard = InvalidMode in {
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp 
b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 8ec2f5b83085c..3e0ec2c143428 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -3440,19 +3440,7 @@ Value *CodeGenFunction::EmitSVEGatherLoad(const 
SVETypeFlags &TypeFlags,
   auto *ResultTy = getSVEType(TypeFlags);
   auto *OverloadedTy =
       llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
-
-  Function *F = nullptr;
-  if (Ops[1]->getType()->isVectorTy())
-    // This is the "vector base, scalar offset" case. In order to uniquely
-    // map this built-in to an LLVM IR intrinsic, we need both the return type
-    // and the type of the vector base.
-    F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
-  else
-    // This is the "scalar base, vector offset case". The type of the offset
-    // is encoded in the name of the intrinsic. We only need to specify the
-    // return type in order to uniquely map this built-in to an LLVM IR
-    // intrinsic.
-    F = CGM.getIntrinsic(IntID, OverloadedTy);
+  Function *F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
 
   // At the ACLE level there's only one predicate type, svbool_t, which is
   // mapped to <n x 16 x i1>. However, this might be incompatible with the
@@ -3499,18 +3487,7 @@ Value *CodeGenFunction::EmitSVEScatterStore(const 
SVETypeFlags &TypeFlags,
   // it's the first argument. Move it accordingly.
   Ops.insert(Ops.begin(), Ops.pop_back_val());
 
-  Function *F = nullptr;
-  if (Ops[2]->getType()->isVectorTy())
-    // This is the "vector base, scalar offset" case. In order to uniquely
-    // map this built-in to an LLVM IR intrinsic, we need both the return type
-    // and the type of the vector base.
-    F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
-  else
-    // This is the "scalar base, vector offset case". The type of the offset
-    // is encoded in the name of the intrinsic. We only need to specify the
-    // return type in order to uniquely map this built-in to an LLVM IR
-    // intrinsic.
-    F = CGM.getIntrinsic(IntID, OverloadedTy);
+  Function *F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
 
   // Pass 0 when the offset is missing. This can only be applied when using
   // the "vector base" addressing mode for which ACLE allows no offset. The
@@ -3572,9 +3549,12 @@ Value *CodeGenFunction::EmitSVEGatherPrefetch(const 
SVETypeFlags &TypeFlags,
       if (BytesPerElt > 1)
         Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
     }
+
+    Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
+    return Builder.CreateCall(F, Ops);
   }
 
-  Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
+  Function *F = CGM.getIntrinsic(IntID, {Ops[1]->getType(), OverloadedTy});
   return Builder.CreateCall(F, Ops);
 }
 
@@ -3589,7 +3569,7 @@ Value *CodeGenFunction::EmitSVEStructLoad(const 
SVETypeFlags &TypeFlags,
   if (Ops.size() > 2)
     BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
 
-  Function *F = CGM.getIntrinsic(IntID, {VTy});
+  Function *F = CGM.getIntrinsic(IntID, {VTy, BasePtr->getType()});
   return Builder.CreateCall(F, {Predicate, BasePtr});
 }
 
@@ -3633,7 +3613,7 @@ Value *CodeGenFunction::EmitSVEStructStore(const 
SVETypeFlags &TypeFlags,
   for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
     Operands.push_back(Ops[I]);
   Operands.append({Predicate, BasePtr});
-  Function *F = CGM.getIntrinsic(IntID, { VTy });
+  Function *F = CGM.getIntrinsic(IntID, {VTy, BasePtr->getType()});
 
   return Builder.CreateCall(F, Operands);
 }
@@ -3682,7 +3662,8 @@ Value *CodeGenFunction::EmitSVEPrefetchLoad(const 
SVETypeFlags &TypeFlags,
 
   Value *PrfOp = Ops.back();
 
-  Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
+  llvm::Type *Tys[2] = {Predicate->getType(), BasePtr->getType()};
+  Function *F = CGM.getIntrinsic(BuiltinID, Tys);
   return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
 }
 
@@ -3730,9 +3711,9 @@ Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr 
*E,
   if (Ops.size() > 2)
     BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
 
-  Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : 
MemoryTy);
-  auto *Load =
-      cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
+  llvm::Type *Tys[2] = {IsQuadLoad ? VectorTy : MemoryTy, BasePtr->getType()};
+  Function *F = CGM.getIntrinsic(IntrinsicID, Tys);
+  auto *Load = Builder.CreateCall(F, {Predicate, BasePtr});
   auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
   CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
 
@@ -3789,10 +3770,9 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const 
CallExpr *E,
   Value *Val =
       IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
 
-  Function *F =
-      CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
-  auto *Store =
-      cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, 
BasePtr}));
+  llvm::Type *Tys[2] = {IsQuadStore ? VectorTy : MemoryTy, BasePtr->getType()};
+  Function *F = CGM.getIntrinsic(IntrinsicID, Tys);
+  auto *Store = Builder.CreateCall(F, {Val, Predicate, BasePtr});
   auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
   CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
   return Store;
@@ -3829,7 +3809,7 @@ Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags 
&TypeFlags,
   NewOps.push_back(BasePtr);
   NewOps.push_back(Ops[0]);
   NewOps.push_back(RealSlice);
-  Function *F = CGM.getIntrinsic(IntID);
+  Function *F = CGM.getIntrinsic(IntID, BasePtr->getType());
   return Builder.CreateCall(F, NewOps);
 }
 
@@ -3862,7 +3842,7 @@ Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags 
&TypeFlags,
     Ops.push_back(Builder.getInt32(0));
   else
     Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
-  Function *F = CGM.getIntrinsic(IntID, {});
+  Function *F = CGM.getIntrinsic(IntID, Ops[1]->getType());
   return Builder.CreateCall(F, Ops);
 }
 
@@ -4462,6 +4442,12 @@ Value 
*CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
       if (PredTy->getElementType()->isIntegerTy(1))
         Op = EmitSVEPredicateCast(Op, getSVEType(TypeFlags));
 
+  if (BuiltinID == SME::BI__builtin_sme_svldr_zt ||
+      BuiltinID == SME::BI__builtin_sme_svstr_zt) {
+    Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic, Ops[1]->getType());
+    return Builder.CreateCall(F, Ops);
+  }
+
   Function *F =
       TypeFlags.isOverloadNone()
           ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
diff --git a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1.c 
b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1.c
index 6f84e7b36b149..598c31aeec489 100644
--- a/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1.c
+++ b/clang/test/CodeGen/AArch64/sme-intrinsics/acle_sme_ld1.c
@@ -9,17 +9,17 @@
 // CHECK-C-LABEL: define dso_local void @test_svld1_hor_za8(
 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> 
[[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 // CHECK-C-NEXT:  entry:
-// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x 
i1> [[PG]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1b.horiz.p0(<vscale x 
16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
 // CHECK-C-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15
-// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x 
i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
+// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1b.horiz.p0(<vscale x 
16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
 // CHECK-C-NEXT:    ret void
 //
 // CHECK-CXX-LABEL: define dso_local void 
@_Z18test_svld1_hor_za8ju10__SVBool_tPKv(
 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> 
[[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 // CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 
x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1b.horiz.p0(<vscale x 
16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
 // CHECK-CXX-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 
x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
+// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1b.horiz.p0(<vscale x 
16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
 // CHECK-CXX-NEXT:    ret void
 //
 void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) 
__arm_streaming __arm_out("za") {
@@ -31,18 +31,18 @@ void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, 
const void *ptr) __arm
 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> 
[[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-C-NEXT:  entry:
 // CHECK-C-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
-// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x 
i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1h.horiz.p0(<vscale x 8 
x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
 // CHECK-C-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 7
-// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x 
i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]])
+// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1h.horiz.p0(<vscale x 8 
x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]])
 // CHECK-C-NEXT:    ret void
 //
 // CHECK-CXX-LABEL: define dso_local void 
@_Z19test_svld1_hor_za16ju10__SVBool_tPKv(
 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> 
[[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-CXX-NEXT:  entry:
 // CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 
x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1h.horiz.p0(<vscale x 
8 x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
 // CHECK-CXX-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 7
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 
x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]])
+// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1h.horiz.p0(<vscale x 
8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]])
 // CHECK-CXX-NEXT:    ret void
 //
 void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) 
__arm_streaming __arm_out("za") {
@@ -54,18 +54,18 @@ void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, 
const void *ptr) __ar
 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> 
[[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-C-NEXT:  entry:
 // CHECK-C-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
-// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x 
i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1w.horiz.p0(<vscale x 4 
x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
 // CHECK-C-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 3
-// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x 
i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]])
+// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1w.horiz.p0(<vscale x 4 
x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]])
 // CHECK-C-NEXT:    ret void
 //
 // CHECK-CXX-LABEL: define dso_local void 
@_Z19test_svld1_hor_za32ju10__SVBool_tPKv(
 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> 
[[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-CXX-NEXT:  entry:
 // CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 
x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1w.horiz.p0(<vscale x 
4 x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
 // CHECK-CXX-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 3
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 
x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]])
+// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1w.horiz.p0(<vscale x 
4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]])
 // CHECK-CXX-NEXT:    ret void
 //
 void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) 
__arm_streaming __arm_out("za") {
@@ -77,18 +77,18 @@ void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, 
const void *ptr) __ar
 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> 
[[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-C-NEXT:  entry:
 // CHECK-C-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
-// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x 
i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1d.horiz.p0(<vscale x 2 
x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
 // CHECK-C-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 1
-// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x 
i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]])
+// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1d.horiz.p0(<vscale x 2 
x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]])
 // CHECK-C-NEXT:    ret void
 //
 // CHECK-CXX-LABEL: define dso_local void 
@_Z19test_svld1_hor_za64ju10__SVBool_tPKv(
 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> 
[[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-CXX-NEXT:  entry:
 // CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 
x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1d.horiz.p0(<vscale x 
2 x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
 // CHECK-CXX-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 1
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 
x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]])
+// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1d.horiz.p0(<vscale x 
2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]])
 // CHECK-CXX-NEXT:    ret void
 //
 void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) 
__arm_streaming __arm_out("za") {
@@ -100,16 +100,16 @@ void test_svld1_hor_za64(uint32_t slice_base, svbool_t 
pg, const void *ptr) __ar
 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> 
[[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-C-NEXT:  entry:
 // CHECK-C-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG]])
-// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x 
i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
-// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x 
i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1q.horiz.p0(<vscale x 1 
x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1q.horiz.p0(<vscale x 1 
x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]])
 // CHECK-C-NEXT:    ret void
 //
 // CHECK-CXX-LABEL: define dso_local void 
@_Z20test_svld1_hor_za128ju10__SVBool_tPKv(
 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> 
[[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-CXX-NEXT:  entry:
 // CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 1 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG]])
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 
x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 
x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]])
+// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1q.horiz.p0(<vscale x 
1 x i1> [[TMP0]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1q.horiz.p0(<vscale x 
1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]])
 // CHECK-CXX-NEXT:    ret void
 //
 void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) 
__arm_streaming __arm_out("za") {
@@ -120,17 +120,17 @@ void test_svld1_hor_za128(uint32_t slice_base, svbool_t 
pg, const void *ptr) __a
 // CHECK-C-LABEL: define dso_local void @test_svld1_ver_za8(
 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> 
[[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-C-NEXT:  entry:
-// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x 
i1> [[PG]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1b.vert.p0(<vscale x 16 
x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
 // CHECK-C-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15
-// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x 
i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
+// CHECK-C-NEXT:    tail call void @llvm.aarch64.sme.ld1b.vert.p0(<vscale x 16 
x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
 // CHECK-C-NEXT:    ret void
 //
 // CHECK-CXX-LABEL: define dso_local void 
@_Z18test_svld1_ver_za8ju10__SVBool_tPKv(
 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> 
[[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 
x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
+// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1b.vert.p0(<vscale x 
16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[SLICE_BASE]])
 // CHECK-CXX-NEXT:    [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15
-// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 
x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
+// CHECK-CXX-NEXT:    tail call void...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/189992
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to