https://github.com/paulwalker-arm updated https://github.com/llvm/llvm-project/pull/203349
>From 0de19e0d42312190b2d309570ad41c468b1f6cd8 Mon Sep 17 00:00:00 2001 From: Paul Walker <[email protected]> Date: Thu, 11 Jun 2026 17:33:33 +0100 Subject: [PATCH 1/2] [SVE] Replace unnecessary Intrinsic::aarch64_sve_ptrue construction. Prefer ConstantInt::getTrue() over sve.ptrue(31) when creating all-active boolean vectors. --- clang/lib/CodeGen/CodeGenFunction.h | 1 - clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 9 +-- .../AArch64/sve-intrinsics/acle_sve_dupq.c | 52 ++++++-------- .../Target/AArch64/AArch64ISelLowering.cpp | 37 ++++------ .../AArch64/AArch64TargetTransformInfo.cpp | 20 ++---- .../AArch64/sve-interleaved-accesses.ll | 70 +++++++------------ 6 files changed, 67 insertions(+), 122 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index aeace0d789a61..4757cf1411937 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4877,7 +4877,6 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::Value *EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef<llvm::Value *> Ops); - llvm::Value *EmitSVEAllTruePred(const SVETypeFlags &TypeFlags); llvm::Value *EmitSVEDupX(llvm::Value *Scalar); llvm::Value *EmitSVEDupX(llvm::Value *Scalar, llvm::Type *Ty); llvm::Value *EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty); diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index ece8ff21561cf..5142ced664c90 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -3390,13 +3390,6 @@ CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) { } } -llvm::Value * -CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) { - Function *Ptrue = - CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags)); - return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)}); -} - constexpr unsigned SVEBitsPerBlock = 128; static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) { @@ -4277,7 +4270,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, return DupQLane; SVETypeFlags TypeFlags(Builtin->TypeModifier); - Value *Pred = EmitSVEAllTruePred(TypeFlags); + auto *Pred = ConstantInt::getTrue(getSVEPredType(TypeFlags)); // For svdupq_n_b* we need to add an additional 'cmpne' with '0'. F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq.c index 7a645c5c38b8c..d0c2883a34c67 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq.c @@ -607,9 +607,8 @@ svfloat64_t test_svdupq_n_f64(float64_t x0, float64_t x1) MODE_ATTR // CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x i8> [[TMP30]], i8 [[TMP15]], i64 15 // CHECK-NEXT: [[TMP32:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> [[TMP31]], i64 0) // CHECK-NEXT: [[TMP33:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP32]], i64 0) -// CHECK-NEXT: [[TMP34:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -// CHECK-NEXT: [[TMP35:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> [[TMP34]], <vscale x 16 x i8> [[TMP33]], <vscale x 2 x i64> zeroinitializer) -// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP35]] +// CHECK-NEXT: [[TMP34:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[TMP33]], <vscale x 2 x i64> zeroinitializer) +// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP34]] // // CPP-CHECK-LABEL: @_Z16test_svdupq_n_b8bbbbbbbbbbbbbbbb( // CPP-CHECK-NEXT: entry: @@ -679,9 +678,8 @@ svfloat64_t test_svdupq_n_f64(float64_t x0, float64_t x1) MODE_ATTR // CPP-CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x i8> [[TMP30]], i8 [[TMP15]], i64 15 // CPP-CHECK-NEXT: [[TMP32:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> [[TMP31]], i64 0) // CPP-CHECK-NEXT: [[TMP33:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP32]], i64 0) -// CPP-CHECK-NEXT: [[TMP34:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -// CPP-CHECK-NEXT: [[TMP35:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> [[TMP34]], <vscale x 16 x i8> [[TMP33]], <vscale x 2 x i64> zeroinitializer) -// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP35]] +// CPP-CHECK-NEXT: [[TMP34:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[TMP33]], <vscale x 2 x i64> zeroinitializer) +// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP34]] // svbool_t test_svdupq_n_b8(bool x0, bool x1, bool x2, bool x3, bool x4, bool x5, bool x6, bool x7, @@ -728,10 +726,9 @@ svbool_t test_svdupq_n_b8(bool x0, bool x1, bool x2, bool x3, // CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i16> [[TMP14]], i16 [[TMP7]], i64 7 // CHECK-NEXT: [[TMP16:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> [[TMP15]], i64 0) // CHECK-NEXT: [[TMP17:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP16]], i64 0) -// CHECK-NEXT: [[TMP18:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -// CHECK-NEXT: [[TMP19:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> [[TMP18]], <vscale x 8 x i16> [[TMP17]], <vscale x 2 x i64> zeroinitializer) -// CHECK-NEXT: [[TMP20:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP19]]) -// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP20]] +// CHECK-NEXT: [[TMP18:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[TMP17]], <vscale x 2 x i64> zeroinitializer) +// CHECK-NEXT: [[TMP19:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP18]]) +// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP19]] // // CPP-CHECK-LABEL: @_Z17test_svdupq_n_b16bbbbbbbb( // CPP-CHECK-NEXT: entry: @@ -769,10 +766,9 @@ svbool_t test_svdupq_n_b8(bool x0, bool x1, bool x2, bool x3, // CPP-CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i16> [[TMP14]], i16 [[TMP7]], i64 7 // CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> [[TMP15]], i64 0) // CPP-CHECK-NEXT: [[TMP17:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP16]], i64 0) -// CPP-CHECK-NEXT: [[TMP18:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -// CPP-CHECK-NEXT: [[TMP19:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> [[TMP18]], <vscale x 8 x i16> [[TMP17]], <vscale x 2 x i64> zeroinitializer) -// CPP-CHECK-NEXT: [[TMP20:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP19]]) -// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP20]] +// CPP-CHECK-NEXT: [[TMP18:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[TMP17]], <vscale x 2 x i64> zeroinitializer) +// CPP-CHECK-NEXT: [[TMP19:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP18]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP19]] // svbool_t test_svdupq_n_b16(bool x0, bool x1, bool x2, bool x3, bool x4, bool x5, bool x6, bool x7) MODE_ATTR @@ -801,10 +797,9 @@ svbool_t test_svdupq_n_b16(bool x0, bool x1, bool x2, bool x3, // CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP3]], i64 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> [[TMP7]], i64 0) // CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP8]], i64 0) -// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -// CHECK-NEXT: [[TMP11:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP10]], <vscale x 4 x i32> [[TMP9]], <vscale x 2 x i64> zeroinitializer) -// CHECK-NEXT: [[TMP12:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP11]]) -// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP12]] +// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP9]], <vscale x 2 x i64> zeroinitializer) +// CHECK-NEXT: [[TMP11:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP10]]) +// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP11]] // // CPP-CHECK-LABEL: @_Z17test_svdupq_n_b32bbbb( // CPP-CHECK-NEXT: entry: @@ -826,10 +821,9 @@ svbool_t test_svdupq_n_b16(bool x0, bool x1, bool x2, bool x3, // CPP-CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP3]], i64 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> [[TMP7]], i64 0) // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP8]], i64 0) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP10]], <vscale x 4 x i32> [[TMP9]], <vscale x 2 x i64> zeroinitializer) -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP11]]) -// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP12]] +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP9]], <vscale x 2 x i64> zeroinitializer) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP10]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP11]] // svbool_t test_svdupq_n_b32(bool x0, bool x1, bool x2, bool x3) MODE_ATTR { @@ -849,10 +843,9 @@ svbool_t test_svdupq_n_b32(bool x0, bool x1, bool x2, bool x3) MODE_ATTR // CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP1]], i64 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> [[TMP3]], i64 0) // CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> zeroinitializer) -// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP7]]) -// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP8]] +// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> zeroinitializer) +// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP6]]) +// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP7]] // // CPP-CHECK-LABEL: @_Z17test_svdupq_n_b64bb( // CPP-CHECK-NEXT: entry: @@ -866,10 +859,9 @@ svbool_t test_svdupq_n_b32(bool x0, bool x1, bool x2, bool x3) MODE_ATTR // CPP-CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP1]], i64 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> [[TMP3]], i64 0) // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> zeroinitializer) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP7]]) -// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP8]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> zeroinitializer) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP6]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP7]] // svbool_t test_svdupq_n_b64(bool x0, bool x1) MODE_ATTR { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 57a2d73e00f57..7dc547de30c73 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -18741,17 +18741,13 @@ bool AArch64TargetLowering::lowerInterleavedLoad( Value *PTrue = nullptr; if (UseScalable) { - std::optional<unsigned> PgPattern = - getSVEPredPatternFromNumElements(FVTy->getNumElements()); - if (Subtarget->getMinSVEVectorSizeInBits() == - Subtarget->getMaxSVEVectorSizeInBits() && - Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy)) - PgPattern = AArch64SVEPredPattern::all; - - auto *PTruePat = - ConstantInt::get(Type::getInt32Ty(LDVTy->getContext()), *PgPattern); - PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy}, - {PTruePat}); + if (DL.getTypeSizeInBits(FVTy) != Subtarget->getSVEVectorSizeInBits()) { + std::optional<unsigned> PgPattern = + getSVEPredPatternFromNumElements(FVTy->getNumElements()); + PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, PredTy, + Builder.getInt32(*PgPattern)); + } else + PTrue = ConstantInt::getTrue(PredTy); } for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) { @@ -18963,18 +18959,13 @@ bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store, Value *PTrue = nullptr; if (UseScalable) { - std::optional<unsigned> PgPattern = - getSVEPredPatternFromNumElements(SubVecTy->getNumElements()); - if (Subtarget->getMinSVEVectorSizeInBits() == - Subtarget->getMaxSVEVectorSizeInBits() && - Subtarget->getMinSVEVectorSizeInBits() == - DL.getTypeSizeInBits(SubVecTy)) - PgPattern = AArch64SVEPredPattern::all; - - auto *PTruePat = - ConstantInt::get(Type::getInt32Ty(STVTy->getContext()), *PgPattern); - PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy}, - {PTruePat}); + if (DL.getTypeSizeInBits(SubVecTy) != Subtarget->getSVEVectorSizeInBits()) { + std::optional<unsigned> PgPattern = + getSVEPredPatternFromNumElements(SubVecTy->getNumElements()); + PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, PredTy, + Builder.getInt32(*PgPattern)); + } else + PTrue = ConstantInt::getTrue(PredTy); } for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) { diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index fb8d2fb2cd2db..bf2fad02f9c9f 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2155,15 +2155,12 @@ static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC, if ((PredicateBits & (1 << I)) == 0) return std::nullopt; - auto *PTruePat = - ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all); - auto *PTrue = IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, - {PredType}, {PTruePat}); - auto *ConvertToSVBool = IC.Builder.CreateIntrinsic( - Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue}); + auto *ConvertToSVBool = + IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, + PredType, ConstantInt::getTrue(PredType)); auto *ConvertFromSVBool = IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, - {II.getType()}, {ConvertToSVBool}); + II.getType(), ConvertToSVBool); ConvertFromSVBool->takeName(&II); return IC.replaceInstUsesWith(II, ConvertFromSVBool); @@ -2287,15 +2284,10 @@ static std::optional<Instruction *> instCombineSVECondLast(InstCombiner &IC, static std::optional<Instruction *> instCombineRDFFR(InstCombiner &IC, IntrinsicInst &II) { - LLVMContext &Ctx = II.getContext(); // Replace rdffr with predicated rdffr.z intrinsic, so that optimizePTestInstr // can work with RDFFR_PP for ptest elimination. - auto *AllPat = - ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all); - auto *PTrue = IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, - {II.getType()}, {AllPat}); - auto *RDFFR = - IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_rdffr_z, {PTrue}); + auto *RDFFR = IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_rdffr_z, + ConstantInt::getTrue(II.getType())); RDFFR->takeName(&II); return IC.replaceInstUsesWith(II, RDFFR); } diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll index f25f1b89164f7..7c676875acf92 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll @@ -7,8 +7,7 @@ target triple = "aarch64-linux-gnu" define void @load_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_factor2( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16.p0(<vscale x 8 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16.p0(<vscale x 8 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[LDN]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv8i16(<vscale x 8 x i16> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[LDN]], 0 @@ -26,8 +25,7 @@ define void @load_factor2(ptr %ptr) #0 { define void @load_factor3(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_factor3( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32.p0(<vscale x 4 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32.p0(<vscale x 4 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv4i32(<vscale x 4 x i32> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 1 @@ -46,8 +44,7 @@ define void @load_factor3(ptr %ptr) #0 { define void @load_factor4(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_factor4( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 2 @@ -69,12 +66,11 @@ define void @load_factor4(ptr %ptr) #0 { define void @store_factor2(ptr %ptr, <16 x i16> %v0, <16 x i16> %v1) #0 { ; CHECK-LABEL: define void @store_factor2( ; CHECK-SAME: ptr [[PTR:%.*]], <16 x i16> [[V0:%.*]], <16 x i16> [[V1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[V0]], <16 x i16> [[V1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> poison, <16 x i16> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[V0]], <16 x i16> [[V1]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> poison, <16 x i16> [[TMP4]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8i16.p0(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8i16.p0(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %interleaved.vec = shufflevector <16 x i16> %v0, <16 x i16> %v1, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, @@ -88,14 +84,13 @@ define void @store_factor3(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32> %v2 ; CHECK-SAME: ptr [[PTR:%.*]], <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <8 x i32> [[V2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v8i32(<vscale x 4 x i32> poison, <8 x i32> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v8i32(<vscale x 4 x i32> poison, <8 x i32> [[TMP4]], i64 0) ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v8i32(<vscale x 4 x i32> poison, <8 x i32> [[TMP6]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv4i32.p0(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP7]], <vscale x 4 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv4i32.p0(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP7]], <vscale x 4 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, @@ -113,7 +108,6 @@ define void @store_factor4(ptr %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2 ; CHECK-SAME: ptr [[PTR:%.*]], <4 x i64> [[V0:%.*]], <4 x i64> [[V1:%.*]], <4 x i64> [[V2:%.*]], <4 x i64> [[V3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[S0:%.*]] = shufflevector <4 x i64> [[V0]], <4 x i64> [[V1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i64> [[V2]], <4 x i64> [[V3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> @@ -122,7 +116,7 @@ define void @store_factor4(ptr %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2 ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP6]], i64 0) ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP8]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2i64.p0(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2i64.p0(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %s0 = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -135,8 +129,7 @@ define void @store_factor4(ptr %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2 define void @load_ptrvec_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_ptrvec_factor2( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr <4 x i64> [[TMP3]] to <4 x ptr> @@ -154,8 +147,7 @@ define void @load_ptrvec_factor2(ptr %ptr) #0 { define void @load_ptrvec_factor3(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_ptrvec_factor3( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr <4 x i64> [[TMP3]] to <4 x ptr> @@ -177,8 +169,7 @@ define void @load_ptrvec_factor3(ptr %ptr) #0 { define void @load_ptrvec_factor4(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_ptrvec_factor4( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr <4 x i64> [[TMP3]] to <4 x ptr> @@ -206,12 +197,11 @@ define void @store_ptrvec_factor2(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1) #0 { ; CHECK-SAME: ptr [[PTR:%.*]], <4 x ptr> [[V0:%.*]], <4 x ptr> [[V1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <4 x ptr> [[V0]] to <4 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint <4 x ptr> [[V1]] to <4 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP4]], i64 0) ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP6]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64.p0(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i1> [[TMP3]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64.p0(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %interleaved.vec = shufflevector <4 x ptr> %v0, <4 x ptr> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> @@ -226,14 +216,13 @@ define void @store_ptrvec_factor3(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1, <4 x p ; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x ptr> [[V2]], <4 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <8 x ptr> [[S0]] to <8 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint <8 x ptr> [[S1]] to <8 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP4]], i64 0) ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP6]], i64 0) ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 8, i32 9, i32 10, i32 11> ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP8]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv2i64.p0(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> [[TMP3]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv2i64.p0(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %s0 = shufflevector <4 x ptr> %v0, <4 x ptr> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -250,7 +239,6 @@ define void @store_ptrvec_factor4(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1, <4 x p ; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x ptr> [[V2]], <4 x ptr> [[V3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <8 x ptr> [[S0]] to <8 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint <8 x ptr> [[S1]] to <8 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP4]], i64 0) ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> @@ -259,7 +247,7 @@ define void @store_ptrvec_factor4(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1, <4 x p ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP8]], i64 0) ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP10]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2i64.p0(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i1> [[TMP3]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2i64.p0(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %s0 = shufflevector <4 x ptr> %v0, <4 x ptr> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -273,14 +261,13 @@ define void @store_ptrvec_factor4(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1, <4 x p define void @load_factor2_wide(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_factor2_wide( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP4]], i64 0) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[PTR]], i32 8 -; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[TMP6]]) +; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[TMP6]]) ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN1]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP7]], i64 0) ; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN1]], 0 @@ -298,18 +285,17 @@ define void @load_factor2_wide(ptr %ptr) #0 { define void @store_factor2_wide(ptr %ptr, <8 x i64> %v0, <8 x i64> %v1) #0 { ; CHECK-LABEL: define void @store_factor2_wide( ; CHECK-SAME: ptr [[PTR:%.*]], <8 x i64> [[V0:%.*]], <8 x i64> [[V1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i64> [[V0]], <8 x i64> [[V1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[V0]], <8 x i64> [[V1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11> ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP4]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64.p0(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64.p0(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[V0]], <8 x i64> [[V1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP6]], i64 0) ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i64> [[V0]], <8 x i64> [[V1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP8]], i64 0) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[PTR]], i32 8 -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64.p0(<vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> [[TMP1]], ptr [[TMP10]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64.p0(<vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> splat (i1 true), ptr [[TMP10]]) ; CHECK-NEXT: ret void ; %interleaved.vec = shufflevector <8 x i64> %v0, <8 x i64> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> @@ -432,8 +418,7 @@ define void @store_min_ge_type(ptr %ptr, <4 x i64> %v0, <4 x i64> %v1) #2 { define void @load_double_factor4(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_double_factor4( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 2 @@ -455,8 +440,7 @@ define void @load_double_factor4(ptr %ptr) #0 { define void @load_float_factor3(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_float_factor3( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32.p0(<vscale x 4 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32.p0(<vscale x 4 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 1 @@ -475,8 +459,7 @@ define void @load_float_factor3(ptr %ptr) #0 { define void @load_half_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_half_factor2( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16.p0(<vscale x 8 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16.p0(<vscale x 8 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[LDN]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x half> @llvm.vector.extract.v16f16.nxv8f16(<vscale x 8 x half> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[LDN]], 0 @@ -492,8 +475,7 @@ define void @load_half_factor2(ptr %ptr) #0 { define void @load_bfloat_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_bfloat_factor2( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16.p0(<vscale x 8 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16.p0(<vscale x 8 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[LDN]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x bfloat> @llvm.vector.extract.v16bf16.nxv8bf16(<vscale x 8 x bfloat> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[LDN]], 0 @@ -511,7 +493,6 @@ define void @store_double_factor4(ptr %ptr, <4 x double> %v0, <4 x double> %v1, ; CHECK-SAME: ptr [[PTR:%.*]], <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[S0:%.*]] = shufflevector <4 x double> [[V0]], <4 x double> [[V1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x double> [[V2]], <4 x double> [[V3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> poison, <4 x double> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> @@ -520,7 +501,7 @@ define void @store_double_factor4(ptr %ptr, <4 x double> %v0, <4 x double> %v1, ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> poison, <4 x double> [[TMP6]], i64 0) ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> poison, <4 x double> [[TMP8]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2f64.p0(<vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x double> [[TMP7]], <vscale x 2 x double> [[TMP9]], <vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2f64.p0(<vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x double> [[TMP7]], <vscale x 2 x double> [[TMP9]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -535,14 +516,13 @@ define void @store_float_factor3(ptr %ptr, <8 x float> %v0, <8 x float> %v1, <8 ; CHECK-SAME: ptr [[PTR:%.*]], <8 x float> [[V0:%.*]], <8 x float> [[V1:%.*]], <8 x float> [[V2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x float> [[V0]], <8 x float> [[V1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[V2]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> poison, <8 x float> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> poison, <8 x float> [[TMP4]], i64 0) ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> poison, <8 x float> [[TMP6]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv4f32.p0(<vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP5]], <vscale x 4 x float> [[TMP7]], <vscale x 4 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv4f32.p0(<vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP5]], <vscale x 4 x float> [[TMP7]], <vscale x 4 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %s0 = shufflevector <8 x float> %v0, <8 x float> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, @@ -558,12 +538,11 @@ define void @store_float_factor3(ptr %ptr, <8 x float> %v0, <8 x float> %v1, <8 define void @store_half_factor2(ptr %ptr, <16 x half> %v0, <16 x half> %v1) #0 { ; CHECK-LABEL: define void @store_half_factor2( ; CHECK-SAME: ptr [[PTR:%.*]], <16 x half> [[V0:%.*]], <16 x half> [[V1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x half> [[V0]], <16 x half> [[V1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v16f16(<vscale x 8 x half> poison, <16 x half> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x half> [[V0]], <16 x half> [[V1]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v16f16(<vscale x 8 x half> poison, <16 x half> [[TMP4]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8f16.p0(<vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8f16.p0(<vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %interleaved.vec = shufflevector <16 x half> %v0, <16 x half> %v1, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, @@ -576,12 +555,11 @@ define void @store_half_factor2(ptr %ptr, <16 x half> %v0, <16 x half> %v1) #0 { define void @store_bfloat_factor2(ptr %ptr, <16 x bfloat> %v0, <16 x bfloat> %v1) #0 { ; CHECK-LABEL: define void @store_bfloat_factor2( ; CHECK-SAME: ptr [[PTR:%.*]], <16 x bfloat> [[V0:%.*]], <16 x bfloat> [[V1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x bfloat> [[V0]], <16 x bfloat> [[V1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v16bf16(<vscale x 8 x bfloat> poison, <16 x bfloat> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x bfloat> [[V0]], <16 x bfloat> [[V1]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v16bf16(<vscale x 8 x bfloat> poison, <16 x bfloat> [[TMP4]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8bf16.p0(<vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8bf16.p0(<vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %interleaved.vec = shufflevector <16 x bfloat> %v0, <16 x bfloat> %v1, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, >From 973af88667996bd2b78f5061ceec56fa642e92d1 Mon Sep 17 00:00:00 2001 From: Paul Walker <[email protected]> Date: Thu, 11 Jun 2026 17:48:39 +0000 Subject: [PATCH 2/2] Remove use of auto. --- clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index 5142ced664c90..4c668dabd53dc 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -4270,7 +4270,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, return DupQLane; SVETypeFlags TypeFlags(Builtin->TypeModifier); - auto *Pred = ConstantInt::getTrue(getSVEPredType(TypeFlags)); + Constant *Pred = ConstantInt::getTrue(getSVEPredType(TypeFlags)); // For svdupq_n_b* we need to add an additional 'cmpne' with '0'. F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
