Author: Paul Walker Date: 2026-06-12T12:02:01+01:00 New Revision: 663bcb3574d72552b41de6a740e454f2f53e2f4a
URL: https://github.com/llvm/llvm-project/commit/663bcb3574d72552b41de6a740e454f2f53e2f4a DIFF: https://github.com/llvm/llvm-project/commit/663bcb3574d72552b41de6a740e454f2f53e2f4a.diff LOG: [SVE] Replace unnecessary Intrinsic::aarch64_sve_ptrue construction. (#203349) Prefer ConstantInt::getTrue() over sve.ptrue(31) when creating all-active boolean vectors. Added: Modified: clang/lib/CodeGen/CodeGenFunction.h clang/lib/CodeGen/TargetBuiltins/ARM.cpp clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq.c llvm/lib/Target/AArch64/AArch64ISelLowering.cpp llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll Removed: ################################################################################ diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index aeace0d789a61..4757cf1411937 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4877,7 +4877,6 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::Value *EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef<llvm::Value *> Ops); - llvm::Value *EmitSVEAllTruePred(const SVETypeFlags &TypeFlags); llvm::Value *EmitSVEDupX(llvm::Value *Scalar); llvm::Value *EmitSVEDupX(llvm::Value *Scalar, llvm::Type *Ty); llvm::Value *EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty); diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index ece8ff21561cf..4c668dabd53dc 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -3390,13 +3390,6 @@ CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) { } } -llvm::Value * -CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) { - Function *Ptrue = - CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags)); - return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)}); -} - constexpr unsigned SVEBitsPerBlock = 128; static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) { @@ -4277,7 +4270,7 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, return DupQLane; SVETypeFlags TypeFlags(Builtin->TypeModifier); - Value *Pred = EmitSVEAllTruePred(TypeFlags); + Constant *Pred = ConstantInt::getTrue(getSVEPredType(TypeFlags)); // For svdupq_n_b* we need to add an additional 'cmpne' with '0'. F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq.c index 7a645c5c38b8c..d0c2883a34c67 100644 --- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq.c +++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dupq.c @@ -607,9 +607,8 @@ svfloat64_t test_svdupq_n_f64(float64_t x0, float64_t x1) MODE_ATTR // CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x i8> [[TMP30]], i8 [[TMP15]], i64 15 // CHECK-NEXT: [[TMP32:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> [[TMP31]], i64 0) // CHECK-NEXT: [[TMP33:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP32]], i64 0) -// CHECK-NEXT: [[TMP34:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -// CHECK-NEXT: [[TMP35:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> [[TMP34]], <vscale x 16 x i8> [[TMP33]], <vscale x 2 x i64> zeroinitializer) -// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP35]] +// CHECK-NEXT: [[TMP34:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[TMP33]], <vscale x 2 x i64> zeroinitializer) +// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP34]] // // CPP-CHECK-LABEL: @_Z16test_svdupq_n_b8bbbbbbbbbbbbbbbb( // CPP-CHECK-NEXT: entry: @@ -679,9 +678,8 @@ svfloat64_t test_svdupq_n_f64(float64_t x0, float64_t x1) MODE_ATTR // CPP-CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x i8> [[TMP30]], i8 [[TMP15]], i64 15 // CPP-CHECK-NEXT: [[TMP32:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> [[TMP31]], i64 0) // CPP-CHECK-NEXT: [[TMP33:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP32]], i64 0) -// CPP-CHECK-NEXT: [[TMP34:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) -// CPP-CHECK-NEXT: [[TMP35:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> [[TMP34]], <vscale x 16 x i8> [[TMP33]], <vscale x 2 x i64> zeroinitializer) -// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP35]] +// CPP-CHECK-NEXT: [[TMP34:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[TMP33]], <vscale x 2 x i64> zeroinitializer) +// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP34]] // svbool_t test_svdupq_n_b8(bool x0, bool x1, bool x2, bool x3, bool x4, bool x5, bool x6, bool x7, @@ -728,10 +726,9 @@ svbool_t test_svdupq_n_b8(bool x0, bool x1, bool x2, bool x3, // CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i16> [[TMP14]], i16 [[TMP7]], i64 7 // CHECK-NEXT: [[TMP16:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> [[TMP15]], i64 0) // CHECK-NEXT: [[TMP17:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP16]], i64 0) -// CHECK-NEXT: [[TMP18:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -// CHECK-NEXT: [[TMP19:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> [[TMP18]], <vscale x 8 x i16> [[TMP17]], <vscale x 2 x i64> zeroinitializer) -// CHECK-NEXT: [[TMP20:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP19]]) -// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP20]] +// CHECK-NEXT: [[TMP18:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[TMP17]], <vscale x 2 x i64> zeroinitializer) +// CHECK-NEXT: [[TMP19:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP18]]) +// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP19]] // // CPP-CHECK-LABEL: @_Z17test_svdupq_n_b16bbbbbbbb( // CPP-CHECK-NEXT: entry: @@ -769,10 +766,9 @@ svbool_t test_svdupq_n_b8(bool x0, bool x1, bool x2, bool x3, // CPP-CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i16> [[TMP14]], i16 [[TMP7]], i64 7 // CPP-CHECK-NEXT: [[TMP16:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> [[TMP15]], i64 0) // CPP-CHECK-NEXT: [[TMP17:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP16]], i64 0) -// CPP-CHECK-NEXT: [[TMP18:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -// CPP-CHECK-NEXT: [[TMP19:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> [[TMP18]], <vscale x 8 x i16> [[TMP17]], <vscale x 2 x i64> zeroinitializer) -// CPP-CHECK-NEXT: [[TMP20:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP19]]) -// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP20]] +// CPP-CHECK-NEXT: [[TMP18:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[TMP17]], <vscale x 2 x i64> zeroinitializer) +// CPP-CHECK-NEXT: [[TMP19:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP18]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP19]] // svbool_t test_svdupq_n_b16(bool x0, bool x1, bool x2, bool x3, bool x4, bool x5, bool x6, bool x7) MODE_ATTR @@ -801,10 +797,9 @@ svbool_t test_svdupq_n_b16(bool x0, bool x1, bool x2, bool x3, // CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP3]], i64 3 // CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> [[TMP7]], i64 0) // CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP8]], i64 0) -// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -// CHECK-NEXT: [[TMP11:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP10]], <vscale x 4 x i32> [[TMP9]], <vscale x 2 x i64> zeroinitializer) -// CHECK-NEXT: [[TMP12:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP11]]) -// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP12]] +// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP9]], <vscale x 2 x i64> zeroinitializer) +// CHECK-NEXT: [[TMP11:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP10]]) +// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP11]] // // CPP-CHECK-LABEL: @_Z17test_svdupq_n_b32bbbb( // CPP-CHECK-NEXT: entry: @@ -826,10 +821,9 @@ svbool_t test_svdupq_n_b16(bool x0, bool x1, bool x2, bool x3, // CPP-CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP3]], i64 3 // CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> [[TMP7]], i64 0) // CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP8]], i64 0) -// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP10]], <vscale x 4 x i32> [[TMP9]], <vscale x 2 x i64> zeroinitializer) -// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP11]]) -// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP12]] +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP9]], <vscale x 2 x i64> zeroinitializer) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP10]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP11]] // svbool_t test_svdupq_n_b32(bool x0, bool x1, bool x2, bool x3) MODE_ATTR { @@ -849,10 +843,9 @@ svbool_t test_svdupq_n_b32(bool x0, bool x1, bool x2, bool x3) MODE_ATTR // CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP1]], i64 1 // CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> [[TMP3]], i64 0) // CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP4]], i64 0) -// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> zeroinitializer) -// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP7]]) -// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP8]] +// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> zeroinitializer) +// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP6]]) +// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP7]] // // CPP-CHECK-LABEL: @_Z17test_svdupq_n_b64bb( // CPP-CHECK-NEXT: entry: @@ -866,10 +859,9 @@ svbool_t test_svdupq_n_b32(bool x0, bool x1, bool x2, bool x3) MODE_ATTR // CPP-CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP1]], i64 1 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> [[TMP3]], i64 0) // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP4]], i64 0) -// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> zeroinitializer) -// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP7]]) -// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP8]] +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> zeroinitializer) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP6]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP7]] // svbool_t test_svdupq_n_b64(bool x0, bool x1) MODE_ATTR { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index c8f16f1503aa9..b5e82333e0801 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -18728,17 +18728,13 @@ bool AArch64TargetLowering::lowerInterleavedLoad( Value *PTrue = nullptr; if (UseScalable) { - std::optional<unsigned> PgPattern = - getSVEPredPatternFromNumElements(FVTy->getNumElements()); - if (Subtarget->getMinSVEVectorSizeInBits() == - Subtarget->getMaxSVEVectorSizeInBits() && - Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy)) - PgPattern = AArch64SVEPredPattern::all; - - auto *PTruePat = - ConstantInt::get(Type::getInt32Ty(LDVTy->getContext()), *PgPattern); - PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy}, - {PTruePat}); + if (DL.getTypeSizeInBits(FVTy) != Subtarget->getSVEVectorSizeInBits()) { + std::optional<unsigned> PgPattern = + getSVEPredPatternFromNumElements(FVTy->getNumElements()); + PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, PredTy, + Builder.getInt32(*PgPattern)); + } else + PTrue = ConstantInt::getTrue(PredTy); } for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) { @@ -18950,18 +18946,13 @@ bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store, Value *PTrue = nullptr; if (UseScalable) { - std::optional<unsigned> PgPattern = - getSVEPredPatternFromNumElements(SubVecTy->getNumElements()); - if (Subtarget->getMinSVEVectorSizeInBits() == - Subtarget->getMaxSVEVectorSizeInBits() && - Subtarget->getMinSVEVectorSizeInBits() == - DL.getTypeSizeInBits(SubVecTy)) - PgPattern = AArch64SVEPredPattern::all; - - auto *PTruePat = - ConstantInt::get(Type::getInt32Ty(STVTy->getContext()), *PgPattern); - PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy}, - {PTruePat}); + if (DL.getTypeSizeInBits(SubVecTy) != Subtarget->getSVEVectorSizeInBits()) { + std::optional<unsigned> PgPattern = + getSVEPredPatternFromNumElements(SubVecTy->getNumElements()); + PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, PredTy, + Builder.getInt32(*PgPattern)); + } else + PTrue = ConstantInt::getTrue(PredTy); } for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) { diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index b19d967900641..71077846088a2 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2155,15 +2155,12 @@ static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC, if ((PredicateBits & (1 << I)) == 0) return std::nullopt; - auto *PTruePat = - ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all); - auto *PTrue = IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, - {PredType}, {PTruePat}); - auto *ConvertToSVBool = IC.Builder.CreateIntrinsic( - Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue}); + auto *ConvertToSVBool = + IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, + PredType, ConstantInt::getTrue(PredType)); auto *ConvertFromSVBool = IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, - {II.getType()}, {ConvertToSVBool}); + II.getType(), ConvertToSVBool); ConvertFromSVBool->takeName(&II); return IC.replaceInstUsesWith(II, ConvertFromSVBool); @@ -2287,15 +2284,10 @@ static std::optional<Instruction *> instCombineSVECondLast(InstCombiner &IC, static std::optional<Instruction *> instCombineRDFFR(InstCombiner &IC, IntrinsicInst &II) { - LLVMContext &Ctx = II.getContext(); // Replace rdffr with predicated rdffr.z intrinsic, so that optimizePTestInstr // can work with RDFFR_PP for ptest elimination. - auto *AllPat = - ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all); - auto *PTrue = IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, - {II.getType()}, {AllPat}); - auto *RDFFR = - IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_rdffr_z, {PTrue}); + auto *RDFFR = IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_rdffr_z, + ConstantInt::getTrue(II.getType())); RDFFR->takeName(&II); return IC.replaceInstUsesWith(II, RDFFR); } diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll index f25f1b89164f7..7c676875acf92 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll @@ -7,8 +7,7 @@ target triple = "aarch64-linux-gnu" define void @load_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_factor2( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16.p0(<vscale x 8 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16.p0(<vscale x 8 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[LDN]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv8i16(<vscale x 8 x i16> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[LDN]], 0 @@ -26,8 +25,7 @@ define void @load_factor2(ptr %ptr) #0 { define void @load_factor3(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_factor3( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32.p0(<vscale x 4 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32.p0(<vscale x 4 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv4i32(<vscale x 4 x i32> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[LDN]], 1 @@ -46,8 +44,7 @@ define void @load_factor3(ptr %ptr) #0 { define void @load_factor4(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_factor4( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 2 @@ -69,12 +66,11 @@ define void @load_factor4(ptr %ptr) #0 { define void @store_factor2(ptr %ptr, <16 x i16> %v0, <16 x i16> %v1) #0 { ; CHECK-LABEL: define void @store_factor2( ; CHECK-SAME: ptr [[PTR:%.*]], <16 x i16> [[V0:%.*]], <16 x i16> [[V1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[V0]], <16 x i16> [[V1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> poison, <16 x i16> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[V0]], <16 x i16> [[V1]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> poison, <16 x i16> [[TMP4]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8i16.p0(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8i16.p0(<vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %interleaved.vec = shufflevector <16 x i16> %v0, <16 x i16> %v1, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, @@ -88,14 +84,13 @@ define void @store_factor3(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32> %v2 ; CHECK-SAME: ptr [[PTR:%.*]], <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <8 x i32> [[V2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v8i32(<vscale x 4 x i32> poison, <8 x i32> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v8i32(<vscale x 4 x i32> poison, <8 x i32> [[TMP4]], i64 0) ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v8i32(<vscale x 4 x i32> poison, <8 x i32> [[TMP6]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv4i32.p0(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP7]], <vscale x 4 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv4i32.p0(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP7]], <vscale x 4 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, @@ -113,7 +108,6 @@ define void @store_factor4(ptr %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2 ; CHECK-SAME: ptr [[PTR:%.*]], <4 x i64> [[V0:%.*]], <4 x i64> [[V1:%.*]], <4 x i64> [[V2:%.*]], <4 x i64> [[V3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[S0:%.*]] = shufflevector <4 x i64> [[V0]], <4 x i64> [[V1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i64> [[V2]], <4 x i64> [[V3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> @@ -122,7 +116,7 @@ define void @store_factor4(ptr %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2 ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP6]], i64 0) ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP8]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2i64.p0(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2i64.p0(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %s0 = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -135,8 +129,7 @@ define void @store_factor4(ptr %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2 define void @load_ptrvec_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_ptrvec_factor2( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr <4 x i64> [[TMP3]] to <4 x ptr> @@ -154,8 +147,7 @@ define void @load_ptrvec_factor2(ptr %ptr) #0 { define void @load_ptrvec_factor3(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_ptrvec_factor3( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr <4 x i64> [[TMP3]] to <4 x ptr> @@ -177,8 +169,7 @@ define void @load_ptrvec_factor3(ptr %ptr) #0 { define void @load_ptrvec_factor4(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_ptrvec_factor4( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr <4 x i64> [[TMP3]] to <4 x ptr> @@ -206,12 +197,11 @@ define void @store_ptrvec_factor2(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1) #0 { ; CHECK-SAME: ptr [[PTR:%.*]], <4 x ptr> [[V0:%.*]], <4 x ptr> [[V1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <4 x ptr> [[V0]] to <4 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint <4 x ptr> [[V1]] to <4 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP4]], i64 0) ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP6]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64.p0(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i1> [[TMP3]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64.p0(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %interleaved.vec = shufflevector <4 x ptr> %v0, <4 x ptr> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> @@ -226,14 +216,13 @@ define void @store_ptrvec_factor3(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1, <4 x p ; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x ptr> [[V2]], <4 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <8 x ptr> [[S0]] to <8 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint <8 x ptr> [[S1]] to <8 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP4]], i64 0) ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP6]], i64 0) ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 8, i32 9, i32 10, i32 11> ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP8]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv2i64.p0(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> [[TMP3]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv2i64.p0(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %s0 = shufflevector <4 x ptr> %v0, <4 x ptr> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -250,7 +239,6 @@ define void @store_ptrvec_factor4(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1, <4 x p ; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x ptr> [[V2]], <4 x ptr> [[V3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <8 x ptr> [[S0]] to <8 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint <8 x ptr> [[S1]] to <8 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP4]], i64 0) ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> @@ -259,7 +247,7 @@ define void @store_ptrvec_factor4(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1, <4 x p ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP8]], i64 0) ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <4 x i32> <i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP10]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2i64.p0(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i1> [[TMP3]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2i64.p0(<vscale x 2 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %s0 = shufflevector <4 x ptr> %v0, <4 x ptr> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -273,14 +261,13 @@ define void @store_ptrvec_factor4(ptr %ptr, <4 x ptr> %v0, <4 x ptr> %v1, <4 x p define void @load_factor2_wide(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_factor2_wide( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP4]], i64 0) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[PTR]], i32 8 -; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[TMP6]]) +; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[TMP6]]) ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN1]], 1 ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[TMP7]], i64 0) ; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[LDN1]], 0 @@ -298,18 +285,17 @@ define void @load_factor2_wide(ptr %ptr) #0 { define void @store_factor2_wide(ptr %ptr, <8 x i64> %v0, <8 x i64> %v1) #0 { ; CHECK-LABEL: define void @store_factor2_wide( ; CHECK-SAME: ptr [[PTR:%.*]], <8 x i64> [[V0:%.*]], <8 x i64> [[V1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i64> [[V0]], <8 x i64> [[V1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[V0]], <8 x i64> [[V1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11> ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP4]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64.p0(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64.p0(<vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP5]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[V0]], <8 x i64> [[V1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP6]], i64 0) ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i64> [[V0]], <8 x i64> [[V1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> poison, <4 x i64> [[TMP8]], i64 0) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[PTR]], i32 8 -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64.p0(<vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> [[TMP1]], ptr [[TMP10]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv2i64.p0(<vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i1> splat (i1 true), ptr [[TMP10]]) ; CHECK-NEXT: ret void ; %interleaved.vec = shufflevector <8 x i64> %v0, <8 x i64> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> @@ -432,8 +418,7 @@ define void @store_min_ge_type(ptr %ptr, <4 x i64> %v0, <4 x i64> %v1) #2 { define void @load_double_factor4(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_double_factor4( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64.p0(<vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64.p0(<vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[LDN]], 2 @@ -455,8 +440,7 @@ define void @load_double_factor4(ptr %ptr) #0 { define void @load_float_factor3(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_float_factor3( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32.p0(<vscale x 4 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32.p0(<vscale x 4 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[LDN]], 1 @@ -475,8 +459,7 @@ define void @load_float_factor3(ptr %ptr) #0 { define void @load_half_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_half_factor2( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16.p0(<vscale x 8 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16.p0(<vscale x 8 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[LDN]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x half> @llvm.vector.extract.v16f16.nxv8f16(<vscale x 8 x half> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[LDN]], 0 @@ -492,8 +475,7 @@ define void @load_half_factor2(ptr %ptr) #0 { define void @load_bfloat_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @load_bfloat_factor2( ; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) -; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16.p0(<vscale x 8 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: [[LDN:%.*]] = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16.p0(<vscale x 8 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[LDN]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x bfloat> @llvm.vector.extract.v16bf16.nxv8bf16(<vscale x 8 x bfloat> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[LDN]], 0 @@ -511,7 +493,6 @@ define void @store_double_factor4(ptr %ptr, <4 x double> %v0, <4 x double> %v1, ; CHECK-SAME: ptr [[PTR:%.*]], <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[S0:%.*]] = shufflevector <4 x double> [[V0]], <4 x double> [[V1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x double> [[V2]], <4 x double> [[V3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> poison, <4 x double> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> @@ -520,7 +501,7 @@ define void @store_double_factor4(ptr %ptr, <4 x double> %v0, <4 x double> %v1, ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> poison, <4 x double> [[TMP6]], i64 0) ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> poison, <4 x double> [[TMP8]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2f64.p0(<vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x double> [[TMP7]], <vscale x 2 x double> [[TMP9]], <vscale x 2 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2f64.p0(<vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP5]], <vscale x 2 x double> [[TMP7]], <vscale x 2 x double> [[TMP9]], <vscale x 2 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -535,14 +516,13 @@ define void @store_float_factor3(ptr %ptr, <8 x float> %v0, <8 x float> %v1, <8 ; CHECK-SAME: ptr [[PTR:%.*]], <8 x float> [[V0:%.*]], <8 x float> [[V1:%.*]], <8 x float> [[V2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x float> [[V0]], <8 x float> [[V1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[V2]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> poison, <8 x float> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> poison, <8 x float> [[TMP4]], i64 0) ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> poison, <8 x float> [[TMP6]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv4f32.p0(<vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP5]], <vscale x 4 x float> [[TMP7]], <vscale x 4 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv4f32.p0(<vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP5]], <vscale x 4 x float> [[TMP7]], <vscale x 4 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %s0 = shufflevector <8 x float> %v0, <8 x float> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, @@ -558,12 +538,11 @@ define void @store_float_factor3(ptr %ptr, <8 x float> %v0, <8 x float> %v1, <8 define void @store_half_factor2(ptr %ptr, <16 x half> %v0, <16 x half> %v1) #0 { ; CHECK-LABEL: define void @store_half_factor2( ; CHECK-SAME: ptr [[PTR:%.*]], <16 x half> [[V0:%.*]], <16 x half> [[V1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x half> [[V0]], <16 x half> [[V1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v16f16(<vscale x 8 x half> poison, <16 x half> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x half> [[V0]], <16 x half> [[V1]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v16f16(<vscale x 8 x half> poison, <16 x half> [[TMP4]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8f16.p0(<vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8f16.p0(<vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %interleaved.vec = shufflevector <16 x half> %v0, <16 x half> %v1, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, @@ -576,12 +555,11 @@ define void @store_half_factor2(ptr %ptr, <16 x half> %v0, <16 x half> %v1) #0 { define void @store_bfloat_factor2(ptr %ptr, <16 x bfloat> %v0, <16 x bfloat> %v1) #0 { ; CHECK-LABEL: define void @store_bfloat_factor2( ; CHECK-SAME: ptr [[PTR:%.*]], <16 x bfloat> [[V0:%.*]], <16 x bfloat> [[V1:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x bfloat> [[V0]], <16 x bfloat> [[V1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v16bf16(<vscale x 8 x bfloat> poison, <16 x bfloat> [[TMP2]], i64 0) ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x bfloat> [[V0]], <16 x bfloat> [[V1]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v16bf16(<vscale x 8 x bfloat> poison, <16 x bfloat> [[TMP4]], i64 0) -; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8bf16.p0(<vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x i1> [[TMP1]], ptr [[PTR]]) +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8bf16.p0(<vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x i1> splat (i1 true), ptr [[PTR]]) ; CHECK-NEXT: ret void ; %interleaved.vec = shufflevector <16 x bfloat> %v0, <16 x bfloat> %v1, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
