llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Florian Hahn (fhahn)

<details>
<summary>Changes</summary>

Add a new PtrAdd opcode to VPInstruction that corresponds to IRBuilder::CreatePtrAdd, which creates a GEP with source element type i8.

This is then used to model scalarized VPWidenPointerInductionRecipes: scalar steps compute the index increments, and a PtrAdd adds them to the start pointer.

Note that PtrAdd needs to be able to generate code for only the first lane or for all lanes. This may warrant introducing a separate recipe for scalarizing that can be created without relying on the underlying IR.

---

Patch is 125.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/80273.diff


18 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+2-33)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.cpp (+1-1)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+11-1)
- (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (+2)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+25-2)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+38-8)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll (+49-51)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll (+73-75)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll (+37-37)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll (+24-25)
- (modified) llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll (+75-79)
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll (+9-10)
- (modified) llvm/test/Transforms/LoopVectorize/X86/small-size.ll (+83-95)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll (+49-60)
- (modified) llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll (+31-30)
- (modified) llvm/test/Transforms/LoopVectorize/pointer-induction.ll (+34-37)
- (modified) llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll (+32-35)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll (+5-3)


``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 17a0d01f18072..4ee878358f9bc 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9233,42 +9233,11 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
          "Not a pointer induction according to InductionDescriptor!");
   assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() &&
          "Unexpected type.");
+  assert(!onlyScalarsGenerated(State.VF.isScalable()) &&
+         "Recipe should have been replaced");
 
   auto *IVR = getParent()->getPlan()->getCanonicalIV();
   PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0));
-
-  if (onlyScalarsGenerated(State.VF)) {
-    // This is the normalized GEP that starts counting at zero.
-    Value *PtrInd = State.Builder.CreateSExtOrTrunc(
-        CanonicalIV, IndDesc.getStep()->getType());
-    // Determine the number of scalars we need to generate for each unroll
-    // iteration. If the instruction is uniform, we only need to generate the
-    // first lane. Otherwise, we generate all VF values.
-    bool IsUniform = vputils::onlyFirstLaneUsed(this);
-    assert((IsUniform || !State.VF.isScalable()) &&
-           "Cannot scalarize a scalable VF");
-    unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();
-
-    for (unsigned Part = 0; Part < State.UF; ++Part) {
-      Value *PartStart =
-          createStepForVF(State.Builder, PtrInd->getType(), State.VF, Part);
-
-      for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
-        Value *Idx = State.Builder.CreateAdd(
-            PartStart, ConstantInt::get(PtrInd->getType(), Lane));
-        Value *GlobalIdx = State.Builder.CreateAdd(PtrInd, Idx);
-
-        Value *Step = State.get(getOperand(1), VPIteration(Part, Lane));
-        Value *SclrGep = emitTransformedIndex(
-            State.Builder, GlobalIdx, IndDesc.getStartValue(), Step,
-            IndDesc.getKind(), IndDesc.getInductionBinOp());
-        SclrGep->setName("next.gep");
-        State.set(this, SclrGep, VPIteration(Part, Lane));
-      }
-    }
-    return;
-  }
-
   Type *PhiType = IndDesc.getStep()->getType();
 
   // Build a pointer phi
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 7ed07fe5f413a..51576d9f5364e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -859,7 +859,7 @@ void VPlan::execute(VPTransformState *State) {
       auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
       // TODO: Split off the case that all users of a pointer phi are scalar
       // from the VPWidenPointerInductionRecipe.
-      if (WidenPhi->onlyScalarsGenerated(State->VF))
+      if (WidenPhi->onlyScalarsGenerated(State->VF.isScalable()))
         continue;
 
       auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 97035146a2f4d..4904287412f8a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1156,6 +1156,7 @@ class VPInstruction : public VPRecipeWithIRFlags {
     BranchOnCount,
     BranchOnCond,
     ComputeReductionResult,
+    PtrAdd,
   };
 
 private:
@@ -1736,7 +1737,7 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
   void execute(VPTransformState &State) override;
 
   /// Returns true if only scalar values will be generated.
-  bool onlyScalarsGenerated(ElementCount VF);
+  bool onlyScalarsGenerated(bool Scalable);
 
   /// Returns the induction descriptor for the recipe.
   const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
 
@@ -2502,6 +2503,12 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
             dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
             Start, CanonicalIV, Step) {}
 
+  VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, VPValue *Start,
+                    VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
+                    FPMathOperator *FPBinOp)
+      : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
+        Kind(Kind), FPBinOp(FPBinOp) {}
+
   ~VPDerivedIVRecipe() override = default;
 
   VPRecipeBase *clone() override {
@@ -2957,6 +2964,9 @@ class VPlan {
   }
   bool hasVF(ElementCount VF) { return VFs.count(VF); }
+  bool hasScalableVF() {
+    return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
+  }
 
   bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 515dc41a55ea1..bd2f65935e479 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -43,6 +43,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
     CachedTypes[OtherV] = ResTy;
     return ResTy;
   }
+  case VPInstruction::PtrAdd:
+    return inferScalarType(R->getOperand(0));
   default:
     break;
   }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 77f2cf899b085..d6550821fb57a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -127,6 +127,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
     case VPInstruction::Not:
     case VPInstruction::CalculateTripCountMinusVF:
    case VPInstruction::CanonicalIVIncrementForPart:
+    case VPInstruction::PtrAdd:
      return false;
    default:
      return true;
@@ -489,6 +490,23 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
 
     return ReducedPartRdx;
   }
+  case VPInstruction::PtrAdd: {
+    if (vputils::onlyFirstLaneUsed(this)) {
+      auto *P =
+          Builder.CreatePtrAdd(State.get(getOperand(0), VPIteration(Part, 0)),
+                               State.get(getOperand(1), VPIteration(Part, 0)));
+      State.set(this, P, VPIteration(Part, 0));
+    } else {
+      for (unsigned Lane = 0; Lane != State.VF.getKnownMinValue(); ++Lane) {
+        Value *P = Builder.CreatePtrAdd(
+            State.get(getOperand(0), VPIteration(Part, Lane)),
+            State.get(getOperand(1), VPIteration(Part, Lane)));
+
+        State.set(this, P, VPIteration(Part, Lane));
+      }
+    }
+    return nullptr;
+  }
   default:
     llvm_unreachable("Unsupported opcode for instruction");
   }
@@ -515,6 +533,8 @@ void VPInstruction::execute(VPTransformState &State) {
     State.Builder.setFastMathFlags(getFastMathFlags());
   for (unsigned Part = 0; Part < State.UF; ++Part) {
     Value *GeneratedValue = generateInstruction(State, Part);
+    if (!GeneratedValue)
+      continue;
     if (!hasResult())
       continue;
     assert(GeneratedValue && "generateInstruction must produce a value");
@@ -598,6 +618,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
   case VPInstruction::ComputeReductionResult:
     O << "compute-reduction-result";
     break;
+  case VPInstruction::PtrAdd:
+    O << "ptradd";
+    break;
   default:
     O << Instruction::getOpcodeName(getOpcode());
   }
@@ -1686,9 +1709,9 @@ bool VPCanonicalIVPHIRecipe::isCanonical(
   return StepC && StepC->isOne();
 }
 
-bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
+bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool Scalable) {
   return IsScalarAfterVectorization &&
-         (!VF.isScalable() || vputils::onlyFirstLaneUsed(this));
+         (!Scalable || vputils::onlyFirstLaneUsed(this));
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 71f5285f90236..6964f1bec2675 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -489,15 +489,18 @@ void VPlanTransforms::removeDeadRecipes(VPlan &Plan) {
   }
 }
 
-static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
+static VPValue *createScalarIVSteps(VPlan &Plan,
+                                    InductionDescriptor::InductionKind Kind,
                                     ScalarEvolution &SE, Instruction *TruncI,
                                     VPValue *StartV, VPValue *Step,
-                                    VPBasicBlock::iterator IP) {
+                                    Instruction::BinaryOps InductionOpcode,
+                                    VPBasicBlock::iterator IP,
+                                    FPMathOperator *FPBinOp = nullptr) {
   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
   VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
   VPSingleDefRecipe *BaseIV = CanonicalIV;
-  if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step)) {
-    BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step);
+  if (!CanonicalIV->isCanonical(Kind, StartV, Step)) {
+    BaseIV = new VPDerivedIVRecipe(Kind, StartV, CanonicalIV, Step, FPBinOp);
     HeaderVPBB->insert(BaseIV, IP);
   }
 
@@ -526,7 +529,9 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
     VecPreheader->appendRecipe(Step->getDefiningRecipe());
   }
 
-  VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step);
+  VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(
+      BaseIV, Step, InductionOpcode,
+      FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags());
   HeaderVPBB->insert(Steps, IP);
   return Steps;
 }
@@ -537,6 +542,30 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
   bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
   VPBasicBlock::iterator InsertPt = HeaderVPBB->getFirstNonPhi();
   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
+    if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
+      if (!PtrIV->onlyScalarsGenerated(Plan.hasScalableVF()))
+        continue;
+
+      const InductionDescriptor &ID = PtrIV->getInductionDescriptor();
+      VPValue *StartV = Plan.getVPValueOrAddLiveIn(
+          ConstantInt::get(ID.getStep()->getType(), 0));
+      VPValue *StepV = PtrIV->getOperand(1);
+      VPRecipeBase *Steps =
+          createScalarIVSteps(Plan, InductionDescriptor::IK_IntInduction, SE,
+                              nullptr, StartV, StepV, Instruction::Add,
+                              InsertPt)
+              ->getDefiningRecipe();
+
+      auto *Recipe =
+          new VPInstruction(VPInstruction::PtrAdd,
+                            {PtrIV->getStartValue(), Steps->getVPSingleValue()},
+                            PtrIV->getDebugLoc());
+
+      Recipe->insertAfter(Steps);
+      PtrIV->replaceAllUsesWith(Recipe);
+      continue;
+    }
+
     auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
     if (!WideIV)
       continue;
@@ -546,9 +575,10 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
       continue;
 
     const InductionDescriptor &ID = WideIV->getInductionDescriptor();
-    VPValue *Steps = createScalarIVSteps(Plan, ID, SE, WideIV->getTruncInst(),
-                                         WideIV->getStartValue(),
-                                         WideIV->getStepValue(), InsertPt);
+    VPValue *Steps = createScalarIVSteps(
+        Plan, ID.getKind(), SE, WideIV->getTruncInst(), WideIV->getStartValue(),
+        WideIV->getStepValue(), ID.getInductionOpcode(), InsertPt,
+        dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()));
 
     // Update scalar users of IV to use Step instead.
     if (!HasOnlyVectorVFs)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
index 24c59fdb47b61..00ec396107dcb 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
@@ -11,76 +11,74 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
 ; CHECK:       vector.main.loop.iter.check:
 ; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START_1:%.*]], i64 10000
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x ptr> [[TMP2]], ptr [[NEXT_GEP1]], i32 1
-; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP4]]
-; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP2]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 1
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp ne <2 x ptr> [[TMP3]], zeroinitializer
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp ne <2 x ptr> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP10]])
-; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP11]])
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP12]])
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP13]])
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2
-; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[TMP14]], align 1
-; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[TMP15]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[PTR_START_1:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[TMP5]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP8]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <2 x ptr> [[TMP10]], ptr [[TMP9]], i32 1
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne <2 x ptr> [[TMP7]], zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne <2 x ptr> [[TMP11]], zeroinitializer
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP14]])
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP15]])
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP16]])
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP17]])
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP4]], i32 2
+; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[TMP18]], align 1
+; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[TMP19]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
-; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
+; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
+; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK:       vec.epilog.iter.check:
-; CHECK-NEXT:    [[IND_END6:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
+; CHECK-NEXT:    [[IND_END1:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
 ; CHECK-NEXT:    br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
 ; CHECK:       vec.epilog.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT:    [[IND_END5:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
-; CHECK-NEXT:    [[INDEX8:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX8]], 0
-; CHECK-NEXT:    [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP17]]
-; CHECK-NEXT:    [[TMP18:%.*]] = add i64 [[INDEX8]], 1
-; CHECK-NEXT:    [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP18]]
-; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP9]], i32 0
-; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <2 x ptr> [[TMP19]], ptr [[NEXT_GEP10]], i32 1
-; CHECK-NEXT:    [[TMP21:%.*]] = icmp ne <2 x ptr> [[TMP20]], zeroinitializer
-; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x i1> [[TMP21]], i32 0
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP22]])
-; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x i1> [[TMP21]], i32 1
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP23]])
-; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr i8, ptr [[NEXT_GEP9]], i32 0
-; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[TMP24]], align 1
-; CHECK-NEXT:    [[INDEX_NEXT11]] = add nuw i64 [[INDEX8]], 2
-; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT11]], 10000
-; CHECK-NEXT:    br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
+; CHECK-NEXT:    [[INDEX3:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[INDEX3]], 0
+; CHECK-NEXT:    [[TMP22:%.*]] = add i64 [[INDEX3]], 1
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP22]]
+; CHECK-NEXT:    [[TMP25:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP23]], i32 0
+; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <2 x ptr> [[TMP25]], ptr [[TMP24]], i32 1
+; CHECK-NEXT:    [[TMP27:%.*]] = icmp ne <2 x ptr> [[TMP26]], zeroinitializer
+; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <2 x i1> [[TMP27]], i32 0
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP28]])
+; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <2 x i1> [[TMP27]], i32 1
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP29]])
+; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr i8, ptr [[TMP23]], i32 0
+; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[TMP30]], align 1
+; CHECK-NEXT:    [[INDEX_NEXT4]] = add nuw i64 [[INDEX3]], 2
+; CHECK-... [truncated]
``````````

</details>

https://github.com/llvm/llvm-project/pull/80273
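
As background on the builder call this patch is organized around: below is a minimal standalone sketch, not part of the patch, showing that `IRBuilder::CreatePtrAdd` is shorthand for a `getelementptr` with `i8` source element type, which is the same instruction the new `VPInstruction::PtrAdd` opcode emits for each scalar lane. The module name, function `@f`, and the `main` driver are invented for illustration.

```c++
// Sketch only (assumes LLVM headers/libraries are available): prints a module
// containing the i8 GEP produced by IRBuilder::CreatePtrAdd.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("ptradd-demo", Ctx);
  IRBuilder<> Builder(Ctx);

  // Declare: ptr @f(ptr %base, i64 %off)
  FunctionType *FnTy = FunctionType::get(
      Builder.getPtrTy(), {Builder.getPtrTy(), Builder.getInt64Ty()},
      /*isVarArg=*/false);
  Function *F = Function::Create(FnTy, Function::ExternalLinkage, "f", M);
  F->getArg(0)->setName("base");
  F->getArg(1)->setName("off");
  Builder.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));

  // CreatePtrAdd(Base, Off) is sugar for CreateGEP(i8, Base, Off), so the
  // emitted instruction is: %next.gep = getelementptr i8, ptr %base, i64 %off
  Value *Gep = Builder.CreatePtrAdd(F->getArg(0), F->getArg(1), "next.gep");
  Builder.CreateRet(Gep);

  M.print(outs(), nullptr);
  return 0;
}
```

The output contains `%next.gep = getelementptr i8, ptr %base, i64 %off`, matching the `next.gep` instructions in the updated tests. The block deleted from `VPWidenPointerInductionRecipe::execute` produced these addresses indirectly through `emitTransformedIndex`; after this patch the VPlan expresses the same computation explicitly as scalar IV steps feeding a `ptradd` recipe.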