llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-risc-v
@llvm/pr-subscribers-llvm-transforms

Author: Florian Hahn (fhahn)

<details>
<summary>Changes</summary>

Move vector pointer generation to a separate VPInstruction opcode. This untangles address computation from the memory recipes and is also needed in the future to enable explicit unrolling in VPlan.

---

Patch is 333.44 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/72164.diff

57 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+17-46)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+3-1)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+47)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll (+37-37)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll (+45-45)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll (+24-24)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll (+2-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll (+6-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll (+24-12)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll (+4-2)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll (+108-108)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll (+8-4)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleaving.ll (+30-30)
- (modified) llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll (+12-12)
- (modified) llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll (+33-33)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr23997.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr35432.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr47437.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll (+8-4)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll (+13-13)
- (modified) llvm/test/Transforms/LoopVectorize/float-induction.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+39-39)
- (modified) llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll (+32-32)
- (modified) llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/reduction-odd-interleave-counts.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-inductions.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll (+4-2)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll (+2-1)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing.ll (+36-18)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll (+4-2)

``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ae8d306c44dd885..e3374724b04a144 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8231,13 +8231,24 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
   bool Consecutive =
       Reverse || Decision == LoopVectorizationCostModel::CM_Widen;
 
+  VPValue *Ptr = isa<LoadInst>(I) ? Operands[0] : Operands[1];
+  if (Decision != LoopVectorizationCostModel::CM_GatherScatter &&
+      Decision != LoopVectorizationCostModel::CM_Interleave) {
+    auto *VectorPtr = Reverse
+                          ? new VPInstruction(VPInstruction::CreateVectorPtr,
+                                              {Ptr, Ptr}, I->getDebugLoc())
+                          : new VPInstruction(VPInstruction::CreateVectorPtr,
+                                              {Ptr}, I->getDebugLoc());
+    Builder.getInsertBlock()->appendRecipe(VectorPtr);
+    Ptr = VectorPtr;
+  }
   if (LoadInst *Load = dyn_cast<LoadInst>(I))
-    return new VPWidenMemoryInstructionRecipe(*Load, Operands[0], Mask,
-                                              Consecutive, Reverse);
+    return new VPWidenMemoryInstructionRecipe(*Load, Ptr, Mask, Consecutive,
+                                              Reverse);
 
   StoreInst *Store = cast<StoreInst>(I);
-  return new VPWidenMemoryInstructionRecipe(*Store, Operands[1], Operands[0],
-                                            Mask, Consecutive, Reverse);
+  return new VPWidenMemoryInstructionRecipe(*Store, Ptr, Operands[0], Mask,
+                                            Consecutive, Reverse);
 }
 
 /// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also
@@ -9532,44 +9543,6 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
       BlockInMaskParts[Part] = Mask;
     }
 
-  const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * {
-    // Calculate the pointer for the specific unroll-part.
-    Value *PartPtr = nullptr;
-
-    // Use i32 for the gep index type when the value is constant,
-    // or query DataLayout for a more suitable index type otherwise.
-    const DataLayout &DL =
-        Builder.GetInsertBlock()->getModule()->getDataLayout();
-    Type *IndexTy = State.VF.isScalable() && (isReverse() || Part > 0)
-                        ? DL.getIndexType(PointerType::getUnqual(
-                              ScalarDataTy->getContext()))
-                        : Builder.getInt32Ty();
-    bool InBounds = false;
-    if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
-      InBounds = gep->isInBounds();
-    if (isReverse()) {
-      // If the address is consecutive but reversed, then the
-      // wide store needs to start at the last vector element.
-      // RunTimeVF = VScale * VF.getKnownMinValue()
-      // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
-      Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
-      // NumElt = -Part * RunTimeVF
-      Value *NumElt =
-          Builder.CreateMul(ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
-      // LastLane = 1 - RunTimeVF
-      Value *LastLane =
-          Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
-      PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, NumElt, "", InBounds);
-      PartPtr =
-          Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane, "", InBounds);
-    } else {
-      Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
-      PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, Increment, "", InBounds);
-    }
-
-    return PartPtr;
-  };
-
   // Handle Stores:
   if (SI) {
     State.setDebugLocFrom(SI->getDebugLoc());
@@ -9590,8 +9563,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
         // We don't want to update the value in the map as it might be used in
         // another expression. So don't call resetVectorValue(StoredVal).
       }
-      auto *VecPtr =
-          CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0)));
+      auto *VecPtr = State.get(getAddr(), Part);
       if (isMaskRequired)
         NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
                                           BlockInMaskParts[Part]);
@@ -9615,8 +9587,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
                                    nullptr, "wide.masked.gather");
         State.addMetadata(NewLI, LI);
       } else {
-        auto *VecPtr =
-            CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0)));
+        auto *VecPtr = State.get(getAddr(), Part);
         if (isMaskRequired)
           NewLI = Builder.CreateMaskedLoad(
               DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index a26308a212bbd3c..be770e33e92a32b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1038,7 +1038,8 @@ class VPInstruction : public VPRecipeWithIRFlags, public VPValue {
     // canonical IV separately for each unrolled part.
     CanonicalIVIncrementForPart,
     BranchOnCount,
-    BranchOnCond
+    BranchOnCond,
+    CreateVectorPtr
   };
 
 private:
@@ -1146,6 +1147,7 @@ class VPInstruction : public VPRecipeWithIRFlags, public VPValue {
     case VPInstruction::CanonicalIVIncrement:
     case VPInstruction::CanonicalIVIncrementForPart:
     case VPInstruction::BranchOnCount:
+    case VPInstruction::CreateVectorPtr:
      return true;
    };
    llvm_unreachable("switch should return");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 6b3218dca1b18b0..1dac8a806d657cb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -122,6 +122,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
     case VPInstruction::CalculateTripCountMinusVF:
     case VPInstruction::CanonicalIVIncrement:
     case VPInstruction::CanonicalIVIncrementForPart:
+    case VPInstruction::CreateVectorPtr:
       return false;
     default:
       return true;
@@ -404,6 +405,49 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
     Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
     return CondBr;
   }
+  case VPInstruction::CreateVectorPtr: {
+    // Calculate the pointer for the specific unroll-part.
+    Value *PartPtr = nullptr;
+    bool IsReverse = getNumOperands() > 1;
+    auto *MemR = cast<VPWidenMemoryInstructionRecipe>(*user_begin());
+    Type *ScalarDataTy =
+        MemR->isStore() ? cast<StoreInst>(&MemR->getIngredient())
+                              ->getValueOperand()
+                              ->getType()
+                        : cast<LoadInst>(&MemR->getIngredient())->getType();
+    // Use i32 for the gep index type when the value is constant,
+    // or query DataLayout for a more suitable index type otherwise.
+    const DataLayout &DL =
+        Builder.GetInsertBlock()->getModule()->getDataLayout();
+    Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0)
+                        ? DL.getIndexType(ScalarDataTy->getPointerTo())
+                        : Builder.getInt32Ty();
+    Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
+    bool InBounds = false;
+    if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
+      InBounds = gep->isInBounds();
+    if (IsReverse) {
+      // If the address is consecutive but reversed, then the
+      // wide store needs to start at the last vector element.
+      // RunTimeVF = VScale * VF.getKnownMinValue()
+      // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
+      Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
+      // NumElt = -Part * RunTimeVF
+      Value *NumElt =
+          Builder.CreateMul(ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
+      // LastLane = 1 - RunTimeVF
+      Value *LastLane =
+          Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
+      PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, NumElt, "", InBounds);
+      PartPtr =
+          Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane, "", InBounds);
+    } else {
+      Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
+      PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, Increment, "", InBounds);
+    }
+
+    return PartPtr;
+  }
   default:
     llvm_unreachable("Unsupported opcode for instruction");
   }
@@ -483,6 +527,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
   case VPInstruction::BranchOnCount:
     O << "branch-on-count";
     break;
+  case VPInstruction::CreateVectorPtr:
+    O << "create-vector-pointer";
+    break;
   default:
     O << Instruction::getOpcodeName(getOpcode());
   }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
index 4a8e07eaaf757fa..cbc4733cf5cf5fa 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
@@ -179,8 +179,8 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i
 ; CHECK-NEXT:    [[TMP8:%.*]] = trunc <16 x i16> [[TMP6]] to <16 x i8>
 ; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[INDEX]] to i64
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP9]]
-; CHECK-NEXT:    store <16 x i8> [[TMP7]], ptr [[TMP10]], align 1
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 16
+; CHECK-NEXT:    store <16 x i8> [[TMP7]], ptr [[TMP10]], align 1
 ; CHECK-NEXT:    store <16 x i8> [[TMP8]], ptr [[TMP11]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
@@ -193,18 +193,18 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i
 ; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <8 x i16> undef, i16 [[B]], i64 0
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
-; CHECK-NEXT:    [[INDEX4:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[INDEX3:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP14:%.*]] = trunc i32 [[A]] to i16
 ; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <8 x i16> undef, i16 [[TMP14]], i64 0
 ; CHECK-NEXT:    [[TMP16:%.*]] = mul <8 x i16> [[TMP15]], [[TMP13]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = lshr <8 x i16> [[TMP16]], <i16 8, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
 ; CHECK-NEXT:    [[TMP18:%.*]] = trunc <8 x i16> [[TMP17]] to <8 x i8>
 ; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <8 x i8> [[TMP18]], <8 x i8> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP20:%.*]] = sext i32 [[INDEX4]] to i64
+; CHECK-NEXT:    [[TMP20:%.*]] = sext i32 [[INDEX3]] to i64
 ; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP20]]
 ; CHECK-NEXT:    store <8 x i8> [[TMP19]], ptr [[TMP21]], align 1
-; CHECK-NEXT:    [[INDEX_NEXT9]] = add nuw i32 [[INDEX4]], 8
-; CHECK-NEXT:    [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT9]], 1000
+; CHECK-NEXT:    [[INDEX_NEXT8]] = add nuw i32 [[INDEX3]], 8
+; CHECK-NEXT:    [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT8]], 1000
 ; CHECK-NEXT:    br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
 ; CHECK-NEXT:    br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -268,7 +268,7 @@ define void @test_shrink_select(ptr noalias %src, ptr noalias %dst, i32 %A, i1 %
 ; CHECK:       vec.epilog.ph:
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
-; CHECK-NEXT:    [[INDEX2:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[INDEX1:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP10:%.*]] = trunc i32 [[A]] to i16
 ; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <8 x i16> undef, i16 [[TMP10]], i64 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul <8 x i16> [[TMP11]], <i16 99, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
@@ -276,11 +276,11 @@ define void @test_shrink_select(ptr noalias %src, ptr noalias %dst, i32 %A, i1 %
 ; CHECK-NEXT:    [[TMP14:%.*]] = lshr <8 x i16> [[TMP13]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
 ; CHECK-NEXT:    [[TMP15:%.*]] = select i1 [[C]], <8 x i16> [[TMP14]], <8 x i16> [[TMP13]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = trunc <8 x i16> [[TMP15]] to <8 x i8>
-; CHECK-NEXT:    [[TMP17:%.*]] = sext i32 [[INDEX2]] to i64
+; CHECK-NEXT:    [[TMP17:%.*]] = sext i32 [[INDEX1]] to i64
 ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP17]]
 ; CHECK-NEXT:    store <8 x i8> [[TMP16]], ptr [[TMP18]], align 1
-; CHECK-NEXT:    [[INDEX_NEXT5]] = add nuw i32 [[INDEX2]], 8
-; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT5]], 1000
+; CHECK-NEXT:    [[INDEX_NEXT4]] = add nuw i32 [[INDEX1]], 8
+; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT4]], 1000
 ; CHECK-NEXT:    br i1 [[TMP19]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
 ; CHECK-NEXT:    br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
index 24d6d2d532aa0c2..24c59fdb47b6133 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
@@ -38,8 +38,8 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP13]])
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[TMP14]], align 1
 ; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2
+; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[TMP14]], align 1
 ; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[TMP15]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
@@ -55,22 +55,22 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
 ; CHECK-NEXT:    [[IND_END5:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
-; CHECK-NEXT:    [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX9]], 0
-; CHECK-NEXT:    [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP17]]
-; CHECK-NEXT:    [[TMP18:%.*]] = add i64 [[INDEX9]], 1
-; CHECK-NEXT:    [[NEXT_GEP11:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP18]]
-; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP10]], i32 0
-; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <2 x ptr> [[TMP19]], ptr [[NEXT_GEP11]], i32 1
+; CHECK-NEXT:    [[INDEX8:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX8]], 0
+; CHECK-NEXT:    [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP17]]
+; CHECK-NEXT:    [[TMP18:%.*]] = add i64 [[INDEX8]], 1
+; CHECK-NEXT:    [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP18]]
+; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP9]], i32 0
+; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <2 x ptr> [[TMP19]], ptr [[NEXT_GEP10]], i32 1
 ; CHECK-NEXT:    [[TMP21:%.*]] = icmp ne <2 x ptr> [[TMP20]], zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x i1> [[TMP21]], i32 0
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP22]])
 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x i1> [[TMP21]], i32 1
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP23]])
-; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr i8, ptr [[NEXT_GEP10]], i32 0
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr i8, ptr [[NEXT_GEP9]], i32 0
 ; CHECK-NEXT:    store <2 x i8> zeroinitializer, ptr [[TMP24]], align 1
-; CHECK-NEXT:    [[INDEX_NEXT12]] = add nuw i64 [[INDEX9]], 2
-; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT12]], 10000
+; CHECK-NEXT:    [[INDEX_NEXT11]] = add nuw i64 [[INDEX8]], 2
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT11]], 10000
 ; CHECK-NEXT:    br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
 ; CHECK:       vec.epilog.middle.block:
 ; CHECK-NEXT:    br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -132,8 +132,8 @@ define void @test_widen_induction(ptr %A, i64 %N) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
-; CHECK-NEXT:    store <2 x i64> [[VEC_IND]], ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
+; CHECK-NEXT:    store <2 x i64> [[VEC_IND]], ptr [[TMP4]], align 4
 ; CHECK-NEXT:    store <2 x i64> [[STEP_ADD]], ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
@@ -156,13 +156,13 @@ define void @test_widen_induction(ptr %A, i64 %N) {
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], <i64 0, i64 1>
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
-; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND8:%.*]] = phi <2 x i64> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX7]], 0
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
 ; CHECK-NEXT:    store <2 x i64> [[VEC_IND8]], ptr [[TMP9]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT11]] = add nuw i64 [[OFFSET_IDX]], 2
+; CHECK-NEXT:    [[INDEX_NEXT11]] = add nuw i64 [[INDEX7]], 2
 ; CHECK-NEXT:    [[VEC_IND_NEXT10]] = add <2 x i64> [[VEC_IND8]], <i64 2, i64 2>
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC4]]
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
@@ -224,8 +224,8 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
-; CHECK-NEXT:    store <2 x i64> [[VEC_IND]], ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 2
+; CHECK-NEXT:    stor... [truncated]
``````````

</details>

https://github.com/llvm/llvm-project/pull/72164

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits