Author: Florian Hahn Date: 2021-09-25T15:15:54+01:00 New Revision: 9cd1cd6629a80374618b6a5adff68addfa779bd5
URL: https://github.com/llvm/llvm-project/commit/9cd1cd6629a80374618b6a5adff68addfa779bd5 DIFF: https://github.com/llvm/llvm-project/commit/9cd1cd6629a80374618b6a5adff68addfa779bd5.diff LOG: Fix Differential Revision: https://reviews.llvm.org/D75981 Added: Modified: llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h llvm/lib/Transforms/Vectorize/LoopVectorize.cpp Removed: ################################################################################ diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 870e7175c9221..c3a440ecc9b85 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -26,6 +26,8 @@ #include "VPlan.h" +class GeneratedRTChecks; + namespace llvm { class LoopInfo; @@ -183,12 +185,16 @@ struct VectorizationFactor { /// Cost of the loop with that width. InstructionCost Cost; - VectorizationFactor(ElementCount Width, InstructionCost Cost) - : Width(Width), Cost(Cost) {} + /// Cost of the scalar loop. + InstructionCost ScalarCost; + + VectorizationFactor(ElementCount Width, InstructionCost Cost, + InstructionCost ScalarCost) + : Width(Width), Cost(Cost), ScalarCost(ScalarCost) {} /// Width 1 means no vectorization, cost 0 means uncomputed cost. static VectorizationFactor Disabled() { - return {ElementCount::getFixed(1), 0}; + return {ElementCount::getFixed(1), 0, 0}; } bool operator==(const VectorizationFactor &rhs) const { @@ -289,7 +295,8 @@ class LoopVectorizationPlanner { /// Plan how to best vectorize, return the best VF and its cost, or None if /// vectorization and interleaving should be avoided up front. - Optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC); + Optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC, + GeneratedRTChecks &Checks); /// Use the VPlan-native path to plan how to best vectorize, return the best /// VF and its cost. 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e7a79ae69d2b8..8db7ecc7cd7ef 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -423,7 +423,6 @@ static Optional<unsigned> getSmallBestKnownTC(ScalarEvolution &SE, Loop *L) { return None; } -// Forward declare GeneratedRTChecks. class GeneratedRTChecks; namespace llvm { @@ -1634,6 +1633,17 @@ class LoopVectorizationCostModel { Scalars.clear(); } + /// The vectorization cost is a combination of the cost itself and a boolean + /// indicating whether any of the contributing operations will actually + /// operate on vector values after type legalization in the backend. If this + /// latter value is false, then all operations will be scalarized (i.e. no + /// vectorization has actually taken place). + using VectorizationCostTy = std::pair<InstructionCost, bool>; + + /// Returns the execution time cost of an instruction for a given vector + /// width. Vector width of one means scalar. + VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF); + private: unsigned NumPredStores = 0; @@ -1662,13 +1672,6 @@ class LoopVectorizationCostModel { /// of elements. ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements); - /// The vectorization cost is a combination of the cost itself and a boolean - /// indicating whether any of the contributing operations will actually - /// operate on vector values after type legalization in the backend. If this - /// latter value is false, then all operations will be scalarized (i.e. no - /// vectorization has actually taken place). - using VectorizationCostTy = std::pair<InstructionCost, bool>; - /// Returns the expected execution cost. The unit of the cost does /// not matter because we use the 'cost' units to compare different /// vector widths.
The cost that is returned is *not* normalized by @@ -1680,10 +1683,6 @@ class LoopVectorizationCostModel { expectedCost(ElementCount VF, SmallVectorImpl<InstructionVFPair> *Invalid = nullptr); - /// Returns the execution time cost of an instruction for a given vector - /// width. Vector width of one means scalar. - VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF); - /// The cost-computation logic from getInstructionCost which provides /// the vector type as an output parameter. InstructionCost getInstructionCost(Instruction *I, ElementCount VF, @@ -6042,7 +6041,8 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor( assert(VFCandidates.count(ElementCount::getFixed(1)) && "Expected Scalar VF to be a candidate"); - const VectorizationFactor ScalarCost(ElementCount::getFixed(1), ExpectedCost); + const VectorizationFactor ScalarCost(ElementCount::getFixed(1), ExpectedCost, + ExpectedCost); VectorizationFactor ChosenFactor = ScalarCost; bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled; @@ -6060,7 +6060,7 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor( continue; VectorizationCostTy C = expectedCost(i, &InvalidCosts); - VectorizationFactor Candidate(i, C.first); + VectorizationFactor Candidate(i, C.first, ScalarCost.ScalarCost); LLVM_DEBUG( dbgs() << "LV: Vector loop of width " << i << " costs: " << (Candidate.Cost / Candidate.Width.getKnownMinValue()) @@ -6251,7 +6251,7 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor( LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization factor is forced.\n";); if (LVP.hasPlanWithVFs( {MainLoopVF, ElementCount::getFixed(EpilogueVectorizationForceVF)})) - return {ElementCount::getFixed(EpilogueVectorizationForceVF), 0}; + return {ElementCount::getFixed(EpilogueVectorizationForceVF), 0, 0}; else { LLVM_DEBUG( dbgs() @@ -8079,7 +8079,7 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) { if 
(VPlanBuildStressTest) return VectorizationFactor::Disabled(); - return {VF, 0 /*Cost*/}; + return {VF, 0 /*Cost*/, 0 /* ScalarCost */}; } LLVM_DEBUG( @@ -8089,7 +8089,8 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) { } Optional<VectorizationFactor> -LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { +LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC, + GeneratedRTChecks &Checks) { assert(OrigLoop->isInnermost() && "Inner loop expected."); FixedScalableVFPair MaxFactors = CM.computeMaxVF(UserVF, UserIC); if (!MaxFactors) // Cases that should not to be vectorized nor interleaved. @@ -8122,7 +8123,8 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { CM.collectInLoopReductions(); buildVPlansWithVPRecipes(UserVF, UserVF); LLVM_DEBUG(printPlans(dbgs())); - return {{UserVF, 0}}; + Checks.Create(OrigLoop, *Legal->getLAI(), PSE.getUnionPredicate()); + return {{UserVF, 0, 0}}; } else reportVectorizationInfo("UserVF ignored because of invalid costs.", "InvalidCost", ORE, OrigLoop); @@ -8158,6 +8160,9 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { // Select the optimal vectorization factor. auto SelectedVF = CM.selectVectorizationFactor(VFCandidates); + if (!SelectedVF.Width.isScalar()) + Checks.Create(OrigLoop, *Legal->getLAI(), PSE.getUnionPredicate()); + // Check if it is profitable to vectorize with runtime checks. unsigned NumRuntimePointerChecks = Requirements.getNumRuntimePointerChecks(); if (SelectedVF.Width.getKnownMinValue() > 1 && NumRuntimePointerChecks) { @@ -10276,8 +10281,10 @@ bool LoopVectorizePass::processLoop(Loop *L) { ElementCount UserVF = Hints.getWidth(); unsigned UserIC = Hints.getInterleave(); + GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, + F->getParent()->getDataLayout()); // Plan how to best vectorize, return the best VF and its cost. 
- Optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC); + Optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC, Checks); VectorizationFactor VF = VectorizationFactor::Disabled(); unsigned IC = 1; @@ -10373,13 +10380,6 @@ bool LoopVectorizePass::processLoop(Loop *L) { bool DisableRuntimeUnroll = false; MDNode *OrigLoopID = L->getLoopID(); { - // Optimistically generate runtime checks. Drop them if they turn out to not - // be profitable. Limit the scope of Checks, so the cleanup happens - // immediately after vector codegeneration is done. - GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, - F->getParent()->getDataLayout()); - if (!VF.Width.isScalar() || IC > 1) - Checks.Create(L, *LVL.getLAI(), PSE.getUnionPredicate()); LVP.setBestPlan(VF.Width, IC); using namespace ore; _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits