llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-globalisel Author: Tobias Stadler (tobias-stadler) <details> <summary>Changes</summary> Continues the work for disabling fixed-point iteration in the Combiner (#<!-- -->94291). This introduces improved Observer-based heuristics in the GISel Combiner to retry combining defs/uses of modified instructions and for performing sparse dead code elimination. I have experimented a lot with the heuristics and this seems to be the minimal set of heuristics that allows disabling fixed-point iteration for AArch64 CTMark O2 without regressions. Enabling this globally would pass all regression tests for all official targets (apart from small benign diffs), but I have made this fully opt-in for now, because I can't quantify the impact for other targets. For performance numbers see my follow-up patch for AArch64. --- Full diff: https://github.com/llvm/llvm-project/pull/102163.diff 3 Files Affected: - (modified) llvm/include/llvm/CodeGen/GlobalISel/Combiner.h (+8-2) - (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h (+25) - (modified) llvm/lib/CodeGen/GlobalISel/Combiner.cpp (+180-38) ``````````diff diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h b/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h index f826601544932..fa6a7be6cf6c3 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Combiner.h @@ -15,13 +15,13 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_COMBINER_H #define LLVM_CODEGEN_GLOBALISEL_COMBINER_H +#include "llvm/CodeGen/GlobalISel/CombinerInfo.h" #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" namespace llvm { class MachineRegisterInfo; -struct CombinerInfo; class GISelCSEInfo; class TargetPassConfig; class MachineFunction; @@ -33,8 +33,12 @@ class MachineIRBuilder; /// TODO: Is it worth making this module-wide? class Combiner : public GIMatchTableExecutor { private: + using WorkListTy = GISelWorkList<512>; + class WorkListMaintainer; - GISelWorkList<512> WorkList; + template <CombinerInfo::ObserverLevel Lvl> class WorkListMaintainerImpl; + + WorkListTy WorkList; // We have a little hack here where keep the owned pointers private, and only // expose a reference. This has two purposes: @@ -48,6 +52,8 @@ class Combiner : public GIMatchTableExecutor { bool HasSetupMF = false; + static bool tryDCE(MachineInstr &MI, MachineRegisterInfo &MRI); + public: /// If CSEInfo is not null, then the Combiner will use CSEInfo as the observer /// and also create a CSEMIRBuilder. Pass nullptr if CSE is not needed. diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h index 2b0eb71f88082..67f95c962c582 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h @@ -53,6 +53,31 @@ struct CombinerInfo { /// The maximum number of times the Combiner will iterate over the /// MachineFunction. Setting this to 0 enables fixed-point iteration. unsigned MaxIterations = 0; + + enum class ObserverLevel { + /// Only retry combining created/changed instructions. + /// This replicates the legacy default Observer behavior for use with + /// fixed-point iteration. + Basic, + /// Enables Observer-based detection of dead instructions. This can save + /// some compile-time if full disabling of fixed-point iteration is not + /// desired. If the input IR doesn't contain dead instructions, consider + /// disabling \p EnableFullDCE. + DCE, + /// Enables Observer-based DCE and additional heuristics that retry + /// combining defined and used instructions of modified instructions. + /// This provides a good balance between compile-time and completeness of + /// combining without needing fixed-point iteration. + SinglePass, + }; + + /// Select how the Combiner acts on MIR changes. + ObserverLevel ObserverLvl = ObserverLevel::Basic; + + /// Whether dead code elimination is performed before each Combiner iteration. + /// If Observer-based DCE is enabled, this controls if a full DCE pass is + /// performed before the first Combiner iteration. + bool EnableFullDCE = true; }; } // namespace llvm diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp index 49842e5fd65da..c5ec73cd5c65d 100644 --- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp @@ -45,61 +45,189 @@ cl::OptionCategory GICombinerOptionCategory( ); } // end namespace llvm -/// This class acts as the glue the joins the CombinerHelper to the overall +/// This class acts as the glue that joins the CombinerHelper to the overall /// Combine algorithm. The CombinerHelper is intended to report the /// modifications it makes to the MIR to the GISelChangeObserver and the -/// observer subclass will act on these events. In this case, instruction -/// erasure will cancel any future visits to the erased instruction and -/// instruction creation will schedule that instruction for a future visit. -/// Other Combiner implementations may require more complex behaviour from -/// their GISelChangeObserver subclass. +/// observer subclass will act on these events. class Combiner::WorkListMaintainer : public GISelChangeObserver { - using WorkListTy = GISelWorkList<512>; - WorkListTy &WorkList; +protected: +#ifndef NDEBUG /// The instructions that have been created but we want to report once they /// have their operands. This is only maintained if debug output is requested. -#ifndef NDEBUG - SetVector<const MachineInstr *> CreatedInstrs; + SmallSetVector<const MachineInstr *, 32> CreatedInstrs; #endif + using Level = CombinerInfo::ObserverLevel; public: - WorkListMaintainer(WorkListTy &WorkList) : WorkList(WorkList) {} + static std::unique_ptr<WorkListMaintainer> + create(Level Lvl, WorkListTy &WorkList, MachineRegisterInfo &MRI); + virtual ~WorkListMaintainer() = default; + void reportFullyCreatedInstrs() { + LLVM_DEBUG({ + for (auto *MI : CreatedInstrs) { + dbgs() << "Created: " << *MI; + } + CreatedInstrs.clear(); + }); + } + + virtual void reset() = 0; + virtual void appliedCombine() = 0; +}; + +/// A configurable WorkListMaintainer implementation. +/// The ObserverLevel determines how the WorkListMaintainer reacts to MIR +/// changes. +template <CombinerInfo::ObserverLevel Lvl> +class Combiner::WorkListMaintainerImpl : public Combiner::WorkListMaintainer { + WorkListTy &WorkList; + MachineRegisterInfo &MRI; + + // Defer handling these instructions until the combine finishes. + SmallSetVector<MachineInstr *, 32> DeferList; + + // Track VRegs that (might) have lost a use. + SmallSetVector<Register, 32> LostUses; + +public: + WorkListMaintainerImpl(WorkListTy &WorkList, MachineRegisterInfo &MRI) + : WorkList(WorkList), MRI(MRI) {} + + virtual ~WorkListMaintainerImpl() = default; + + void reset() override { + DeferList.clear(); + LostUses.clear(); + } + void erasingInstr(MachineInstr &MI) override { - LLVM_DEBUG(dbgs() << "Erasing: " << MI << "\n"); + // MI will become dangling, remove it from all lists. + LLVM_DEBUG(dbgs() << "Erasing: " << MI; CreatedInstrs.remove(&MI)); WorkList.remove(&MI); + if constexpr (Lvl != Level::Basic) { + DeferList.remove(&MI); + noteLostUses(MI); + } } + void createdInstr(MachineInstr &MI) override { - LLVM_DEBUG(dbgs() << "Creating: " << MI << "\n"); - WorkList.insert(&MI); - LLVM_DEBUG(CreatedInstrs.insert(&MI)); + LLVM_DEBUG(dbgs() << "Creating: " << MI; CreatedInstrs.insert(&MI)); + if constexpr (Lvl == Level::Basic) + WorkList.insert(&MI); + else + // Defer handling newly created instructions, because they don't have + // operands yet. We also insert them into the WorkList in reverse + // order so that they will be combined top down. + DeferList.insert(&MI); } + void changingInstr(MachineInstr &MI) override { - LLVM_DEBUG(dbgs() << "Changing: " << MI << "\n"); - WorkList.insert(&MI); + LLVM_DEBUG(dbgs() << "Changing: " << MI); + // Some uses might get dropped when MI is changed. + // For now, overapproximate by assuming all uses will be dropped. + // TODO: Is a more precise heuristic or manual tracking of use count + // decrements worth it? + if constexpr (Lvl != Level::Basic) + noteLostUses(MI); } + void changedInstr(MachineInstr &MI) override { - LLVM_DEBUG(dbgs() << "Changed: " << MI << "\n"); - WorkList.insert(&MI); + LLVM_DEBUG(dbgs() << "Changed: " << MI); + if constexpr (Lvl == Level::Basic) + WorkList.insert(&MI); + else + // Defer this for DCE + DeferList.insert(&MI); } - void reportFullyCreatedInstrs() { - LLVM_DEBUG(for (const auto *MI - : CreatedInstrs) { - dbgs() << "Created: "; - MI->print(dbgs()); - }); - LLVM_DEBUG(CreatedInstrs.clear()); + // Only track changes during the combine and then walk the def/use-chains once + // the combine is finished, because: + // - instructions might have multiple defs during the combine. + // - use counts aren't accurate during the combine. + void appliedCombine() override { + if constexpr (Lvl == Level::Basic) + return; + + // DCE deferred instructions and add them to the WorkList bottom up. + while (!DeferList.empty()) { + MachineInstr &MI = *DeferList.pop_back_val(); + if (tryDCE(MI, MRI)) + continue; + + if constexpr (Lvl >= Level::SinglePass) + addUsersToWorkList(MI); + + WorkList.insert(&MI); + } + + // Handle instructions that have lost a user. + while (!LostUses.empty()) { + Register Use = LostUses.pop_back_val(); + MachineInstr *UseMI = MRI.getVRegDef(Use); + if (!UseMI) + continue; + + // If DCE succeeds, UseMI's uses are added back to LostUses by + // erasingInstr. + if (tryDCE(*UseMI, MRI)) + continue; + + if constexpr (Lvl >= Level::SinglePass) { + // OneUse checks are relatively common, so we might be able to combine + // the single remaining user of this Reg. + if (MRI.hasOneNonDBGUser(Use)) + WorkList.insert(&*MRI.use_instr_nodbg_begin(Use)); + + WorkList.insert(UseMI); + } + } + } + + void noteLostUses(MachineInstr &MI) { + for (auto &Use : MI.explicit_uses()) { + if (!Use.isReg() || !Use.getReg().isVirtual()) + continue; + LostUses.insert(Use.getReg()); + } + } + + void addUsersToWorkList(MachineInstr &MI) { + for (auto &Def : MI.defs()) { + Register DefReg = Def.getReg(); + if (!DefReg.isVirtual()) + continue; + for (auto &UseMI : MRI.use_nodbg_instructions(DefReg)) { + WorkList.insert(&UseMI); + } + } } }; +std::unique_ptr<Combiner::WorkListMaintainer> +Combiner::WorkListMaintainer::create(Level Lvl, WorkListTy &WorkList, + MachineRegisterInfo &MRI) { + switch (Lvl) { + case Level::Basic: + return std::make_unique<WorkListMaintainerImpl<Level::Basic>>(WorkList, + MRI); + case Level::DCE: + return std::make_unique<WorkListMaintainerImpl<Level::DCE>>(WorkList, MRI); + case Level::SinglePass: + return std::make_unique<WorkListMaintainerImpl<Level::SinglePass>>(WorkList, + MRI); + } + llvm_unreachable("Illegal ObserverLevel"); +} + Combiner::Combiner(MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC, GISelKnownBits *KB, GISelCSEInfo *CSEInfo) : Builder(CSEInfo ? std::make_unique<CSEMIRBuilder>() : std::make_unique<MachineIRBuilder>()), - WLObserver(std::make_unique<WorkListMaintainer>(WorkList)), + WLObserver(WorkListMaintainer::create(CInfo.ObserverLvl, WorkList, + MF.getRegInfo())), ObserverWrapper(std::make_unique<GISelObserverWrapper>()), CInfo(CInfo), Observer(*ObserverWrapper), B(*Builder), MF(MF), MRI(MF.getRegInfo()), KB(KB), TPC(TPC), CSEInfo(CSEInfo) { @@ -115,6 +243,15 @@ Combiner::Combiner(MachineFunction &MF, CombinerInfo &CInfo, Combiner::~Combiner() = default; +bool Combiner::tryDCE(MachineInstr &MI, MachineRegisterInfo &MRI) { + if (!isTriviallyDead(MI, MRI)) + return false; + LLVM_DEBUG(dbgs() << "Dead: " << MI); + llvm::salvageDebugInfo(MRI, MI); + MI.eraseFromParent(); + return true; +} + bool Combiner::combineMachineInstrs() { // If the ISel pipeline failed, do not bother running this pass. // FIXME: Should this be here or in individual combiner passes. @@ -141,27 +278,29 @@ bool Combiner::combineMachineInstrs() { ++Iteration; LLVM_DEBUG(dbgs() << "\n\nCombiner iteration #" << Iteration << '\n'); + Changed = false; WorkList.clear(); + WLObserver->reset(); ObserverWrapper->clearObservers(); if (CSEInfo) ObserverWrapper->addObserver(CSEInfo); + // If Observer-based DCE is enabled, perform full DCE only before the first + // iteration. + bool EnableDCE = CInfo.ObserverLvl >= CombinerInfo::ObserverLevel::DCE + ? CInfo.EnableFullDCE && Iteration == 1 + : CInfo.EnableFullDCE; + // Collect all instructions. Do a post order traversal for basic blocks and // insert with list bottom up, so while we pop_back_val, we'll traverse top // down RPOT. - Changed = false; - RAIIMFObsDelInstaller DelInstall(MF, *ObserverWrapper); for (MachineBasicBlock *MBB : post_order(&MF)) { for (MachineInstr &CurMI : llvm::make_early_inc_range(llvm::reverse(*MBB))) { // Erase dead insts before even adding to the list. - if (isTriviallyDead(CurMI, MRI)) { - LLVM_DEBUG(dbgs() << CurMI << "Is dead; erasing.\n"); - llvm::salvageDebugInfo(MRI, CurMI); - CurMI.eraseFromParent(); + if (EnableDCE && tryDCE(CurMI, MRI)) continue; - } WorkList.deferred_insert(&CurMI); } } @@ -171,10 +310,13 @@ bool Combiner::combineMachineInstrs() { ObserverWrapper->addObserver(WLObserver.get()); // Main Loop. Process the instructions here. while (!WorkList.empty()) { - MachineInstr *CurrInst = WorkList.pop_back_val(); - LLVM_DEBUG(dbgs() << "\nTry combining " << *CurrInst;); - Changed |= tryCombineAll(*CurrInst); - WLObserver->reportFullyCreatedInstrs(); + MachineInstr &CurrInst = *WorkList.pop_back_val(); + LLVM_DEBUG(dbgs() << "\nTry combining " << CurrInst); + bool AppliedCombine = tryCombineAll(CurrInst); + LLVM_DEBUG(WLObserver->reportFullyCreatedInstrs()); + Changed |= AppliedCombine; + if (AppliedCombine) + WLObserver->appliedCombine(); } MFChanged |= Changed; `````````` </details> https://github.com/llvm/llvm-project/pull/102163 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits