https://github.com/cdevadas updated https://github.com/llvm/llvm-project/pull/106605
>From 607099de09be2fed6d9277c8439ade69e0820d92 Mon Sep 17 00:00:00 2001 From: Christudasan Devadasan <christudasan.devada...@amd.com> Date: Thu, 29 Aug 2024 22:21:22 +0530 Subject: [PATCH 1/3] [CodeGen][NewPM] Port MachineCSE pass to new pass manager. --- llvm/include/llvm/CodeGen/MachineCSE.h | 29 ++ llvm/include/llvm/CodeGen/Passes.h | 2 +- llvm/include/llvm/InitializePasses.h | 2 +- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 + .../llvm/Passes/MachinePassRegistry.def | 2 +- llvm/lib/CodeGen/CodeGen.cpp | 2 +- llvm/lib/CodeGen/MachineCSE.cpp | 283 ++++++++++-------- llvm/lib/CodeGen/TargetPassConfig.cpp | 6 +- llvm/lib/Passes/PassBuilder.cpp | 1 + .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 2 +- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 2 +- .../GlobalISel/machine-cse-mid-pipeline.mir | 1 + .../AArch64/sve-pfalse-machine-cse.mir | 1 + .../no-cse-nonlocal-convergent-instrs.mir | 1 + .../copyprop_regsequence_with_undef.mir | 1 + llvm/test/CodeGen/AMDGPU/machine-cse-ssa.mir | 8 +- .../CodeGen/PowerPC/machine-cse-rm-pre.mir | 1 + .../CodeGen/Thumb/machine-cse-deadreg.mir | 1 + .../CodeGen/Thumb/machine-cse-physreg.mir | 1 + llvm/test/CodeGen/X86/cse-two-preds.mir | 1 + llvm/test/DebugInfo/MIR/X86/machine-cse.mir | 1 + 21 files changed, 215 insertions(+), 134 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/MachineCSE.h diff --git a/llvm/include/llvm/CodeGen/MachineCSE.h b/llvm/include/llvm/CodeGen/MachineCSE.h new file mode 100644 index 00000000000000..7440068e3e6f46 --- /dev/null +++ b/llvm/include/llvm/CodeGen/MachineCSE.h @@ -0,0 +1,29 @@ +//===- llvm/CodeGen/MachineCSE.h -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MACHINECSE_H +#define LLVM_CODEGEN_MACHINECSE_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class MachineCSEPass : public PassInfoMixin<MachineCSEPass> { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); + + MachineFunctionProperties getRequiredProperties() { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_MACHINECSE_H diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index dbdd110b0600e5..ddb2012cd2bffc 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -330,7 +330,7 @@ namespace llvm { extern char &GCMachineCodeAnalysisID; /// MachineCSE - This pass performs global CSE on machine instructions. - extern char &MachineCSEID; + extern char &MachineCSELegacyID; /// MIRCanonicalizer - This pass canonicalizes MIR by renaming vregs /// according to the semantics of the instruction as well as hoists diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 47a1ca15fc0d1f..6605c6fde92510 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -188,7 +188,7 @@ void initializeMachineBlockPlacementPass(PassRegistry &); void initializeMachineBlockPlacementStatsPass(PassRegistry &); void initializeMachineBranchProbabilityInfoWrapperPassPass(PassRegistry &); void initializeMachineCFGPrinterPass(PassRegistry &); -void initializeMachineCSEPass(PassRegistry &); +void initializeMachineCSELegacyPass(PassRegistry &); void initializeMachineCombinerPass(PassRegistry &); void initializeMachineCopyPropagationPass(PassRegistry &); void initializeMachineCycleInfoPrinterPassPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index eb15beb835b535..6c34747b9da406 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -42,6 +42,7 @@ #include "llvm/CodeGen/LocalStackSlotAllocation.h" #include "llvm/CodeGen/LowerEmuTLS.h" #include "llvm/CodeGen/MIRPrinter.h" +#include "llvm/CodeGen/MachineCSE.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachinePassManager.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index b710b1c46f643f..cb781532e266e6 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -131,6 +131,7 @@ MACHINE_FUNCTION_ANALYSIS("slot-indexes", SlotIndexesAnalysis()) #endif MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass()) MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass()) +MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass()) MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass()) MACHINE_FUNCTION_PASS("no-op-machine-function", NoOpMachineFunctionPass()) MACHINE_FUNCTION_PASS("phi-node-elimination", PHIEliminationPass()) @@ -219,7 +220,6 @@ DUMMY_MACHINE_FUNCTION_PASS("livedebugvalues", LiveDebugValuesPass) DUMMY_MACHINE_FUNCTION_PASS("lrshrink", LiveRangeShrinkPass) DUMMY_MACHINE_FUNCTION_PASS("machine-combiner", MachineCombinerPass) DUMMY_MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass) -DUMMY_MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass) DUMMY_MACHINE_FUNCTION_PASS("machine-function-splitter", MachineFunctionSplitterPass) DUMMY_MACHINE_FUNCTION_PASS("machine-latecleanup", MachineLateInstrsCleanupPass) DUMMY_MACHINE_FUNCTION_PASS("machine-sanmd", MachineSanitizerBinaryMetadata) diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 177702054a0e31..16b8d456748fac 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -75,7 +75,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMachineBlockPlacementPass(Registry); initializeMachineBlockPlacementStatsPass(Registry); initializeMachineCFGPrinterPass(Registry); - initializeMachineCSEPass(Registry); + initializeMachineCSELegacyPass(Registry); initializeMachineCombinerPass(Registry); initializeMachineCopyPropagationPass(Registry); initializeMachineCycleInfoPrinterPassPass(Registry); diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp index 2ac1fae9ea48c1..2ab95fb4d9641a 100644 --- a/llvm/lib/CodeGen/MachineCSE.cpp +++ b/llvm/lib/CodeGen/MachineCSE.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineCSE.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ScopedHashTable.h" #include "llvm/ADT/SmallPtrSet.h" @@ -25,6 +26,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -69,110 +71,110 @@ static cl::opt<bool> AggressiveMachineCSE( namespace { - class MachineCSE : public MachineFunctionPass { - const TargetInstrInfo *TII = nullptr; - const TargetRegisterInfo *TRI = nullptr; - MachineDominatorTree *DT = nullptr; - MachineRegisterInfo *MRI = nullptr; - MachineBlockFrequencyInfo *MBFI = nullptr; - - public: - static char ID; // Pass identification - - MachineCSE() : MachineFunctionPass(ID) { - initializeMachineCSEPass(*PassRegistry::getPassRegistry()); - } - - bool runOnMachineFunction(MachineFunction &MF) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - AU.addPreservedID(MachineLoopInfoID); - AU.addRequired<MachineDominatorTreeWrapperPass>(); - AU.addPreserved<MachineDominatorTreeWrapperPass>(); - AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); - AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>(); - } - - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties() - .set(MachineFunctionProperties::Property::IsSSA); - } +class MachineCSEImpl { + const TargetInstrInfo *TII = nullptr; + const TargetRegisterInfo *TRI = nullptr; + MachineDominatorTree *DT = nullptr; + MachineRegisterInfo *MRI = nullptr; + MachineBlockFrequencyInfo *MBFI = nullptr; + +public: + MachineCSEImpl(MachineDominatorTree *DT, MachineBlockFrequencyInfo *MBFI) + : DT(DT), MBFI(MBFI) {} + bool run(MachineFunction &MF); + +private: + using AllocatorTy = + RecyclingAllocator<BumpPtrAllocator, + ScopedHashTableVal<MachineInstr *, unsigned>>; + using ScopedHTType = + ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait, + AllocatorTy>; + using ScopeType = ScopedHTType::ScopeTy; + using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>; + + unsigned LookAheadLimit = 0; + DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap; + DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait> + PREMap; + ScopedHTType VNT; + SmallVector<MachineInstr *, 64> Exps; + unsigned CurrVN = 0; + + bool PerformTrivialCopyPropagation(MachineInstr *MI, MachineBasicBlock *MBB); + bool isPhysDefTriviallyDead(MCRegister Reg, + MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator E) const; + bool hasLivePhysRegDefUses(const MachineInstr *MI, + const MachineBasicBlock *MBB, + SmallSet<MCRegister, 8> &PhysRefs, + PhysDefVector &PhysDefs, bool &PhysUseDef) const; + bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, + SmallSet<MCRegister, 8> &PhysRefs, + PhysDefVector &PhysDefs, bool &NonLocal) const; + bool isCSECandidate(MachineInstr *MI); + bool isProfitableToCSE(Register CSReg, Register Reg, MachineBasicBlock *CSBB, + MachineInstr *MI); + void EnterScope(MachineBasicBlock *MBB); + void ExitScope(MachineBasicBlock *MBB); + bool ProcessBlockCSE(MachineBasicBlock *MBB); + void ExitScopeIfDone(MachineDomTreeNode *Node, + DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren); + bool PerformCSE(MachineDomTreeNode *Node); + + bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs); + bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB); + bool PerformSimplePRE(MachineDominatorTree *DT); + /// Heuristics to see if it's profitable to move common computations of MBB + /// and MBB1 to CandidateBB. + bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB, + MachineBasicBlock *MBB, MachineBasicBlock *MBB1); + void releaseMemory(); +}; + +class MachineCSELegacy : public MachineFunctionPass { +public: + static char ID; // Pass identification + + MachineCSELegacy() : MachineFunctionPass(ID) { + initializeMachineCSELegacyPass(*PassRegistry::getPassRegistry()); + } - void releaseMemory() override { - ScopeMap.clear(); - PREMap.clear(); - Exps.clear(); - } + bool runOnMachineFunction(MachineFunction &MF) override; - private: - using AllocatorTy = RecyclingAllocator<BumpPtrAllocator, - ScopedHashTableVal<MachineInstr *, unsigned>>; - using ScopedHTType = - ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait, - AllocatorTy>; - using ScopeType = ScopedHTType::ScopeTy; - using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>; - - unsigned LookAheadLimit = 0; - DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap; - DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait> - PREMap; - ScopedHTType VNT; - SmallVector<MachineInstr *, 64> Exps; - unsigned CurrVN = 0; - - bool PerformTrivialCopyPropagation(MachineInstr *MI, - MachineBasicBlock *MBB); - bool isPhysDefTriviallyDead(MCRegister Reg, - MachineBasicBlock::const_iterator I, - MachineBasicBlock::const_iterator E) const; - bool hasLivePhysRegDefUses(const MachineInstr *MI, - const MachineBasicBlock *MBB, - SmallSet<MCRegister, 8> &PhysRefs, - PhysDefVector &PhysDefs, bool &PhysUseDef) const; - bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, - SmallSet<MCRegister, 8> &PhysRefs, - PhysDefVector &PhysDefs, bool &NonLocal) const; - bool isCSECandidate(MachineInstr *MI); - bool isProfitableToCSE(Register CSReg, Register Reg, - MachineBasicBlock *CSBB, MachineInstr *MI); - void EnterScope(MachineBasicBlock *MBB); - void ExitScope(MachineBasicBlock *MBB); - bool ProcessBlockCSE(MachineBasicBlock *MBB); - void ExitScopeIfDone(MachineDomTreeNode *Node, - DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren); - bool PerformCSE(MachineDomTreeNode *Node); - - bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs); - bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB); - bool PerformSimplePRE(MachineDominatorTree *DT); - /// Heuristics to see if it's profitable to move common computations of MBB - /// and MBB1 to CandidateBB. - bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB, - MachineBasicBlock *MBB, - MachineBasicBlock *MBB1); - }; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + AU.addPreservedID(MachineLoopInfoID); + AU.addRequired<MachineDominatorTreeWrapperPass>(); + AU.addPreserved<MachineDominatorTreeWrapperPass>(); + AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); + AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>(); + } + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } +}; } // end anonymous namespace -char MachineCSE::ID = 0; +char MachineCSELegacy::ID = 0; -char &llvm::MachineCSEID = MachineCSE::ID; +char &llvm::MachineCSELegacyID = MachineCSELegacy::ID; -INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE, +INITIALIZE_PASS_BEGIN(MachineCSELegacy, DEBUG_TYPE, "Machine Common Subexpression Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) -INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE, +INITIALIZE_PASS_END(MachineCSELegacy, DEBUG_TYPE, "Machine Common Subexpression Elimination", false, false) /// The source register of a COPY machine instruction can be propagated to all /// its users, and this propagation could increase the probability of finding /// common subexpressions. If the COPY has only one user, the COPY itself can /// be removed. -bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, - MachineBasicBlock *MBB) { +bool MachineCSEImpl::PerformTrivialCopyPropagation(MachineInstr *MI, + MachineBasicBlock *MBB) { bool Changed = false; for (MachineOperand &MO : MI->all_uses()) { Register Reg = MO.getReg(); @@ -225,7 +227,7 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, return Changed; } -bool MachineCSE::isPhysDefTriviallyDead( +bool MachineCSEImpl::isPhysDefTriviallyDead( MCRegister Reg, MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator E) const { unsigned LookAheadLeft = LookAheadLimit; @@ -282,11 +284,11 @@ static bool isCallerPreservedOrConstPhysReg(MCRegister Reg, /// physical registers (except for dead defs of physical registers). It also /// returns the physical register def by reference if it's the only one and the /// instruction does not uses a physical register. -bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, - const MachineBasicBlock *MBB, - SmallSet<MCRegister, 8> &PhysRefs, - PhysDefVector &PhysDefs, - bool &PhysUseDef) const { +bool MachineCSEImpl::hasLivePhysRegDefUses(const MachineInstr *MI, + const MachineBasicBlock *MBB, + SmallSet<MCRegister, 8> &PhysRefs, + PhysDefVector &PhysDefs, + bool &PhysUseDef) const { // First, add all uses to PhysRefs. for (const MachineOperand &MO : MI->all_uses()) { Register Reg = MO.getReg(); @@ -333,10 +335,10 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, return !PhysRefs.empty(); } -bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, - SmallSet<MCRegister, 8> &PhysRefs, - PhysDefVector &PhysDefs, - bool &NonLocal) const { +bool MachineCSEImpl::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, + SmallSet<MCRegister, 8> &PhysRefs, + PhysDefVector &PhysDefs, + bool &NonLocal) const { // For now conservatively returns false if the common subexpression is // not in the same basic block as the given instruction. The only exception // is if the common subexpression is in the sole predecessor block. @@ -400,7 +402,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, return false; } -bool MachineCSE::isCSECandidate(MachineInstr *MI) { +bool MachineCSEImpl::isCSECandidate(MachineInstr *MI) { if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo() || MI->isFakeUse()) @@ -437,8 +439,9 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) { /// isProfitableToCSE - Return true if it's profitable to eliminate MI with a /// common expression that defines Reg. CSBB is basic block where CSReg is /// defined. -bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg, - MachineBasicBlock *CSBB, MachineInstr *MI) { +bool MachineCSEImpl::isProfitableToCSE(Register CSReg, Register Reg, + MachineBasicBlock *CSBB, + MachineInstr *MI) { if (AggressiveMachineCSE) return true; @@ -513,13 +516,13 @@ bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg, return !HasPHI; } -void MachineCSE::EnterScope(MachineBasicBlock *MBB) { +void MachineCSEImpl::EnterScope(MachineBasicBlock *MBB) { LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); ScopeType *Scope = new ScopeType(VNT); ScopeMap[MBB] = Scope; } -void MachineCSE::ExitScope(MachineBasicBlock *MBB) { +void MachineCSEImpl::ExitScope(MachineBasicBlock *MBB) { LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB); assert(SI != ScopeMap.end()); @@ -527,7 +530,7 @@ void MachineCSE::ExitScope(MachineBasicBlock *MBB) { ScopeMap.erase(SI); } -bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) { +bool MachineCSEImpl::ProcessBlockCSE(MachineBasicBlock *MBB) { bool Changed = false; SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; @@ -748,9 +751,9 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) { /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given /// dominator tree node if its a leaf or all of its children are done. Walk /// up the dominator tree to destroy ancestors which are now done. -void -MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, - DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) { +void MachineCSEImpl::ExitScopeIfDone( + MachineDomTreeNode *Node, + DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren) { if (OpenChildren[Node]) return; @@ -767,7 +770,7 @@ MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, } } -bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { +bool MachineCSEImpl::PerformCSE(MachineDomTreeNode *Node) { SmallVector<MachineDomTreeNode*, 32> Scopes; SmallVector<MachineDomTreeNode*, 8> WorkList; DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; @@ -799,8 +802,8 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { // We use stronger checks for PRE candidate rather than for CSE ones to embrace // checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps // to exclude instrs created by PRE that won't be CSEed later. -bool MachineCSE::isPRECandidate(MachineInstr *MI, - SmallSet<MCRegister, 8> &PhysRefs) { +bool MachineCSEImpl::isPRECandidate(MachineInstr *MI, + SmallSet<MCRegister, 8> &PhysRefs) { if (!isCSECandidate(MI) || MI->isNotDuplicable() || MI->mayLoad() || @@ -821,8 +824,8 @@ bool MachineCSE::isPRECandidate(MachineInstr *MI, return true; } -bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, - MachineBasicBlock *MBB) { +bool MachineCSEImpl::ProcessBlockPRE(MachineDominatorTree *DT, + MachineBasicBlock *MBB) { bool Changed = false; for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) { SmallSet<MCRegister, 8> PhysRefs; @@ -902,7 +905,7 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, // anticipating that the next CSE step will eliminate this created redundancy. // If CSE doesn't eliminate this, than created instruction will remain dead // and eliminated later by Remove Dead Machine Instructions pass. -bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) { +bool MachineCSEImpl::PerformSimplePRE(MachineDominatorTree *DT) { SmallVector<MachineDomTreeNode *, 32> BBs; PREMap.clear(); @@ -920,9 +923,9 @@ bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) { return Changed; } -bool MachineCSE::isProfitableToHoistInto(MachineBasicBlock *CandidateBB, - MachineBasicBlock *MBB, - MachineBasicBlock *MBB1) { +bool MachineCSEImpl::isProfitableToHoistInto(MachineBasicBlock *CandidateBB, + MachineBasicBlock *MBB, + MachineBasicBlock *MBB1) { if (CandidateBB->getParent()->getFunction().hasMinSize()) return true; assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB"); @@ -932,18 +935,54 @@ bool MachineCSE::isProfitableToHoistInto(MachineBasicBlock *CandidateBB, MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1); } -bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(MF.getFunction())) - return false; +void MachineCSEImpl::releaseMemory() { + ScopeMap.clear(); + PREMap.clear(); + Exps.clear(); +} +bool MachineCSEImpl::run(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); - DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); - MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(); LookAheadLimit = TII->getMachineCSELookAheadLimit(); bool ChangedPRE, ChangedCSE; ChangedPRE = PerformSimplePRE(DT); ChangedCSE = PerformCSE(DT->getRootNode()); + releaseMemory(); return ChangedPRE || ChangedCSE; } + +PreservedAnalyses MachineCSEPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + MFPropsModifier _(*this, MF); + + if (MF.getFunction().hasOptNone()) + return PreservedAnalyses::all(); + + MachineDominatorTree &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(MF); + MachineBlockFrequencyInfo &MBFI = + MFAM.getResult<MachineBlockFrequencyAnalysis>(MF); + MachineCSEImpl Impl(&MDT, &MBFI); + bool Changed = Impl.run(MF); + if (!Changed) + return PreservedAnalyses::all(); + + auto PA = getMachineFunctionPassPreservedAnalyses(); + PA.preserve<MachineLoopAnalysis>(); + PA.preserve<MachineBlockFrequencyAnalysis>(); + PA.preserveSet<CFGAnalyses>(); + return PA; +} + +bool MachineCSELegacy::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + MachineDominatorTree &MDT = + getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); + MachineBlockFrequencyInfo &MBFI = + getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(); + MachineCSEImpl Impl(&MDT, &MBFI); + return Impl.run(MF); +} diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index c0b834650d73b0..11a7752ef7a381 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -311,7 +311,7 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID, if (StandardID == &EarlyMachineLICMID) return applyDisable(TargetID, DisableMachineLICM); - if (StandardID == &MachineCSEID) + if (StandardID == &MachineCSELegacyID) return applyDisable(TargetID, DisableMachineCSE); if (StandardID == &MachineLICMID) @@ -523,7 +523,7 @@ void llvm::registerCodeGenCallback(PassInstrumentationCallbacks &PIC, DISABLE_PASS(DisableCopyProp, MachineCopyPropagationPass) DISABLE_PASS(DisableEarlyIfConversion, EarlyIfConverterPass) DISABLE_PASS(DisableEarlyTailDup, EarlyTailDuplicatePass) - DISABLE_PASS(DisableMachineCSE, MachineCSEPass) + DISABLE_PASS(DisableMachineCSE, MachineCSELegacyPass) DISABLE_PASS(DisableMachineDCE, DeadMachineInstructionElimPass) DISABLE_PASS(DisableMachineLICM, EarlyMachineLICMPass) DISABLE_PASS(DisableMachineSink, MachineSinkingPass) @@ -1305,7 +1305,7 @@ void TargetPassConfig::addMachineSSAOptimization() { addILPOpts(); addPass(&EarlyMachineLICMID); - addPass(&MachineCSEID); + addPass(&MachineCSELegacyID); addPass(&MachineSinkingID); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 63173c4abb8191..4794339bb9ad39 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -100,6 +100,7 @@ #include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineCSE.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineLoopInfo.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index a769bc9e486573..0008ac2b31b300 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1279,7 +1279,7 @@ void GCNPassConfig::addMachineSSAOptimization() { if (isPassEnabled(EnableSDWAPeephole)) { addPass(&SIPeepholeSDWAID); addPass(&EarlyMachineLICMID); - addPass(&MachineCSEID); + addPass(&MachineCSELegacyID); addPass(&SIFoldOperandsLegacyID); } addPass(&DeadMachineInstructionElimID); diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index e86d3771bd2f26..57b7fa783c14a7 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -462,7 +462,7 @@ void NVPTXPassConfig::addMachineSSAOptimization() { printAndVerify("After ILP optimizations"); addPass(&EarlyMachineLICMID); - addPass(&MachineCSEID); + addPass(&MachineCSELegacyID); addPass(&MachineSinkingID); printAndVerify("After Machine LICM, CSE and Sinking passes"); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/machine-cse-mid-pipeline.mir b/llvm/test/CodeGen/AArch64/GlobalISel/machine-cse-mid-pipeline.mir index 2b69c13174f6c8..015ce5ec2dca60 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/machine-cse-mid-pipeline.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/machine-cse-mid-pipeline.mir @@ -1,4 +1,5 @@ # RUN: llc -run-pass machine-cse -verify-machineinstrs -mtriple aarch64-apple-ios %s -o - | FileCheck %s +# RUN: llc -passes machine-cse -mtriple aarch64-apple-ios %s -o - | FileCheck %s --- name: irtranslated legalized: false diff --git a/llvm/test/CodeGen/AArch64/sve-pfalse-machine-cse.mir b/llvm/test/CodeGen/AArch64/sve-pfalse-machine-cse.mir index 8395a7619fbb46..5ebc2f61eaafbe 100644 --- a/llvm/test/CodeGen/AArch64/sve-pfalse-machine-cse.mir +++ b/llvm/test/CodeGen/AArch64/sve-pfalse-machine-cse.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -run-pass=machine-cse -mtriple=aarch64 -mattr=+sve -o - %s | FileCheck %s +# RUN: llc -passes=machine-cse -mtriple=aarch64 -mattr=+sve -o - %s | FileCheck %s --- name: pfalse tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/no-cse-nonlocal-convergent-instrs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/no-cse-nonlocal-convergent-instrs.mir index 684b5ec3883b26..6eb4df2b48700f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/no-cse-nonlocal-convergent-instrs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/no-cse-nonlocal-convergent-instrs.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -o - -run-pass=machine-cse %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -o - -passes=machine-cse %s | FileCheck %s # LLVM's current definition of `isConvergent` does not necessarily prove that # non-local CSE is illegal. The following test extends the definition of diff --git a/llvm/test/CodeGen/AMDGPU/copyprop_regsequence_with_undef.mir b/llvm/test/CodeGen/AMDGPU/copyprop_regsequence_with_undef.mir index 1e12a3b22e9a4e..fee1391d150f98 100644 --- a/llvm/test/CodeGen/AMDGPU/copyprop_regsequence_with_undef.mir +++ b/llvm/test/CodeGen/AMDGPU/copyprop_regsequence_with_undef.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 # RUN: llc -mtriple=amdgcn -run-pass=machine-cse -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn -passes=machine-cse -o - %s | FileCheck %s # Test to ensure that this does not crash on undefs --- diff --git a/llvm/test/CodeGen/AMDGPU/machine-cse-ssa.mir b/llvm/test/CodeGen/AMDGPU/machine-cse-ssa.mir index 89b204d715dedb..d32737f05a9b0e 100644 --- a/llvm/test/CodeGen/AMDGPU/machine-cse-ssa.mir +++ b/llvm/test/CodeGen/AMDGPU/machine-cse-ssa.mir @@ -1,8 +1,10 @@ # REQUIRES: asserts -# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-cse -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s +# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-cse -o /dev/null %s 2>&1 | FileCheck -check-prefixes=ERR,ERR-LEGACY %s +# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=machine-cse -o /dev/null %s 2>&1 | FileCheck -check-prefixes=ERR,ERR-NPM %s -# ERR: MachineFunctionProperties required by Machine Common Subexpression Elimination pass are not met by function not_ssa. -# ERR-NEXT: Required properties: IsSSA +# ERR-LEGACY: MachineFunctionProperties required by Machine Common Subexpression Elimination pass are not met by function not_ssa. +# ERR-NPM: MachineFunctionProperties required by MachineCSEPass pass are not met by function not_ssa. +# ERR: Required properties: IsSSA # ERR-NEXT: Current properties: NoPHIs --- diff --git a/llvm/test/CodeGen/PowerPC/machine-cse-rm-pre.mir b/llvm/test/CodeGen/PowerPC/machine-cse-rm-pre.mir index 32f5e0172047e4..0e9459238ce9d7 100644 --- a/llvm/test/CodeGen/PowerPC/machine-cse-rm-pre.mir +++ b/llvm/test/CodeGen/PowerPC/machine-cse-rm-pre.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc %s -o - -mtriple=powerpc-unknown-unknown -run-pass=machine-cse -verify-machineinstrs | FileCheck %s +# RUN: llc %s -o - -mtriple=powerpc-unknown-unknown -passes=machine-cse | FileCheck %s --- | define void @can_pre() { entry: diff --git a/llvm/test/CodeGen/Thumb/machine-cse-deadreg.mir b/llvm/test/CodeGen/Thumb/machine-cse-deadreg.mir index e4db7abeea354e..cee5c24847f34a 100644 --- a/llvm/test/CodeGen/Thumb/machine-cse-deadreg.mir +++ b/llvm/test/CodeGen/Thumb/machine-cse-deadreg.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass=machine-cse -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=machine-cse -o - %s | FileCheck %s --- | target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/CodeGen/Thumb/machine-cse-physreg.mir b/llvm/test/CodeGen/Thumb/machine-cse-physreg.mir index 2fa22feb4e1b32..58e1eca22711a5 100644 --- a/llvm/test/CodeGen/Thumb/machine-cse-physreg.mir +++ b/llvm/test/CodeGen/Thumb/machine-cse-physreg.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple thumbv5e -run-pass=machine-cse -o - %s | FileCheck %s +# RUN: llc -mtriple thumbv5e -passes=machine-cse -o - %s | FileCheck %s # This is a contrived example made to expose a bug in # MachineCSE, see PR32538. diff --git a/llvm/test/CodeGen/X86/cse-two-preds.mir b/llvm/test/CodeGen/X86/cse-two-preds.mir index 6479747daf4266..e6f04a6ce66d43 100644 --- a/llvm/test/CodeGen/X86/cse-two-preds.mir +++ b/llvm/test/CodeGen/X86/cse-two-preds.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 # RUN: llc -mtriple=x86_64 -verify-machineinstrs --run-pass=machine-cse -o - %s | FileCheck %s +# RUN: llc -mtriple=x86_64 -passes=machine-cse -o - %s | FileCheck %s --- | target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/DebugInfo/MIR/X86/machine-cse.mir b/llvm/test/DebugInfo/MIR/X86/machine-cse.mir index 120dbdf850cc45..9bcb4408312a66 100644 --- a/llvm/test/DebugInfo/MIR/X86/machine-cse.mir +++ b/llvm/test/DebugInfo/MIR/X86/machine-cse.mir @@ -1,4 +1,5 @@ # RUN: llc %s -o - -run-pass=machine-cse -mtriple=x86_64-- | FileCheck %s +# RUN: llc %s -o - -passes=machine-cse -mtriple=x86_64-- | FileCheck %s # # This test examines machine-cse's behaviour when dealing with copy propagation, # the code for which is lifted from test/CodeGen/X86/machine-cse.ll. There are >From e1ee3aca9fd46c0cdc1e3ee6d03862f3823f3487 Mon Sep 17 00:00:00 2001 From: Christudasan Devadasan <christudasan.devada...@amd.com> Date: Fri, 30 Aug 2024 00:05:30 +0530 Subject: [PATCH 2/3] preserve DT. --- llvm/lib/CodeGen/MachineCSE.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp index 2ab95fb4d9641a..8e9fcccff77645 100644 --- a/llvm/lib/CodeGen/MachineCSE.cpp +++ b/llvm/lib/CodeGen/MachineCSE.cpp @@ -970,6 +970,7 @@ PreservedAnalyses MachineCSEPass::run(MachineFunction &MF, auto PA = getMachineFunctionPassPreservedAnalyses(); PA.preserve<MachineLoopAnalysis>(); + PA.preserve<MachineDominatorTreeAnalysis>(); PA.preserve<MachineBlockFrequencyAnalysis>(); PA.preserveSet<CFGAnalyses>(); return PA; >From db508d27147404929cef4b61e7a5e05f69667dfb Mon Sep 17 00:00:00 2001 From: Christudasan Devadasan <christudasan.devada...@amd.com> Date: Fri, 30 Aug 2024 00:26:22 +0530 Subject: [PATCH 3/3] comment padded for 80 chars. --- llvm/include/llvm/CodeGen/MachineCSE.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/CodeGen/MachineCSE.h b/llvm/include/llvm/CodeGen/MachineCSE.h index 7440068e3e6f46..f83c25bf391207 100644 --- a/llvm/include/llvm/CodeGen/MachineCSE.h +++ b/llvm/include/llvm/CodeGen/MachineCSE.h @@ -1,4 +1,4 @@ -//===- llvm/CodeGen/MachineCSE.h -----------------*- C++ -*-===// +//===- llvm/CodeGen/MachineCSE.h --------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits