arsenm (https://github.com/arsenm) created https://github.com/llvm/llvm-project/pull/102806
None

From 56fc9f47bd12696b13a677ee92c83a85cbf09466 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <matthew.arsena...@amd.com>
Date: Sun, 11 Aug 2024 12:57:27 +0400
Subject: [PATCH] AMDGPU/NewPM: Port AMDGPULateCodeGenPrepare to new pass manager

---
 llvm/lib/Target/AMDGPU/AMDGPU.h               |  17 ++-
 .../AMDGPU/AMDGPULateCodeGenPrepare.cpp       | 110 +++++++++++-------
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |   3 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   4 +-
 .../AMDGPU/amdgpu-late-codegenprepare.ll      |   1 +
 5 files changed, 88 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 5b8d37a8ae7944..2a6b5a10a5d464 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -18,6 +18,7 @@
 namespace llvm {
 
 class AMDGPUTargetMachine;
+class GCNTargetMachine;
 class TargetMachine;
 
 // GlobalISel passes
@@ -54,7 +55,7 @@ FunctionPass *createSIPostRABundlerPass();
 FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *);
 ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
 FunctionPass *createAMDGPUCodeGenPreparePass();
-FunctionPass *createAMDGPULateCodeGenPreparePass();
+FunctionPass *createAMDGPULateCodeGenPrepareLegacyPass();
 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
 FunctionPass *createAMDGPURewriteOutArgumentsPass();
 ModulePass *
@@ -273,6 +274,16 @@ class AMDGPUCodeGenPreparePass
   PreservedAnalyses run(Function &, FunctionAnalysisManager &);
 };
 
+class AMDGPULateCodeGenPreparePass
+    : public PassInfoMixin<AMDGPULateCodeGenPreparePass> {
+private:
+  const GCNTargetMachine &TM;
+
+public:
+  AMDGPULateCodeGenPreparePass(const GCNTargetMachine &TM) : TM(TM) {};
+  PreservedAnalyses run(Function &, FunctionAnalysisManager &);
+};
+
 class AMDGPULowerKernelArgumentsPass
     : public PassInfoMixin<AMDGPULowerKernelArgumentsPass> {
 private:
@@ -329,8 +340,8 @@ extern char &AMDGPUCodeGenPrepareID;
 void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &);
 extern char &AMDGPURemoveIncompatibleFunctionsID;
 
-void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
-extern char &AMDGPULateCodeGenPrepareID;
+void initializeAMDGPULateCodeGenPrepareLegacyPass(PassRegistry &);
+extern char &AMDGPULateCodeGenPrepareLegacyID;
 
 FunctionPass *createAMDGPURewriteUndefForPHILegacyPass();
 void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
index 7bf5170794cd9e..36dfebacaed686 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -42,10 +42,10 @@ static cl::opt<bool>
 namespace {
 
 class AMDGPULateCodeGenPrepare
-    : public FunctionPass,
-      public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
+    : public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
   Module *Mod = nullptr;
   const DataLayout *DL = nullptr;
+  const GCNSubtarget &ST;
 
   AssumptionCache *AC = nullptr;
   UniformityInfo *UA = nullptr;
@@ -53,24 +53,10 @@ class AMDGPULateCodeGenPrepare
   SmallVector<WeakTrackingVH, 8> DeadInsts;
 
 public:
-  static char ID;
-
-  AMDGPULateCodeGenPrepare() : FunctionPass(ID) {}
-
-  StringRef getPassName() const override {
-    return "AMDGPU IR late optimizations";
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<TargetPassConfig>();
-    AU.addRequired<AssumptionCacheTracker>();
-    AU.addRequired<UniformityInfoWrapperPass>();
-    AU.setPreservesAll();
-  }
-
-  bool doInitialization(Module &M) override;
-  bool runOnFunction(Function &F) override;
-
+  AMDGPULateCodeGenPrepare(Module &M, const GCNSubtarget &ST,
+                           AssumptionCache *AC, UniformityInfo *UA)
+      : Mod(&M), DL(&M.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
+  bool run(Function &F);
   bool visitInstruction(Instruction &) { return false; }
 
   // Check if the specified value is at least DWORD aligned.
@@ -148,23 +134,7 @@ class LiveRegOptimizer {
 
 } // end anonymous namespace
 
-bool AMDGPULateCodeGenPrepare::doInitialization(Module &M) {
-  Mod = &M;
-  DL = &Mod->getDataLayout();
-  return false;
-}
-
-bool AMDGPULateCodeGenPrepare::runOnFunction(Function &F) {
-  if (skipFunction(F))
-    return false;
-
-  const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
-  const TargetMachine &TM = TPC.getTM<TargetMachine>();
-  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
-
-  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-  UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
-
+bool AMDGPULateCodeGenPrepare::run(Function &F) {
   // "Optimize" the virtual regs that cross basic block boundaries. When
   // building the SelectionDAG, vectors of illegal types that cross basic blocks
   // will be scalarized and widened, with each scalar living in its
@@ -505,16 +475,72 @@ bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
   return true;
 }
 
-INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepare, DEBUG_TYPE,
+PreservedAnalyses
+AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) {
+  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+
+  AssumptionCache &AC = FAM.getResult<AssumptionAnalysis>(F);
+  UniformityInfo &UI = FAM.getResult<UniformityInfoAnalysis>(F);
+
+  AMDGPULateCodeGenPrepare Impl(*F.getParent(), ST, &AC, &UI);
+
+  bool Changed = Impl.run(F);
+
+  PreservedAnalyses PA = PreservedAnalyses::none();
+  if (!Changed)
+    return PA;
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
+
+class AMDGPULateCodeGenPrepareLegacy : public FunctionPass {
+public:
+  static char ID;
+
+  AMDGPULateCodeGenPrepareLegacy() : FunctionPass(ID) {}
+
+  StringRef getPassName() const override {
+    return "AMDGPU IR late optimizations";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetPassConfig>();
+    AU.addRequired<AssumptionCacheTracker>();
+    AU.addRequired<UniformityInfoWrapperPass>();
+    AU.setPreservesAll();
+  }
+
+  bool runOnFunction(Function &F) override;
+};
+
+bool AMDGPULateCodeGenPrepareLegacy::runOnFunction(Function &F) {
+  if (skipFunction(F))
+    return false;
+
+  const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+  const TargetMachine &TM = TPC.getTM<TargetMachine>();
+  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+
+  AssumptionCache &AC =
+      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+  UniformityInfo &UI =
+      getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
+
+  AMDGPULateCodeGenPrepare Impl(*F.getParent(), ST, &AC, &UI);
+
+  return Impl.run(F);
+}
+
+INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
                       "AMDGPU IR late optimizations", false, false)
 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
-INITIALIZE_PASS_END(AMDGPULateCodeGenPrepare, DEBUG_TYPE,
+INITIALIZE_PASS_END(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
                     "AMDGPU IR late optimizations", false, false)
 
-char AMDGPULateCodeGenPrepare::ID = 0;
+char AMDGPULateCodeGenPrepareLegacy::ID = 0;
 
-FunctionPass *llvm::createAMDGPULateCodeGenPreparePass() {
-  return new AMDGPULateCodeGenPrepare();
+FunctionPass *llvm::createAMDGPULateCodeGenPrepareLegacyPass() {
+  return new AMDGPULateCodeGenPrepareLegacy();
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 7188c8953254c0..1a5d7f2de1ceca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -35,6 +35,9 @@ MODULE_PASS("amdgpu-unify-metadata", AMDGPUUnifyMetadataPass())
 FUNCTION_PASS("amdgpu-codegenprepare", AMDGPUCodeGenPreparePass(*this))
 FUNCTION_PASS("amdgpu-image-intrinsic-opt",
               AMDGPUImageIntrinsicOptimizerPass(*this))
+FUNCTION_PASS("amdgpu-late-codegenprepare",
+              AMDGPULateCodeGenPreparePass(
+                  *static_cast<const GCNTargetMachine *>(this)))
 FUNCTION_PASS("amdgpu-lower-kernel-arguments",
               AMDGPULowerKernelArgumentsPass(*this))
 FUNCTION_PASS("amdgpu-lower-kernel-attributes",
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 62cf9c6cd61140..2d7857e8c5b527 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -432,7 +432,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeAMDGPUPromoteAllocaPass(*PR);
   initializeAMDGPUPromoteAllocaToVectorPass(*PR);
   initializeAMDGPUCodeGenPreparePass(*PR);
-  initializeAMDGPULateCodeGenPreparePass(*PR);
+  initializeAMDGPULateCodeGenPrepareLegacyPass(*PR);
   initializeAMDGPURemoveIncompatibleFunctionsPass(*PR);
   initializeAMDGPULowerModuleLDSLegacyPass(*PR);
   initializeAMDGPULowerBufferFatPointersPass(*PR);
@@ -1207,7 +1207,7 @@ bool GCNPassConfig::addPreISel() {
   addPass(createSinkingPass());
 
   if (TM->getOptLevel() > CodeGenOptLevel::None)
-    addPass(createAMDGPULateCodeGenPreparePass());
+    addPass(createAMDGPULateCodeGenPrepareLegacyPass());
 
   // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
   // regions formed by them.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll
index fa68722ff67414..a5bea7cd738f1d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-late-codegenprepare %s | FileCheck %s -check-prefix=GFX9
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -amdgpu-late-codegenprepare %s | FileCheck %s -check-prefix=GFX12
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=amdgpu-late-codegenprepare %s | FileCheck %s -check-prefix=GFX9
 
 ; Make sure we don't crash when trying to create a bitcast between
 ; address spaces

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits