https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/130063
>From 33a5201fec71751cec72bf63fd80b873961ac247 Mon Sep 17 00:00:00 2001 From: Akshat Oke <akshat....@amd.com> Date: Thu, 6 Mar 2025 05:26:49 +0000 Subject: [PATCH] [AMDGPU][NPM] Port SILateBranchLowering to NPM --- llvm/lib/Target/AMDGPU/AMDGPU.h | 10 ++++- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 2 +- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 5 ++- .../Target/AMDGPU/SILateBranchLowering.cpp | 40 ++++++++++++++----- llvm/test/CodeGen/AMDGPU/early-term.mir | 2 + llvm/test/CodeGen/AMDGPU/readlane_exec0.mir | 1 + 6 files changed, 46 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 61df9191cbbb9..6f11a200bac64 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -213,7 +213,7 @@ extern char &SILowerControlFlowLegacyID; void initializeSIPreEmitPeepholePass(PassRegistry &); extern char &SIPreEmitPeepholeID; -void initializeSILateBranchLoweringPass(PassRegistry &); +void initializeSILateBranchLoweringLegacyPass(PassRegistry &); extern char &SILateBranchLoweringPassID; void initializeSIOptimizeExecMaskingLegacyPass(PassRegistry &); @@ -383,6 +383,14 @@ class SIInsertHardClausesPass : public PassInfoMixin<SIInsertHardClausesPass> { MachineFunctionAnalysisManager &MFAM); }; +class SILateBranchLoweringPass + : public PassInfoMixin<SILateBranchLoweringPass> { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); + static bool isRequired() { return true; } +}; + FunctionPass *createAMDGPUAnnotateUniformValuesLegacy(); ModulePass *createAMDGPUPrintfRuntimeBinding(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 3eabe087a8a33..318aad5590cda 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -111,6 +111,7 @@ MACHINE_FUNCTION_PASS("si-form-memory-clauses", SIFormMemoryClausesPass()) MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass()) MACHINE_FUNCTION_PASS("si-insert-hard-clauses", SIInsertHardClausesPass()) MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass()) +MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass()) MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass()) MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass()) MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass()) @@ -132,7 +133,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizations DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass()) DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass()) -DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass()) DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass()) // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it // already exists. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 6c24fe5f1441a..b9d62cc9e4b63 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -540,7 +540,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSIWholeQuadModeLegacyPass(*PR); initializeSILowerControlFlowLegacyPass(*PR); initializeSIPreEmitPeepholePass(*PR); - initializeSILateBranchLoweringPass(*PR); + initializeSILateBranchLoweringLegacyPass(*PR); initializeSIMemoryLegalizerLegacyPass(*PR); initializeSIOptimizeExecMaskingLegacyPass(*PR); initializeSIPreAllocateWWMRegsLegacyPass(*PR); @@ -2161,7 +2161,8 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const { // TODO: addPass(SIInsertHardClausesPass()); } - // addPass(SILateBranchLoweringPass()); + addPass(SILateBranchLoweringPass()); + if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less)) { // TODO: addPass(AMDGPUSetWavePriorityPass()); } diff --git a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp index d02173f57ee37..0f5b6bd9374b0 100644 --- a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp @@ -16,6 +16,7 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachinePassManager.h" using namespace llvm; @@ -23,7 +24,7 @@ using namespace llvm; namespace { -class SILateBranchLowering : public MachineFunctionPass { +class SILateBranchLowering { private: const SIRegisterInfo *TRI = nullptr; const SIInstrInfo *TII = nullptr; @@ -33,14 +34,23 @@ class SILateBranchLowering : public MachineFunctionPass { void earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock); public: - static char ID; + SILateBranchLowering(MachineDominatorTree *MDT) : MDT(MDT) {} + + bool run(MachineFunction &MF); unsigned MovOpc; Register ExecReg; +}; - SILateBranchLowering() : MachineFunctionPass(ID) {} +class SILateBranchLoweringLegacy : public MachineFunctionPass { +public: + static char ID; + SILateBranchLoweringLegacy() : MachineFunctionPass(ID) {} - bool runOnMachineFunction(MachineFunction &MF) override; + bool runOnMachineFunction(MachineFunction &MF) override { + auto *MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); + return SILateBranchLowering(MDT).run(MF); + } StringRef getPassName() const override { return "SI Final Branch Preparation"; @@ -55,15 +65,15 @@ class SILateBranchLowering : public MachineFunctionPass { } // end anonymous namespace -char SILateBranchLowering::ID = 0; +char SILateBranchLoweringLegacy::ID = 0; -INITIALIZE_PASS_BEGIN(SILateBranchLowering, DEBUG_TYPE, +INITIALIZE_PASS_BEGIN(SILateBranchLoweringLegacy, DEBUG_TYPE, "SI insert s_cbranch_execz instructions", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) -INITIALIZE_PASS_END(SILateBranchLowering, DEBUG_TYPE, +INITIALIZE_PASS_END(SILateBranchLoweringLegacy, DEBUG_TYPE, "SI insert s_cbranch_execz instructions", false, false) -char &llvm::SILateBranchLoweringPassID = SILateBranchLowering::ID; +char &llvm::SILateBranchLoweringPassID = SILateBranchLoweringLegacy::ID; static void generateEndPgm(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, @@ -144,11 +154,21 @@ void SILateBranchLowering::earlyTerm(MachineInstr &MI, MDT->insertEdge(&MBB, EarlyExitBlock); } -bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) { +PreservedAnalyses +llvm::SILateBranchLoweringPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + auto *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF); + if (!SILateBranchLowering(MDT).run(MF)) + return PreservedAnalyses::all(); + + return getMachineFunctionPassPreservedAnalyses() + .preserve<MachineDominatorTreeAnalysis>(); +} + +bool SILateBranchLowering::run(MachineFunction &MF) { const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); TII = ST.getInstrInfo(); TRI = &TII->getRegisterInfo(); - MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; diff --git a/llvm/test/CodeGen/AMDGPU/early-term.mir b/llvm/test/CodeGen/AMDGPU/early-term.mir index 77bc9729ee845..3d75d405a46d3 100644 --- a/llvm/test/CodeGen/AMDGPU/early-term.mir +++ b/llvm/test/CodeGen/AMDGPU/early-term.mir @@ -2,6 +2,8 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX10 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX11 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -passes=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX11 %s + --- | define amdgpu_ps void @early_term_scc0_end_block() { ret void diff --git a/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir b/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir index 6a286eafa6d58..a4c05aa781df7 100644 --- a/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir +++ b/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir @@ -1,4 +1,5 @@ # RUN: llc -o - %s -mtriple=amdgcn -mcpu=fiji -run-pass=si-late-branch-lowering -verify-machineinstrs | FileCheck -check-prefix=GCN %s +# RUN: llc -o - %s -mtriple=amdgcn -mcpu=fiji -passes=si-late-branch-lowering -verify-machineinstrs | FileCheck -check-prefix=GCN %s # GCN-LABEL: readlane_exec0 # GCN: bb.0 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits