https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/130065
>From 586bcbc3f1ed6b935770957a4e9c3dacda9904a9 Mon Sep 17 00:00:00 2001 From: Akshat Oke <akshat....@amd.com> Date: Thu, 6 Mar 2025 06:20:13 +0000 Subject: [PATCH] [AMDGPU][NPM] Port SIPreEmitPeephole to NPM --- llvm/lib/Target/AMDGPU/AMDGPU.h | 9 +++++- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 2 +- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 7 ++--- llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 31 ++++++++++++++----- .../AMDGPU/insert-handle-flat-vmem-ds.mir | 1 + ...ort-exec-branches-special-instructions.mir | 1 + .../CodeGen/AMDGPU/set-gpr-idx-peephole.mir | 1 + 7 files changed, 38 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 27ae6d42ec21d..b8f5d85ef0b9a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -210,7 +210,7 @@ extern char &SIWholeQuadModeID; void initializeSILowerControlFlowLegacyPass(PassRegistry &); extern char &SILowerControlFlowLegacyID; -void initializeSIPreEmitPeepholePass(PassRegistry &); +void initializeSIPreEmitPeepholeLegacyPass(PassRegistry &); extern char &SIPreEmitPeepholeID; void initializeSILateBranchLoweringLegacyPass(PassRegistry &); @@ -392,6 +392,13 @@ class SILateBranchLoweringPass static bool isRequired() { return true; } }; +class SIPreEmitPeepholePass : public PassInfoMixin<SIPreEmitPeepholePass> { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); + static bool isRequired() { return true; } +}; + class AMDGPUSetWavePriorityPass : public PassInfoMixin<AMDGPUSetWavePriorityPass> { public: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 4956897d22fde..f14499d0d3146 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -125,6 +125,7 @@ MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPr MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass()) MACHINE_FUNCTION_PASS("si-post-ra-bundler", SIPostRABundlerPass()) MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass()) +MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass()) MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass()) MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass()) #undef MACHINE_FUNCTION_PASS @@ -133,7 +134,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass()) DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass()) DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass()) -DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass()) // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it // already exists. DUMMY_MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", AMDGPUPreloadKernArgPrologPass()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 857af30b348cb..05eb609956199 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -539,7 +539,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSIModeRegisterLegacyPass(*PR); initializeSIWholeQuadModeLegacyPass(*PR); initializeSILowerControlFlowLegacyPass(*PR); - initializeSIPreEmitPeepholePass(*PR); + initializeSIPreEmitPeepholeLegacyPass(*PR); initializeSILateBranchLoweringLegacyPass(*PR); initializeSIMemoryLegalizerLegacyPass(*PR); initializeSIOptimizeExecMaskingLegacyPass(*PR); @@ -2166,9 +2166,8 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const { if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less)) addPass(AMDGPUSetWavePriorityPass()); - if (TM.getOptLevel() > CodeGenOptLevel::None) { - // TODO: addPass(SIPreEmitPeepholePass()); - } + if (TM.getOptLevel() > CodeGenOptLevel::None) + addPass(SIPreEmitPeepholePass()); // The hazard recognizer that runs as part of the post-ra scheduler does not // guarantee to be able handle all hazards correctly. This is because if there diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp index 2bb70c138a50c..9db2118f2997b 100644 --- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp @@ -24,7 +24,7 @@ using namespace llvm; namespace { -class SIPreEmitPeephole : public MachineFunctionPass { +class SIPreEmitPeephole { private: const SIInstrInfo *TII = nullptr; const SIRegisterInfo *TRI = nullptr; @@ -40,24 +40,31 @@ class SIPreEmitPeephole : public MachineFunctionPass { const MachineBasicBlock &To) const; bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB); +public: + bool run(MachineFunction &MF); +}; + +class SIPreEmitPeepholeLegacy : public MachineFunctionPass { public: static char ID; - SIPreEmitPeephole() : MachineFunctionPass(ID) { - initializeSIPreEmitPeepholePass(*PassRegistry::getPassRegistry()); + SIPreEmitPeepholeLegacy() : MachineFunctionPass(ID) { + initializeSIPreEmitPeepholeLegacyPass(*PassRegistry::getPassRegistry()); } - bool runOnMachineFunction(MachineFunction &MF) override; + bool runOnMachineFunction(MachineFunction &MF) override { + return SIPreEmitPeephole().run(MF); + } }; } // End anonymous namespace. -INITIALIZE_PASS(SIPreEmitPeephole, DEBUG_TYPE, +INITIALIZE_PASS(SIPreEmitPeepholeLegacy, DEBUG_TYPE, "SI peephole optimizations", false, false) -char SIPreEmitPeephole::ID = 0; +char SIPreEmitPeepholeLegacy::ID = 0; -char &llvm::SIPreEmitPeepholeID = SIPreEmitPeephole::ID; +char &llvm::SIPreEmitPeepholeID = SIPreEmitPeepholeLegacy::ID; bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const { // Match: @@ -410,7 +417,15 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI, return true; } -bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) { +PreservedAnalyses +llvm::SIPreEmitPeepholePass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + if (!SIPreEmitPeephole().run(MF)) + return PreservedAnalyses::all(); + return getMachineFunctionPassPreservedAnalyses(); +} + +bool SIPreEmitPeephole::run(MachineFunction &MF) { const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); TII = ST.getInstrInfo(); TRI = &TII->getRegisterInfo(); diff --git a/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir b/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir index d89f306c96a36..2e8c8ca9c7a6c 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir +++ b/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn -mcpu=polaris10 -run-pass si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=polaris10 -passes si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s --- diff --git a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir index 20de119471ba3..92a9a195fc4c7 100644 --- a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir +++ b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s # Make sure mandatory skips are not removed around mode defs. --- diff --git a/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir b/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir index 796a70cfe8a39..1d0a6db36ea3b 100644 --- a/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir +++ b/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass si-pre-emit-peephole -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes si-pre-emit-peephole -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX --- name: simple _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits