https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/123695
>From b85cc524ef390d2680359d2ccc7085af11eb2eaf Mon Sep 17 00:00:00 2001 From: Akshat Oke <akshat....@amd.com> Date: Tue, 21 Jan 2025 06:30:07 +0000 Subject: [PATCH] [AMDGPU][NewPM] Port SILowerWWMCopies to NPM --- llvm/lib/Target/AMDGPU/AMDGPU.h | 4 +- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 + .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 7 +- llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp | 81 +++++++++++++------ llvm/lib/Target/AMDGPU/SILowerWWMCopies.h | 22 +++++ .../CodeGen/AMDGPU/si-lower-wwm-copies.mir | 43 ++++++++++ 6 files changed, 127 insertions(+), 31 deletions(-) create mode 100644 llvm/lib/Target/AMDGPU/SILowerWWMCopies.h create mode 100644 llvm/test/CodeGen/AMDGPU/si-lower-wwm-copies.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 5d9a830f041a74..5f3db283c9b447 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -180,8 +180,8 @@ extern char &SIFixSGPRCopiesLegacyID; void initializeSIFixVGPRCopiesPass(PassRegistry &); extern char &SIFixVGPRCopiesID; -void initializeSILowerWWMCopiesPass(PassRegistry &); -extern char &SILowerWWMCopiesID; +void initializeSILowerWWMCopiesLegacyPass(PassRegistry &); +extern char &SILowerWWMCopiesLegacyID; void initializeSILowerI1CopiesLegacyPass(PassRegistry &); extern char &SILowerI1CopiesLegacyID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 09a39d23d801b9..79c19646984e8e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -104,6 +104,7 @@ MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass()) MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass()) MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass()) MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass()) +MACHINE_FUNCTION_PASS("si-lower-wwm-copies", SILowerWWMCopiesPass()) MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass()) MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass()) MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 53ec80b8f72049..5dd296152cb9eb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -40,6 +40,7 @@ #include "SILoadStoreOptimizer.h" #include "SILowerControlFlow.h" #include "SILowerSGPRSpills.h" +#include "SILowerWWMCopies.h" #include "SIMachineFunctionInfo.h" #include "SIMachineScheduler.h" #include "SIOptimizeExecMasking.h" @@ -482,7 +483,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR); initializeAMDGPURegBankSelectPass(*PR); initializeAMDGPURegBankLegalizePass(*PR); - initializeSILowerWWMCopiesPass(*PR); + initializeSILowerWWMCopiesLegacyPass(*PR); initializeAMDGPUMarkLastScratchLoadPass(*PR); initializeSILowerSGPRSpillsLegacyPass(*PR); initializeSIFixSGPRCopiesLegacyPass(*PR); @@ -1581,7 +1582,7 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() { // For allocating other wwm register operands. addPass(createWWMRegAllocPass(false)); - addPass(&SILowerWWMCopiesID); + addPass(&SILowerWWMCopiesLegacyID); addPass(&AMDGPUReserveWWMRegsID); // For allocating per-thread VGPRs. @@ -1617,7 +1618,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() { // For allocating other whole wave mode registers. addPass(createWWMRegAllocPass(true)); - addPass(&SILowerWWMCopiesID); + addPass(&SILowerWWMCopiesLegacyID); addPass(createVirtRegRewriter(false)); addPass(&AMDGPUReserveWWMRegsID); diff --git a/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp b/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp index d80a5f958273a9..ef384c2a1a2150 100644 --- a/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp @@ -15,6 +15,7 @@ // //===----------------------------------------------------------------------===// +#include "SILowerWWMCopies.h" #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" @@ -30,12 +31,30 @@ using namespace llvm; namespace { -class SILowerWWMCopies : public MachineFunctionPass { +class SILowerWWMCopies { +public: + SILowerWWMCopies(LiveIntervals *LIS, SlotIndexes *SI, VirtRegMap *VRM) + : LIS(LIS), Indexes(SI), VRM(VRM) {} + bool run(MachineFunction &MF); + +private: + bool isSCCLiveAtMI(const MachineInstr &MI); + void addToWWMSpills(MachineFunction &MF, Register Reg); + + LiveIntervals *LIS; + SlotIndexes *Indexes; + VirtRegMap *VRM; + const SIRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + SIMachineFunctionInfo *MFI; +}; + +class SILowerWWMCopiesLegacy : public MachineFunctionPass { public: static char ID; - SILowerWWMCopies() : MachineFunctionPass(ID) { - initializeSILowerWWMCopiesPass(*PassRegistry::getPassRegistry()); + SILowerWWMCopiesLegacy() : MachineFunctionPass(ID) { + initializeSILowerWWMCopiesLegacyPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; @@ -49,31 +68,20 @@ class SILowerWWMCopies : public MachineFunctionPass { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } - -private: - bool isSCCLiveAtMI(const MachineInstr &MI); - void addToWWMSpills(MachineFunction &MF, Register Reg); - - LiveIntervals *LIS; - SlotIndexes *Indexes; - VirtRegMap *VRM; - const SIRegisterInfo *TRI; - const MachineRegisterInfo *MRI; - SIMachineFunctionInfo *MFI; }; } // End anonymous namespace. -INITIALIZE_PASS_BEGIN(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies", +INITIALIZE_PASS_BEGIN(SILowerWWMCopiesLegacy, DEBUG_TYPE, "SI Lower WWM Copies", false, false) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy) -INITIALIZE_PASS_END(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies", false, - false) +INITIALIZE_PASS_END(SILowerWWMCopiesLegacy, DEBUG_TYPE, "SI Lower WWM Copies", + false, false) -char SILowerWWMCopies::ID = 0; +char SILowerWWMCopiesLegacy::ID = 0; -char &llvm::SILowerWWMCopiesID = SILowerWWMCopies::ID; +char &llvm::SILowerWWMCopiesLegacyID = SILowerWWMCopiesLegacy::ID; bool SILowerWWMCopies::isSCCLiveAtMI(const MachineInstr &MI) { // We can't determine the liveness info if LIS isn't available. Early return @@ -93,23 +101,44 @@ void SILowerWWMCopies::addToWWMSpills(MachineFunction &MF, Register Reg) { if (Reg.isPhysical()) return; + // FIXME: VRM may be null here. MCRegister PhysReg = VRM->getPhys(Reg); assert(PhysReg && "should have allocated a physical register"); MFI->allocateWWMSpill(MF, PhysReg); } -bool SILowerWWMCopies::runOnMachineFunction(MachineFunction &MF) { +bool SILowerWWMCopiesLegacy::runOnMachineFunction(MachineFunction &MF) { + auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>(); + auto *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr; + + auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>(); + auto *Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr; + + auto *VRMWrapper = getAnalysisIfAvailable<VirtRegMapWrapperLegacy>(); + auto *VRM = VRMWrapper ? &VRMWrapper->getVRM() : nullptr; + + SILowerWWMCopies Impl(LIS, Indexes, VRM); + return Impl.run(MF); +} + +PreservedAnalyses +SILowerWWMCopiesPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF); + auto *Indexes = MFAM.getCachedResult<SlotIndexesAnalysis>(MF); + auto *VRM = MFAM.getCachedResult<VirtRegMapAnalysis>(MF); + + SILowerWWMCopies Impl(LIS, Indexes, VRM); + Impl.run(MF); + return PreservedAnalyses::all(); +} + +bool SILowerWWMCopies::run(MachineFunction &MF) { const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIInstrInfo *TII = ST.getInstrInfo(); MFI = MF.getInfo<SIMachineFunctionInfo>(); - auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>(); - LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr; - auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>(); - Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr; - auto *VRMWrapper = getAnalysisIfAvailable<VirtRegMapWrapperLegacy>(); - VRM = VRMWrapper ? &VRMWrapper->getVRM() : nullptr; TRI = ST.getRegisterInfo(); MRI = &MF.getRegInfo(); diff --git a/llvm/lib/Target/AMDGPU/SILowerWWMCopies.h b/llvm/lib/Target/AMDGPU/SILowerWWMCopies.h new file mode 100644 index 00000000000000..cfc81009017603 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SILowerWWMCopies.h @@ -0,0 +1,22 @@ +//===- SILowerWWMCopies.h ---------------------------------------*- C++- *-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_SILOWERWWMCOPIES_H +#define LLVM_LIB_TARGET_AMDGPU_SILOWERWWMCOPIES_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { +class SILowerWWMCopiesPass : public PassInfoMixin<SILowerWWMCopiesPass> { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); +}; +} // namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_SILOWERWWMCOPIES_H diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-wwm-copies.mir b/llvm/test/CodeGen/AMDGPU/si-lower-wwm-copies.mir new file mode 100644 index 00000000000000..ddae43b8447425 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-lower-wwm-copies.mir @@ -0,0 +1,43 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 + +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=liveintervals,virtregmap,si-lower-wwm-copies -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes="require<live-intervals>,require<virtregmap>,si-lower-wwm-copies" -o - %s | FileCheck %s + +# Check for two cases of $scc being live and dead. +--- +name: lower-wwm-copies +registers: + - { id: 1, class: vgpr_32, flags: [ WWM_REG ]} +machineFunctionInfo: + sgprForEXECCopy: '$sgpr2_sgpr3' +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: lower-wwm-copies + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: S_CMP_EQ_U32 [[DEF]], 0, implicit-def $scc + ; CHECK-NEXT: $sgpr2_sgpr3 = S_MOV_B64 killed $exec + ; CHECK-NEXT: $exec = S_MOV_B64 -1 + ; CHECK-NEXT: $vgpr1 = COPY $vgpr0 + ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3 + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: liveins: $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: $vgpr2 = COPY $vgpr1 + ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3 + bb.0: + liveins: $vgpr0, $scc + %0:sgpr_32 = IMPLICIT_DEF + S_CMP_EQ_U32 %0, 0, implicit-def $scc + $vgpr1 = WWM_COPY $vgpr0 + S_CBRANCH_SCC1 %bb.1, implicit killed $scc + + bb.1: + liveins: $vgpr1 + $vgpr2 = WWM_COPY $vgpr1 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits