https://github.com/nhaehnle updated https://github.com/llvm/llvm-project/pull/166213
From dd8c2ece4a1287580cec17fff56e8eaa314ffef7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <[email protected]> Date: Tue, 7 Oct 2025 12:17:02 -0700 Subject: [PATCH] CodeGen/AMDGPU: Allow 3-address conversion of bundled instructions This is in preparation for future changes in AMDGPU that will make more substantial use of bundles pre-RA. For now, simply test this with degenerate (single-instruction) bundles. commit-id:4a30cb78 --- .../lib/CodeGen/TwoAddressInstructionPass.cpp | 54 ++++++++++-------- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 56 +++++++++++++++++-- llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir | 9 ++- 3 files changed, 87 insertions(+), 32 deletions(-) diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index b99e1c7f19b71..562a6a00045f5 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -794,29 +794,34 @@ bool TwoAddressInstructionImpl::convertInstTo3Addr( if (!NewMI) return false; - LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); - LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); - - // If the old instruction is debug value tracked, an update is required. - if (auto OldInstrNum = mi->peekDebugInstrNum()) { - assert(mi->getNumExplicitDefs() == 1); - assert(NewMI->getNumExplicitDefs() == 1); - - // Find the old and new def location. - unsigned OldIdx = mi->defs().begin()->getOperandNo(); - unsigned NewIdx = NewMI->defs().begin()->getOperandNo(); - - // Record that one def has been replaced by the other. - unsigned NewInstrNum = NewMI->getDebugInstrNum(); - MF->makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx), - std::make_pair(NewInstrNum, NewIdx)); - } - - MBB->erase(mi); // Nuke the old inst. - for (MachineInstr &MI : MIS) DistanceMap.insert(std::make_pair(&MI, Dist++)); - Dist--; + + if (&*mi == NewMI) { + LLVM_DEBUG(dbgs() << "2addr: CONVERTED IN-PLACE TO 3-ADDR: " << *mi); + } else { + LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); + LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); + + // If the old instruction is debug value tracked, an update is required. + if (auto OldInstrNum = mi->peekDebugInstrNum()) { + assert(mi->getNumExplicitDefs() == 1); + assert(NewMI->getNumExplicitDefs() == 1); + + // Find the old and new def location. + unsigned OldIdx = mi->defs().begin()->getOperandNo(); + unsigned NewIdx = NewMI->defs().begin()->getOperandNo(); + + // Record that one def has been replaced by the other. + unsigned NewInstrNum = NewMI->getDebugInstrNum(); + MF->makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx), + std::make_pair(NewInstrNum, NewIdx)); + } + + MBB->erase(mi); // Nuke the old inst. + Dist--; + } + mi = NewMI; nmi = std::next(mi); @@ -1329,6 +1334,9 @@ bool TwoAddressInstructionImpl::tryInstructionTransform( bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist); + // Give targets a chance to convert bundled instructions. + bool ConvertibleTo3Addr = MI.isConvertibleTo3Addr(MachineInstr::AnyInBundle); + // If the instruction is convertible to 3 Addr, instead // of returning try 3 Addr transformation aggressively and // use this variable to check later. Because it might be better. @@ -1337,7 +1345,7 @@ bool TwoAddressInstructionImpl::tryInstructionTransform( // addl %esi, %edi // movl %edi, %eax // ret - if (Commuted && !MI.isConvertibleTo3Addr()) + if (Commuted && !ConvertibleTo3Addr) return false; if (shouldOnlyCommute) @@ -1357,7 +1365,7 @@ bool TwoAddressInstructionImpl::tryInstructionTransform( regBKilled = isKilled(MI, regB, true); } - if (MI.isConvertibleTo3Addr()) { + if (ConvertibleTo3Addr) { // This instruction is potentially convertible to a true // three-address instruction. Check if it is profitable. if (!regBKilled || isProfitableToConv3Addr(regA, regB)) { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 6ce18ea921a9b..deeb8beb04332 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4047,10 +4047,29 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const { MachineBasicBlock &MBB = *MI.getParent(); + MachineInstr *CandidateMI = &MI; + + if (MI.isBundle()) { + // This is a temporary placeholder for bundle handling that enables us to + // exercise the relevant code paths in the two-address instruction pass. + if (MI.getBundleSize() != 1) + return nullptr; + CandidateMI = MI.getNextNode(); + } + ThreeAddressUpdates U; - MachineInstr *NewMI = convertToThreeAddressImpl(MI, U); + MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U); + if (!NewMI) + return nullptr; - if (NewMI) { + if (MI.isBundle()) { + CandidateMI->eraseFromBundle(); + + for (MachineOperand &MO : MI.all_defs()) { + if (MO.isTied()) + MI.untieRegOperand(MO.getOperandNo()); + } + } else { updateLiveVariables(LV, MI, *NewMI); if (LIS) { LIS->ReplaceMachineInstrInMaps(MI, *NewMI); @@ -4091,7 +4110,20 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, LV->getVarInfo(DefReg).AliveBlocks.clear(); } - if (LIS) { + if (MI.isBundle()) { + VirtRegInfo VRI = AnalyzeVirtRegInBundle(MI, DefReg); + if (!VRI.Reads && !VRI.Writes) { + for (MachineOperand &MO : MI.all_uses()) { + if (MO.isReg() && MO.getReg() == DefReg) { + MI.removeOperand(MO.getOperandNo()); + break; + } + } + + if (LIS) + LIS->shrinkToUses(&LIS->getInterval(DefReg)); + } + } else if (LIS) { LiveInterval &DefLI = LIS->getInterval(DefReg); // We cannot delete the original instruction here, so hack out the use @@ -4106,11 +4138,27 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, } } + if (MI.isBundle()) { + VirtRegInfo VRI = AnalyzeVirtRegInBundle(MI, DefReg); + if (!VRI.Reads && !VRI.Writes) { + for (MachineOperand &MIOp : MI.uses()) { + if (MIOp.isReg() && MIOp.getReg() == DefReg) { + MIOp.setIsUndef(true); + MIOp.setReg(DummyReg); + } + } + } + + auto MO = MachineOperand::CreateReg(DummyReg, false); + MO.setIsUndef(true); + MI.addOperand(MO); + } + LIS->shrinkToUses(&DefLI); } } - return NewMI; + return MI.isBundle() ? &MI : NewMI; } MachineInstr * diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir b/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir index 696962a88c8b8..8ae50d8e0e071 100644 --- a/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir +++ b/llvm/test/CodeGen/AMDGPU/twoaddr-bundle.mir @@ -31,7 +31,7 @@ body: | ... -# This test is an example where conversion to three-address form would be beneficial. +# This test is an example where conversion to three-address form is beneficial. --- name: test_fmac_reuse_bundle body: | @@ -41,11 +41,10 @@ body: | ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GCN-NEXT: BUNDLE implicit-def [[COPY1]], implicit [[DEF]], implicit [[DEF1]], implicit [[COPY1]](tied-def 0), implicit $mode, implicit $exec { - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = V_FMAC_F32_e32 killed [[DEF]], killed [[DEF1]], killed [[COPY1]], implicit $mode, implicit $exec + ; GCN-NEXT: BUNDLE implicit-def %3, implicit [[DEF]], implicit [[DEF1]], implicit [[COPY]], implicit $mode, implicit $exec { + ; GCN-NEXT: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F32_e64 0, killed [[DEF]], 0, killed [[DEF1]], 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: } - ; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], [[COPY]], 0, implicit $exec + ; GCN-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_FMA_F32_e64_]], [[COPY]], 0, implicit $exec %2:vgpr_32 = COPY $vgpr0 %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
