https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/127484
It was also too permissive for a more general utility, only return the original immediate if there is no subregister. >From b59c65f9ae4d20211cc01e05743505a5f493ff81 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Mon, 17 Feb 2025 17:12:22 +0700 Subject: [PATCH] AMDGPU: Extract lambda used in foldImmediate into a helper function It was also too permissive for a more general utility, only return the original immediate if there is no subregister. --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 59 ++++++++++++++++---------- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 9 ++++ 2 files changed, 45 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 4cb07b1df04ce..b5f36f67a37ac 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3437,6 +3437,30 @@ void SIInstrInfo::removeModOperands(MachineInstr &MI) const { } } +std::optional<int64_t> SIInstrInfo::extractSubregFromImm(int64_t Imm, + unsigned SubRegIndex) { + switch (SubRegIndex) { + case AMDGPU::NoSubRegister: + return Imm; + case AMDGPU::sub0: + return Lo_32(Imm); + case AMDGPU::sub1: + return Hi_32(Imm); + case AMDGPU::lo16: + return SignExtend64<16>(Imm); + case AMDGPU::hi16: + return SignExtend64<16>(Imm >> 16); + case AMDGPU::sub1_lo16: + return SignExtend64<16>(Imm >> 32); + case AMDGPU::sub1_hi16: + return SignExtend64<16>(Imm >> 48); + default: + return std::nullopt; + } + + llvm_unreachable("covered subregister switch"); +} + bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const { if (!MRI->hasOneNonDBGUse(Reg)) @@ -3446,25 +3470,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (!getConstValDefinedInReg(DefMI, Reg, Imm)) return false; - auto getImmFor = [=](const MachineOperand &UseOp) -> int64_t { - switch (UseOp.getSubReg()) { - default: - return Imm; - case 
AMDGPU::sub0: - return Lo_32(Imm); - case AMDGPU::sub1: - return Hi_32(Imm); - case AMDGPU::lo16: - return SignExtend64<16>(Imm); - case AMDGPU::hi16: - return SignExtend64<16>(Imm >> 16); - case AMDGPU::sub1_lo16: - return SignExtend64<16>(Imm >> 32); - case AMDGPU::sub1_hi16: - return SignExtend64<16>(Imm >> 48); - } - }; - assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form"); unsigned Opc = UseMI.getOpcode(); @@ -3480,7 +3485,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, : AMDGPU::V_MOV_B32_e32 : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO : AMDGPU::S_MOV_B32; - APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)), + + std::optional<int64_t> SubRegImm = + extractSubregFromImm(Imm, UseMI.getOperand(1).getSubReg()); + + APInt Imm(Is64Bit ? 64 : 32, *SubRegImm, /*isSigned=*/true, /*implicitTrunc=*/true); if (RI.isAGPR(*MRI, DstReg)) { @@ -3591,7 +3600,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (NewOpc == AMDGPU::V_FMAMK_F16_fake16) return false; - const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1); + const std::optional<int64_t> SubRegImm = extractSubregFromImm( + Imm, RegSrc == Src1 ? Src0->getSubReg() : Src1->getSubReg()); // FIXME: This would be a lot easier if we could return a new instruction // instead of having to modify in place. @@ -3608,7 +3618,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, UseMI.untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); - Src1->ChangeToImmediate(Imm); + Src1->ChangeToImmediate(*SubRegImm); removeModOperands(UseMI); UseMI.setDesc(get(NewOpc)); @@ -3679,8 +3689,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, UseMI.untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); + const std::optional<int64_t> SubRegImm = + extractSubregFromImm(Imm, Src2->getSubReg()); + // ChangingToImmediate adds Src2 back to the instruction. 
- Src2->ChangeToImmediate(getImmFor(*Src2)); + Src2->ChangeToImmediate(*SubRegImm); // These come before src2. removeModOperands(UseMI); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index ddd15e1766f70..06dbdf65e458f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -401,6 +401,15 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { void removeModOperands(MachineInstr &MI) const; + /// Return the extracted immediate value in a subregister use from a constant + /// materialized in a super register. + /// + /// e.g. %imm = S_MOV_B64 K[0:63] + /// USE %imm.sub1 + /// This will return K[32:63] + static std::optional<int64_t> extractSubregFromImm(int64_t ImmVal, + unsigned SubRegIndex); + bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final; _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits