https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/127484
>From 3a2b041e192d8ec5f45734d8ec7321e77e62145c Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Mon, 17 Feb 2025 17:12:22 +0700 Subject: [PATCH 1/2] AMDGPU: Extract lambda used in foldImmediate into a helper function It was also too permissive for a more general utility; only return the original immediate if there is no subregister. --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 59 ++++++++++++++++---------- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 9 ++++ 2 files changed, 45 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 4ee5ebd7681b8..07addb38b8711 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3437,6 +3437,30 @@ void SIInstrInfo::removeModOperands(MachineInstr &MI) const { } } +std::optional<int64_t> SIInstrInfo::extractSubregFromImm(int64_t Imm, + unsigned SubRegIndex) { + switch (SubRegIndex) { + case AMDGPU::NoSubRegister: + return Imm; + case AMDGPU::sub0: + return Lo_32(Imm); + case AMDGPU::sub1: + return Hi_32(Imm); + case AMDGPU::lo16: + return SignExtend64<16>(Imm); + case AMDGPU::hi16: + return SignExtend64<16>(Imm >> 16); + case AMDGPU::sub1_lo16: + return SignExtend64<16>(Imm >> 32); + case AMDGPU::sub1_hi16: + return SignExtend64<16>(Imm >> 48); + default: + return std::nullopt; + } + + llvm_unreachable("covered subregister switch"); +} + bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const { if (!MRI->hasOneNonDBGUse(Reg)) @@ -3446,25 +3470,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (!getConstValDefinedInReg(DefMI, Reg, Imm)) return false; - auto getImmFor = [=](const MachineOperand &UseOp) -> int64_t { - switch (UseOp.getSubReg()) { - default: - return Imm; - case AMDGPU::sub0: - return Lo_32(Imm); - case AMDGPU::sub1: - return Hi_32(Imm); - case AMDGPU::lo16: - return 
SignExtend64<16>(Imm); - case AMDGPU::hi16: - return SignExtend64<16>(Imm >> 16); - case AMDGPU::sub1_lo16: - return SignExtend64<16>(Imm >> 32); - case AMDGPU::sub1_hi16: - return SignExtend64<16>(Imm >> 48); - } - }; - assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form"); unsigned Opc = UseMI.getOpcode(); @@ -3480,7 +3485,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, : AMDGPU::V_MOV_B32_e32 : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO : AMDGPU::S_MOV_B32; - APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)), + + std::optional<int64_t> SubRegImm = + extractSubregFromImm(Imm, UseMI.getOperand(1).getSubReg()); + + APInt Imm(Is64Bit ? 64 : 32, *SubRegImm, /*isSigned=*/true, /*implicitTrunc=*/true); if (RI.isAGPR(*MRI, DstReg)) { @@ -3591,7 +3600,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (NewOpc == AMDGPU::V_FMAMK_F16_fake16) return false; - const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1); + const std::optional<int64_t> SubRegImm = extractSubregFromImm( + Imm, RegSrc == Src1 ? Src0->getSubReg() : Src1->getSubReg()); // FIXME: This would be a lot easier if we could return a new instruction // instead of having to modify in place. @@ -3608,7 +3618,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, UseMI.untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); - Src1->ChangeToImmediate(Imm); + Src1->ChangeToImmediate(*SubRegImm); removeModOperands(UseMI); UseMI.setDesc(get(NewOpc)); @@ -3679,8 +3689,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, UseMI.untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); + const std::optional<int64_t> SubRegImm = + extractSubregFromImm(Imm, Src2->getSubReg()); + // ChangingToImmediate adds Src2 back to the instruction. - Src2->ChangeToImmediate(getImmFor(*Src2)); + Src2->ChangeToImmediate(*SubRegImm); // These come before src2. 
removeModOperands(UseMI); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index ddd15e1766f70..06dbdf65e458f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -401,6 +401,15 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { void removeModOperands(MachineInstr &MI) const; + /// Return the extracted immediate value in a subregister use from a constant + /// materialized in a super register. + /// + /// e.g. %imm = S_MOV_B64 K[0:63] + /// USE %imm.sub1 + /// This will return k[32:63] + static std::optional<int64_t> extractSubregFromImm(int64_t ImmVal, + unsigned SubRegIndex); + bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final; >From 5eada95da0392023247c3e01da21cc73c8c09486 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Mon, 17 Feb 2025 23:56:04 +0700 Subject: [PATCH 2/2] Update llvm/lib/Target/AMDGPU/SIInstrInfo.h Co-authored-by: Shilei Tian <i...@tianshilei.me> --- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 06dbdf65e458f..79ecc2a657ed0 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -406,7 +406,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { /// /// e.g. %imm = S_MOV_B64 K[0:63] /// USE %imm.sub1 - /// This will return k[32:63] + /// This will return K[32:63] static std::optional<int64_t> extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex); _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits