llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Ruiling, Song (ruiling) <details> <summary>Changes</summary> The address calculation may happen on i32 and be sign extended to the i64 offset. --- Full diff: https://github.com/llvm/llvm-project/pull/178608.diff 3 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (+27-3) - (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+47-12) - (modified) llvm/test/CodeGen/AMDGPU/load-saddr-offset-imm.ll (+6-10) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 1446c84ef733b..1d6a7b4988528 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1981,6 +1981,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr, bool NeedIOffset) const { int64_t ImmOffset = 0; ScaleOffset = false; + const SIInstrInfo *TII = Subtarget->getInstrInfo(); // Match the immediate offset first, which canonically is moved as low as // possible. 
@@ -1988,7 +1989,6 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue LHS, RHS; if (isBaseWithConstantOffset64(Addr, LHS, RHS)) { int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue(); - const SIInstrInfo *TII = Subtarget->getInstrInfo(); if (NeedIOffset && TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, @@ -2037,13 +2037,37 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr, LHS = Addr.getOperand(0); if (!LHS->isDivergent()) { - // add (i64 sgpr), (*_extend (i32 vgpr)) RHS = Addr.getOperand(1); - ScaleOffset = SelectScaleOffset(N, RHS, Subtarget->hasSignedGVSOffset()); + if (SDValue ExtRHS = matchExtFromI32orI32( RHS, Subtarget->hasSignedGVSOffset(), CurDAG)) { + // add (i64 sgpr), (*_extend (scale (i32 vgpr))) SAddr = LHS; VOffset = ExtRHS; + if (NeedIOffset && !ImmOffset && + CurDAG->isBaseWithConstantOffset(ExtRHS)) { + // add (i64 sgpr), (*_extend (add (scale (i32 vgpr)), (i32 imm))) + int64_t COffset = + cast<ConstantSDNode>(ExtRHS.getOperand(1))->getSExtValue(); + if (TII->isLegalFLATOffset(COffset, AMDGPUAS::GLOBAL_ADDRESS, + SIInstrFlags::FlatGlobal)) { + VOffset = ExtRHS.getOperand(0); + ImmOffset = + cast<ConstantSDNode>(ExtRHS.getOperand(1))->getSExtValue(); + } + } + + ScaleOffset = + SelectScaleOffset(N, VOffset, Subtarget->hasSignedGVSOffset()); + } else { + // add (i64 sgpr), (scale (*_extend (i32 vgpr))) + ScaleOffset = + SelectScaleOffset(N, RHS, Subtarget->hasSignedGVSOffset()); + if (SDValue ExtRHS = matchExtFromI32orI32( + RHS, Subtarget->hasSignedGVSOffset(), CurDAG)) { + SAddr = LHS; + VOffset = ExtRHS; + } } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index e239e6f56cb44..6fdcca3443588 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -5843,24 +5843,59 @@ AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand 
&Root, // It's possible voffset is an SGPR here, but the copy to VGPR will be // inserted later. - bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset, - Subtarget->hasSignedGVSOffset()); if (Register VOffset = matchExtendFromS32OrS32( PtrBaseOffset, Subtarget->hasSignedGVSOffset())) { + if (NeedIOffset && !ImmOffset) { + MachineInstr *VOffsetDef = getDefIgnoringCopies(VOffset, *MRI); + if (VOffsetDef->getOpcode() == TargetOpcode::G_ADD) { + Register RHS = VOffsetDef->getOperand(2).getReg(); + std::optional<ValueAndVReg> RHSValReg = + getIConstantVRegValWithLookThrough(RHS, *MRI); + if (RHSValReg && + TII.isLegalFLATOffset(RHSValReg->Value.getSExtValue(), + AMDGPUAS::GLOBAL_ADDRESS, + SIInstrFlags::FlatGlobal)) { + VOffset = VOffsetDef->getOperand(1).getReg(); + ImmOffset = RHSValReg->Value.getSExtValue(); + } + } + } + + bool ScaleOffset = + selectScaleOffset(Root, VOffset, Subtarget->hasSignedGVSOffset()); if (NeedIOffset) - return {{[=](MachineInstrBuilder &MIB) { // saddr - MIB.addReg(SAddr); - }, - [=](MachineInstrBuilder &MIB) { // voffset - MIB.addReg(VOffset); - }, - [=](MachineInstrBuilder &MIB) { // offset - MIB.addImm(ImmOffset); - }, - [=](MachineInstrBuilder &MIB) { // cpol + return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); }, + [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(CPolBits | (ScaleOffset ? AMDGPU::CPol::SCAL : 0)); }}}; + return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); }, + [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); }, + [=](MachineInstrBuilder &MIB) { + MIB.addImm(CPolBits | + (ScaleOffset ? 
AMDGPU::CPol::SCAL : 0)); + }}}; + } else { + bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset, + Subtarget->hasSignedGVSOffset()); + if (Register VOffset = matchExtendFromS32OrS32( + PtrBaseOffset, Subtarget->hasSignedGVSOffset())) + if (NeedIOffset) + return {{[=](MachineInstrBuilder &MIB) { // saddr + MIB.addReg(SAddr); + }, + [=](MachineInstrBuilder &MIB) { // voffset + MIB.addReg(VOffset); + }, + [=](MachineInstrBuilder &MIB) { // offset + MIB.addImm(ImmOffset); + }, + [=](MachineInstrBuilder &MIB) { // cpol + MIB.addImm(CPolBits | + (ScaleOffset ? AMDGPU::CPol::SCAL : 0)); + }}}; return {{[=](MachineInstrBuilder &MIB) { // saddr MIB.addReg(SAddr); }, diff --git a/llvm/test/CodeGen/AMDGPU/load-saddr-offset-imm.ll b/llvm/test/CodeGen/AMDGPU/load-saddr-offset-imm.ll index a1e229d09b777..fd26847d83cf8 100644 --- a/llvm/test/CodeGen/AMDGPU/load-saddr-offset-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/load-saddr-offset-imm.ll @@ -10,8 +10,8 @@ define amdgpu_ps <2 x float> @global_load_saddr_offset_imm(ptr addrspace(1) inre ; GFX12-SDAG: ; %bb.0: ; GFX12-SDAG-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-SDAG-NEXT: v_lshl_add_u32 v0, v0, 3, 0x80 -; GFX12-SDAG-NEXT: global_load_b64 v[0:1], v0, s[2:3] +; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GFX12-SDAG-NEXT: global_load_b64 v[0:1], v0, s[2:3] offset:128 ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX12-SDAG-NEXT: ; return to shader part epilog ; @@ -19,9 +19,7 @@ define amdgpu_ps <2 x float> @global_load_saddr_offset_imm(ptr addrspace(1) inre ; GFX1250-SDAG: ; %bb.0: ; GFX1250-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GFX1250-SDAG-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u32 v0, v0, 3, 0x80 -; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v0, s[2:3] +; GFX1250-SDAG-NEXT: global_load_b64 v[0:1], v0, s[2:3] offset:128 scale_offset ; GFX1250-SDAG-NEXT: 
s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: ; return to shader part epilog ; @@ -29,8 +27,8 @@ define amdgpu_ps <2 x float> @global_load_saddr_offset_imm(ptr addrspace(1) inre ; GFX12-GISEL: ; %bb.0: ; GFX12-GISEL-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-GISEL-NEXT: v_lshl_add_u32 v0, v0, 3, 0x80 -; GFX12-GISEL-NEXT: global_load_b64 v[0:1], v0, s[2:3] +; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 3, v0 +; GFX12-GISEL-NEXT: global_load_b64 v[0:1], v0, s[2:3] offset:128 ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX12-GISEL-NEXT: ; return to shader part epilog ; @@ -38,9 +36,7 @@ define amdgpu_ps <2 x float> @global_load_saddr_offset_imm(ptr addrspace(1) inre ; GFX1250-GISEL: ; %bb.0: ; GFX1250-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GFX1250-GISEL-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_lshl_add_u32 v0, v0, 3, 0x80 -; GFX1250-GISEL-NEXT: global_load_b64 v[0:1], v0, s[2:3] +; GFX1250-GISEL-NEXT: global_load_b64 v[0:1], v0, s[2:3] offset:128 scale_offset ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: ; return to shader part epilog %v = tail call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) `````````` </details> https://github.com/llvm/llvm-project/pull/178608 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
