Author: Petar Avramovic Date: 2024-10-01T08:56:50+02:00 New Revision: 962edd3f71eebdcd781222cdd97a561979894003
URL: https://github.com/llvm/llvm-project/commit/962edd3f71eebdcd781222cdd97a561979894003 DIFF: https://github.com/llvm/llvm-project/commit/962edd3f71eebdcd781222cdd97a561979894003.diff LOG: AMDGPU: Fix inst-selection of large scratch offsets with sgpr base (#110256) Use i32 for offset instead of i16, this way it does not get interpreted as negative 16 bit offset. (cherry picked from commit 83fe85115da9dc25fa270d2ea8140113c8d49670) Added: Modified: llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp llvm/test/CodeGen/AMDGPU/flat-scratch.ll Removed: ################################################################################ diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index b7471bab128509..7b786ee2641721 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1911,7 +1911,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr, 0); } - Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16); + Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i32); return true; } @@ -1967,7 +1967,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr, return false; if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset)) return false; - Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16); + Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32); return true; } } @@ -2000,7 +2000,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr, if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset)) return false; SAddr = SelectSAddrFI(CurDAG, SAddr); - Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16); + Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32); return true; } diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll index f040b47428640a..284f1746145225 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -4956,7 +4956,7 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa ; ; GFX12-LABEL: sgpr_base_large_offset: ; GFX12: ; %bb.0: ; %entry -; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:-24 +; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:65512 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: global_store_b32 v[0:1], v2, off ; GFX12-NEXT: s_nop 0 @@ -5015,7 +5015,7 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa ; ; GFX12-PAL-LABEL: sgpr_base_large_offset: ; GFX12-PAL: ; %bb.0: ; %entry -; GFX12-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:-24 +; GFX12-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:65512 ; GFX12-PAL-NEXT: s_wait_loadcnt 0x0 ; GFX12-PAL-NEXT: global_store_b32 v[0:1], v2, off ; GFX12-PAL-NEXT: s_nop 0 @@ -5068,7 +5068,7 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: v_mov_b32_e32 v2, 0x1000000 ; GFX12-NEXT: s_and_b32 s0, s0, -4 -; GFX12-NEXT: scratch_load_b32 v2, v2, s0 offset:-24 scope:SCOPE_SYS +; GFX12-NEXT: scratch_load_b32 v2, v2, s0 offset:65512 scope:SCOPE_SYS ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: global_store_b32 v[0:1], v2, off ; GFX12-NEXT: s_nop 0 @@ -5133,7 +5133,7 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a ; GFX12-PAL: ; %bb.0: ; %entry ; GFX12-PAL-NEXT: v_mov_b32_e32 v2, 0x1000000 ; GFX12-PAL-NEXT: s_and_b32 s0, s0, -4 -; GFX12-PAL-NEXT: scratch_load_b32 v2, v2, s0 offset:-24 scope:SCOPE_SYS +; GFX12-PAL-NEXT: scratch_load_b32 v2, v2, s0 offset:65512 scope:SCOPE_SYS ; GFX12-PAL-NEXT: s_wait_loadcnt 0x0 ; GFX12-PAL-NEXT: global_store_b32 v[0:1], v2, off ; GFX12-PAL-NEXT: s_nop 0 @@ -5189,7 +5189,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a ; GFX12: ; %bb.0: ; %bb ; GFX12-NEXT: v_mov_b32_e32 v1, 15 ; GFX12-NEXT: s_add_co_i32 s0, s0, s1 -; GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:-24 scope:SCOPE_SYS +; GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: s_endpgm ; @@ -5251,7 +5251,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a ; GFX12-PAL: ; %bb.0: ; %bb ; GFX12-PAL-NEXT: v_mov_b32_e32 v1, 15 ; GFX12-PAL-NEXT: s_add_co_i32 s0, s0, s1 -; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, s0 offset:-24 scope:SCOPE_SYS +; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS ; GFX12-PAL-NEXT: s_wait_storecnt 0x0 ; GFX12-PAL-NEXT: s_endpgm bb: _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits