================
@@ -19,27 +19,16 @@ define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(ptr 
addrspace(1) %out) {
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
-; GFX11-TRUE16-LABEL: store_inline_imm_neg_0.0_i16:
-; GFX11-TRUE16:       ; %bb.0:
-; GFX11-TRUE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: 
[0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 0x8000 ; encoding: 
[0xff,0x38,0x00,0x7e,0x00,0x80,0xff,0xff]
-; GFX11-TRUE16-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: 
[0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
-; GFX11-TRUE16-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
-; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
-; GFX11-TRUE16-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 dlc ; encoding: 
[0x00,0x20,0x64,0xe0,0x00,0x00,0x00,0x80]
-; GFX11-TRUE16-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: 
[0x00,0x00,0x7c,0xbc]
-; GFX11-TRUE16-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
-;
-; GFX11-FAKE16-LABEL: store_inline_imm_neg_0.0_i16:
-; GFX11-FAKE16:       ; %bb.0:
-; GFX11-FAKE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: 
[0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
-; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: 
[0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
-; GFX11-FAKE16-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: 
[0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
-; GFX11-FAKE16-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
-; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
-; GFX11-FAKE16-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 dlc ; encoding: 
[0x00,0x20,0x64,0xe0,0x00,0x00,0x00,0x80]
-; GFX11-FAKE16-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: 
[0x00,0x00,0x7c,0xbc]
-; GFX11-FAKE16-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
+; GFX11-LABEL: store_inline_imm_neg_0.0_i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: 
[0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: 
[0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
----------------
rampitec wrote:

So yes, the problem is that we do not have a t16 version of 
`BUFFER_STORE_SHORT_OFFSET`. Look at how it works without this patch:
```
  %19:vgpr_16 = V_MOV_B16_t16_e64 0, -32768, 0, implicit $exec
  %20:vgpr_32 = COPY %19:vgpr_16
  BUFFER_STORE_SHORT_OFFSET killed %20:vgpr_32, killed %18:sgpr_128, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into %ir.out.load, addrspace 1)
```
So hi16 is also clobbered, and we also have a `COPY` with a register class (RC) mismatch.

https://github.com/llvm/llvm-project/pull/173318
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to