llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Stanislav Mekhanoshin (rampitec) <details> <summary>Changes</summary> --- Patch is 41.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/173318.diff 6 Files Affected: - (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+15-1) - (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/br_cc.f16.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/fma.f16.ll (+6-6) - (modified) llvm/test/CodeGen/AMDGPU/imm16.ll (+127-268) - (modified) llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll (+4-4) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 57e99d9719c83..a7bae73cddfe0 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1326,7 +1326,8 @@ bool SIInstrInfo::getConstValDefinedInReg(const MachineInstr &MI, case AMDGPU::AV_MOV_B32_IMM_PSEUDO: case AMDGPU::AV_MOV_B64_IMM_PSEUDO: case AMDGPU::S_MOV_B64_IMM_PSEUDO: - case AMDGPU::V_MOV_B64_PSEUDO: { + case AMDGPU::V_MOV_B64_PSEUDO: + case AMDGPU::V_MOV_B16_t16_e32: { const MachineOperand &Src0 = MI.getOperand(1); if (Src0.isImm()) { ImmVal = Src0.getImm(); @@ -1335,6 +1336,15 @@ bool SIInstrInfo::getConstValDefinedInReg(const MachineInstr &MI, return false; } + case AMDGPU::V_MOV_B16_t16_e64: { + const MachineOperand &Src0 = MI.getOperand(2); + if (Src0.isImm()) { + ImmVal = Src0.getImm(); + return MI.getOperand(0).getReg() == Reg; + } + + return false; + } case AMDGPU::S_BREV_B32: case AMDGPU::V_BFREV_B32_e32: case AMDGPU::V_BFREV_B32_e64: { @@ -3545,6 +3555,8 @@ static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc) { case AMDGPU::V_FMAC_F16_e64: case AMDGPU::V_FMAC_F16_t16_e64: case AMDGPU::V_FMAC_F16_fake16_e64: + case AMDGPU::V_FMAC_F16_t16_e32: + case AMDGPU::V_FMAC_F16_fake16_e32: case AMDGPU::V_FMA_F16_e64: return ST.hasTrue16BitInsts() ? 
ST.useRealTrue16Insts() ? AMDGPU::V_FMAAK_F16_t16 @@ -3577,6 +3589,8 @@ static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc) { case AMDGPU::V_FMAC_F16_e64: case AMDGPU::V_FMAC_F16_t16_e64: case AMDGPU::V_FMAC_F16_fake16_e64: + case AMDGPU::V_FMAC_F16_t16_e32: + case AMDGPU::V_FMAC_F16_fake16_e32: case AMDGPU::V_FMA_F16_e64: return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts() ? AMDGPU::V_FMAMK_F16_t16 diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 1d1e95908fce6..3cd0da3e633a4 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -862,7 +862,7 @@ let SubtargetPredicate = isGFX11Plus in { // Restrict src0 to be VGPR def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS, [], /*VOP1Only=*/ 1>; - let isAsCheapAsAMove = 1 in + let isAsCheapAsAMove = 1, isMoveImm = 1 in defm V_MOV_B16 : VOP1Inst_t16<"v_mov_b16", VOP_I16_I16>; defm V_NOT_B16 : VOP1Inst_t16<"v_not_b16", VOP_I16_I16>; defm V_CVT_I32_I16 : VOP1Inst_t16<"v_cvt_i32_i16", VOP_I32_I16>; diff --git a/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll b/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll index bfef88cdba9ed..2761cba5ea71b 100644 --- a/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll @@ -197,7 +197,7 @@ define amdgpu_kernel void @br_cc_f16_imm_a( ; GFX11-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v1.l ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB1_2 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %one -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3800 +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0x3800 ; GFX11-TRUE16-NEXT: .LBB1_2: ; %two ; GFX11-TRUE16-NEXT: s_mov_b32 s2, s6 ; GFX11-TRUE16-NEXT: s_mov_b32 s3, s7 @@ -303,7 +303,7 @@ define amdgpu_kernel void @br_cc_f16_imm_b( ; GFX11-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v1.l ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB2_2 ; GFX11-TRUE16-NEXT: ; %bb.1: ; %two -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3800 +; 
GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0x3800 ; GFX11-TRUE16-NEXT: .LBB2_2: ; %one ; GFX11-TRUE16-NEXT: s_mov_b32 s2, s6 ; GFX11-TRUE16-NEXT: s_mov_b32 s3, s7 diff --git a/llvm/test/CodeGen/AMDGPU/fma.f16.ll b/llvm/test/CodeGen/AMDGPU/fma.f16.ll index 20db029aad27f..5c5dbe4f12e78 100644 --- a/llvm/test/CodeGen/AMDGPU/fma.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fma.f16.ll @@ -415,11 +415,11 @@ define i32 @test_D139469_f16(half %arg) { ; GFX11-SDAG-TRUE16-LABEL: test_D139469_f16: ; GFX11-SDAG-TRUE16: ; %bb.0: ; %bb ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0x211e +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0x291e ; GFX11-SDAG-TRUE16-NEXT: v_mul_f16_e32 v1.l, 0x291e, v0.l ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_fmac_f16_e32 v0.h, 0x291e, v0.l -; GFX11-SDAG-TRUE16-NEXT: v_min_f16_e32 v0.l, v1.l, v0.h +; GFX11-SDAG-TRUE16-NEXT: v_fmaak_f16 v0.l, v0.l, v0.h, 0x211e +; GFX11-SDAG-TRUE16-NEXT: v_min_f16_e32 v0.l, v1.l, v0.l ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo @@ -472,11 +472,11 @@ define i32 @test_D139469_f16(half %arg) { ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0x211e +; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0x291e ; GFX12-SDAG-TRUE16-NEXT: v_mul_f16_e32 v1.l, 0x291e, v0.l ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-SDAG-TRUE16-NEXT: v_fmac_f16_e32 v0.h, 0x291e, v0.l -; GFX12-SDAG-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v1.l, v0.h +; GFX12-SDAG-TRUE16-NEXT: v_fmaak_f16 v0.l, v0.l, v0.h, 0x211e +; GFX12-SDAG-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v1.l, v0.l ; 
GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v0.l ; GFX12-SDAG-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0) diff --git a/llvm/test/CodeGen/AMDGPU/imm16.ll b/llvm/test/CodeGen/AMDGPU/imm16.ll index 91b9773ad159e..676773a94d29a 100644 --- a/llvm/test/CodeGen/AMDGPU/imm16.ll +++ b/llvm/test/CodeGen/AMDGPU/imm16.ll @@ -19,27 +19,16 @@ define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(ptr addrspace(1) %out) { ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; -; GFX11-TRUE16-LABEL: store_inline_imm_neg_0.0_i16: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x8000 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x80,0xff,0xff] -; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] -; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] -; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] -; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 dlc ; encoding: [0x00,0x20,0x64,0xe0,0x00,0x00,0x00,0x80] -; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] -; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] -; -; GFX11-FAKE16-LABEL: store_inline_imm_neg_0.0_i16: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff] -; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] -; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] -; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] -; GFX11-FAKE16-NEXT: buffer_store_b16 v0, 
off, s[0:3], 0 dlc ; encoding: [0x00,0x20,0x64,0xe0,0x00,0x00,0x00,0x80] -; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] -; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] +; GFX11-LABEL: store_inline_imm_neg_0.0_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] +; GFX11-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff] +; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] +; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] +; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 dlc ; encoding: [0x00,0x20,0x64,0xe0,0x00,0x00,0x00,0x80] +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] +; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] ; ; VI-LABEL: store_inline_imm_neg_0.0_i16: ; VI: ; %bb.0: @@ -77,25 +66,15 @@ define amdgpu_kernel void @store_inline_imm_0.0_f16(ptr addrspace(1) %out) { ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; -; GFX11-TRUE16-LABEL: store_inline_imm_0.0_f16: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0 ; encoding: [0x80,0x38,0x00,0x7e] -; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] -; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] -; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] -; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] -; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] -; -; GFX11-FAKE16-LABEL: 
store_inline_imm_0.0_f16: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] -; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] -; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] -; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] -; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] -; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] +; GFX11-LABEL: store_inline_imm_0.0_f16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] +; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] +; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] +; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] +; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] +; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] ; ; VI-LABEL: store_inline_imm_0.0_f16: ; VI: ; %bb.0: @@ -131,25 +110,15 @@ define amdgpu_kernel void @store_imm_neg_0.0_f16(ptr addrspace(1) %out) { ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; -; GFX11-TRUE16-LABEL: store_imm_neg_0.0_f16: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x8000 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x80,0xff,0xff] -; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] -; 
GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] -; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] -; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] -; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] -; -; GFX11-FAKE16-LABEL: store_imm_neg_0.0_f16: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff] -; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] -; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] -; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] -; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] -; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] +; GFX11-LABEL: store_imm_neg_0.0_f16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] +; GFX11-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff] +; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] +; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] +; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] +; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] ; ; VI-LABEL: store_imm_neg_0.0_f16: ; VI: ; %bb.0: @@ -185,25 +154,15 @@ define amdgpu_kernel void @store_inline_imm_0.5_f16(ptr addrspace(1) %out) { ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] ; GFX10-NEXT: s_endpgm ; encoding: 
[0x00,0x00,0x81,0xbf] ; -; GFX11-TRUE16-LABEL: store_inline_imm_0.5_f16: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3800 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x38,0x00,0x00] -; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] -; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] -; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] -; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] -; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] -; -; GFX11-FAKE16-LABEL: store_inline_imm_0.5_f16: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00] -; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] -; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] -; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] -; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] -; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] +; GFX11-LABEL: store_inline_imm_0.5_f16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] +; GFX11-NEXT: v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00] +; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] +; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] +; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: 
[0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] +; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] ; ; VI-LABEL: store_inline_imm_0.5_f16: ; VI: ; %bb.0: @@ -239,25 +198,15 @@ define amdgpu_kernel void @store_inline_imm_m_0.5_f16(ptr addrspace(1) %out) { ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; -; GFX11-TRUE16-LABEL: store_inline_imm_m_0.5_f16: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0xb800 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0xb8,0xff,0xff] -; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] -; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] -; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] -; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] -; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] -; -; GFX11-FAKE16-LABEL: store_inline_imm_m_0.5_f16: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff] -; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] -; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] -; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] -; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] -; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] +; GFX11-LABEL: store_inline_imm_m_0.5_f16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: 
[0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] +; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff] +; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] +; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] +; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] +; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] +; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] ; ; VI-LABEL: store_inline_imm_m_0.5_f16: ; VI: ; %bb.0: @@ -293,25 +242,15 @@ define amdgpu_kernel void @store_inline_imm_1.0_f16(ptr addrspace(1) %out) { ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] ; -; GFX11-TRUE16-LABEL: store_inline_imm_1.0_f16: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3c00 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x3c,0x00,0x00] -; GFX11-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] -; GFX11-TRUE16-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] -; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] -; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] -; GFX11-TRUE16-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] -; -; GFX11-FAKE16-LABEL: store_inline_imm_1.0_f16: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00] -; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] -; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 ; 
encoding: [0xc1,0x00,0x82,0xbe] -; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] -; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] -; GFX11-FAKE16-NEXT: s_endpgm ; encoding: [0x00,... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/173318 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
