llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Stanislav Mekhanoshin (rampitec) <details> <summary>Changes</summary> Fixes: SWDEV-562450 --- Patch is 44.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165035.diff 6 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp (+13-3) - (modified) llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp (+1-1) - (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir (+68-67) - (modified) llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll (+18-18) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp index 0be1dd0817605..f9f0bc619d9f7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp @@ -58,6 +58,8 @@ class AMDGPULowerVGPREncoding { static constexpr unsigned BitsPerField = 2; static constexpr unsigned NumFields = 4; static constexpr unsigned FieldMask = (1 << BitsPerField) - 1; + static constexpr unsigned ModeWidth = NumFields * BitsPerField; + static constexpr unsigned ModeMask = (1 << ModeWidth) - 1; using ModeType = PackedVector<unsigned, BitsPerField, std::bitset<BitsPerField * NumFields>>; @@ -152,13 +154,21 @@ bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask, CurrentMode |= NewMode; CurrentMask |= Mask; - MostRecentModeSet->getOperand(0).setImm(CurrentMode); + MachineOperand &Op = MostRecentModeSet->getOperand(0); + + // Carry old mode bits from the existing instruction. + int64_t OldModeBits = Op.getImm() & (ModeMask << ModeWidth); + + Op.setImm(CurrentMode | OldModeBits); return true; } + // Record previous mode into high 8 bits of the immediate. + int64_t OldModeBits = CurrentMode << ModeWidth; + I = handleClause(I); - MostRecentModeSet = - BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB)).addImm(NewMode); + MostRecentModeSet = BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB)) + .addImm(NewMode | OldModeBits); CurrentMode = NewMode; CurrentMask = Mask; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 680e7eb3de6be..844649ebb9ae6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -412,7 +412,7 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) { *OutStreamer); if (isVerbose() && MI->getOpcode() == AMDGPU::S_SET_VGPR_MSB) { - unsigned V = MI->getOperand(0).getImm(); + unsigned V = MI->getOperand(0).getImm() & 0xff; OutStreamer->AddComment( " msbs: dst=" + Twine(V >> 6) + " src0=" + Twine(V & 3) + " src1=" + Twine((V >> 2) & 3) + " src2=" + Twine((V >> 4) & 3)); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp index 013cfeb364048..28b4da8ab9ebb 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -168,7 +168,7 @@ bool AMDGPUMCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr, void AMDGPUMCInstrAnalysis::updateState(const MCInst &Inst, uint64_t Addr) { if (Inst.getOpcode() == AMDGPU::S_SET_VGPR_MSB_gfx12) - VgprMSBs = Inst.getOperand(0).getImm(); + VgprMSBs = Inst.getOperand(0).getImm() & 0xff; else if (isTerminator(Inst)) VgprMSBs = 0; } diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir index 8a70a8acd28d3..32cc398740d62 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir +++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir @@ -36,7 +36,7 @@ body: | ; GCN-NEXT: v_add_f16_e64 v128.l /*v384.l*/, v129.l /*v385.l*/, v130.l /*v386.l*/ $vgpr384_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr385_lo16, 0, undef $vgpr386_lo16, 0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0x8a + ; GCN-NEXT: s_set_vgpr_msb 0x458a ; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=0 ; GCN-NEXT: v_add_f16_e64 v0.h /*v512.h*/, v1.h /*v513.h*/, v2.h /*v514.h*/ $vgpr512_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr513_hi16, 0, undef $vgpr514_hi16, 0, 0, 0, implicit $exec, implicit $mode @@ -50,7 +50,7 @@ body: | ; GCN-NEXT: v_add_f16_e64 v128.l /*v640.l*/, v129.l /*v641.l*/, v130.l /*v642.l*/ $vgpr640_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr641_lo16, 0, undef $vgpr642_lo16, 0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0xcf + ; GCN-NEXT: s_set_vgpr_msb 0x8acf ; ASM-SAME: ; msbs: dst=3 src0=3 src1=3 src2=0 ; GCN-NEXT: v_add_f16_e64 v0.h /*v768.h*/, v1.h /*v769.h*/, v2.h /*v770.h*/ $vgpr768_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr769_hi16, 0, undef $vgpr770_hi16, 0, 0, 0, implicit $exec, implicit $mode diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir index 2f53c6229ed09..b084ad63a4d5c 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir +++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir @@ -22,13 +22,13 @@ body: | $vgpr257 = V_MOV_B32_e32 undef $vgpr510, implicit $exec ; Single bit change - ; GCN-NEXT: s_set_vgpr_msb 1 + ; GCN-NEXT: s_set_vgpr_msb 0x4101 ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0 ; GCN-NEXT: v_rcp_f32_e64 v255, v2 /*v258*/ $vgpr255 = V_RCP_F32_e64 0, undef $vgpr258, 0, 0, implicit $exec, implicit $mode ; Reset - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x100 ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 ; GCN-NEXT: v_rcp_f32_e64 v255, v1 $vgpr255 = V_RCP_F32_e64 0, undef $vgpr1, 0, 0, implicit $exec, implicit $mode @@ -40,7 +40,7 @@ body: | ; GCN-NEXT: v_add_nc_u32_e32 v0, v253 /*v509*/, v252 /*v508*/ $vgpr0 = V_ADD_U32_e32 undef $vgpr509, undef $vgpr508, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0x44 + ; GCN-NEXT: s_set_vgpr_msb 0x544 ; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=0 ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GCN-NEXT: v_add_f32_e64 v2 /*v258*/, v0, v251 /*v507*/ @@ -48,7 +48,7 @@ body: | ; VOP3 - ; GCN-NEXT: s_set_vgpr_msb 0x55 + ; GCN-NEXT: s_set_vgpr_msb 0x4455 ; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=1 ; GCN-NEXT: v_fma_f32 v3 /*v259*/, v4 /*v260*/, v5 /*v261*/, v6 /*v262*/ $vgpr259 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr261, 0, undef $vgpr262, 0, 0, implicit $exec, implicit $mode @@ -58,32 +58,32 @@ body: | $vgpr259 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr261, 0, undef $vgpr262, 0, 0, implicit $exec, implicit $mode ; Tuple crossing the 256 boundary - ; GCN-NEXT: s_set_vgpr_msb 17 + ; GCN-NEXT: s_set_vgpr_msb 0x5511 ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=1 ; GCN-NEXT: v_mqsad_u32_u8 v[254:257], v[2:3] /*v[258:259]*/, v0, v[244:247] /*v[500:503]*/ $vgpr254_vgpr255_vgpr256_vgpr257 = V_MQSAD_U32_U8_e64 $vgpr258_vgpr259, $vgpr0, undef $vgpr500_vgpr501_vgpr502_vgpr503, 0, implicit $exec ; DPP/tied operand - ; GCN-NEXT: s_set_vgpr_msb 0x45 + ; GCN-NEXT: s_set_vgpr_msb 0x1145 ; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=0 ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GCN-NEXT: v_add_nc_u16_e64_dpp v0 /*v256*/, v1 /*v257*/, v2 /*v258*/ quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 $vgpr256 = V_ADD_NC_U16_fake16_e64_dpp $vgpr256, 0, $vgpr257, 0, undef $vgpr258, 0, 0, 1, 15, 15, 1, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 17 + ; GCN-NEXT: s_set_vgpr_msb 0x4511 ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=1 ; GCN-NEXT: v_add3_u32_e64_dpp v0, v1 /*v257*/, v0, v2 /*v258*/ quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 $vgpr0 = V_ADD3_U32_e64_dpp $vgpr0, $vgpr257, $vgpr0, undef $vgpr258, 1, 15, 15, 1, implicit $exec ; DS (addr, data0, and data1 operands) - ; GCN-NEXT: s_set_vgpr_msb 20 + ; GCN-NEXT: s_set_vgpr_msb 0x1114 ; ASM-SAME: ; msbs: dst=0 src0=0 src1=1 src2=1 ; GCN-NEXT: ds_store_2addr_b32 v0, v248 /*v504*/, v249 /*v505*/ offset1:1 DS_WRITE2_B32_gfx9 $vgpr0, undef $vgpr504, undef $vgpr505, 0, 1, 0, implicit $exec ; Reset - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x1400 ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 ; GCN-NEXT: ds_store_2addr_b32 v0, v248, v249 offset1:1 DS_WRITE2_B32_gfx9 $vgpr0, undef $vgpr248, undef $vgpr249, 0, 1, 0, implicit $exec @@ -93,13 +93,13 @@ body: | ; GCN-NEXT: ds_load_b32 v0, v255 /*v511*/ $vgpr0 = DS_READ_B32_gfx9 $vgpr511, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0x44 + ; GCN-NEXT: s_set_vgpr_msb 0x144 ; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=0 ; GCN-NEXT: ds_add_rtn_u32 v255 /*v511*/, v0, v248 /*v504*/ $vgpr511 = DS_ADD_RTN_U32_gfx9 $vgpr0, undef $vgpr504, 0, 0, implicit $exec ; Reset - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x4400 ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 ; GCN-NEXT: ds_add_rtn_u32 v0, v0, v0 $vgpr0 = DS_ADD_RTN_U32_gfx9 $vgpr0, $vgpr0, 0, 0, implicit $exec @@ -111,17 +111,17 @@ body: | ; GCN-NEXT: global_load_b32 v2, v[2:3] /*v[258:259]*/, off $vgpr2 = GLOBAL_LOAD_DWORD undef $vgpr258_vgpr259, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 64 + ; GCN-NEXT: s_set_vgpr_msb 0x140 ; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0 ; GCN-NEXT: global_load_b32 v255 /*v511*/, v0, s[0:1] $vgpr511 = GLOBAL_LOAD_DWORD_SADDR undef $sgpr0_sgpr1, $vgpr0, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 1 + ; GCN-NEXT: s_set_vgpr_msb 0x4001 ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0 ; GCN-NEXT: scratch_load_u8 v0, v255 /*v511*/, s0 $vgpr0 = SCRATCH_LOAD_UBYTE_SVS $vgpr511, undef $sgpr0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x100 ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 ; GCN-NEXT: global_store_b32 v[0:1], v2, off GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec @@ -135,13 +135,13 @@ body: | ; GCN-NEXT: global_store_b96 v[0:1] /*v[256:257]*/, v[244:246] /*v[500:502]*/, off GLOBAL_STORE_DWORDX3 $vgpr256_vgpr257, $vgpr500_vgpr501_vgpr502, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0x44 + ; GCN-NEXT: s_set_vgpr_msb 0x544 ; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=0 ; GCN-NEXT: flat_atomic_add_u32 v254 /*v510*/, v[0:1], v255 /*v511*/ th:TH_ATOMIC_RETURN $vgpr510 = FLAT_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr511, 0, 1, implicit $exec, implicit $flat_scr ; Reset - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x4400 ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 ; GCN-NEXT: flat_atomic_add_u32 v0, v[0:1], v255 th:TH_ATOMIC_RETURN $vgpr0 = FLAT_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr255, 0, 1, implicit $exec, implicit $flat_scr @@ -156,12 +156,12 @@ body: | ; GCN-NEXT: buffer_load_b32 v1 /*v257*/, v0, s[8:11], s3 offen $vgpr257 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN $vgpr0, undef $sgpr8_sgpr9_sgpr10_sgpr11, undef $sgpr3, 0, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0x41 + ; GCN-NEXT: s_set_vgpr_msb 0x4041 ; ASM-SAME: ; msbs: dst=1 src0=1 src1=0 src2=0 ; GCN-NEXT: buffer_load_b32 v1 /*v257*/, v0 /*v256*/, s[8:11], s3 offen $vgpr257 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN $vgpr256, undef $sgpr8_sgpr9_sgpr10_sgpr11, undef $sgpr3, 0, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x4100 ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 ; GCN-NEXT: buffer_store_b32 v0, v1, s[0:3], s3 offen BUFFER_STORE_DWORD_VBUFFER_OFFEN $vgpr0, $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, 0, implicit $exec @@ -171,7 +171,7 @@ body: | ; GCN-NEXT: buffer_store_b32 v0 /*v256*/, v1 /*v257*/, s[0:3], s3 offen BUFFER_STORE_DWORD_VBUFFER_OFFEN $vgpr256, $vgpr257, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x4100 ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 ; GCN-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s3 offen BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN $vgpr0, $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, implicit $exec @@ -183,44 +183,44 @@ body: | ; VGPRs above 512 - ; GCN-NEXT: s_set_vgpr_msb 0xaa + ; GCN-NEXT: s_set_vgpr_msb 0x41aa ; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=2 ; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v514*/, v3 /*v515*/ $vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0xab + ; GCN-NEXT: s_set_vgpr_msb 0xaaab ; ASM-SAME: ; msbs: dst=2 src0=3 src1=2 src2=2 ; GCN-NEXT: v_fma_f32 v0 /*v512*/, v0 /*v768*/, v2 /*v514*/, v3 /*v515*/ $vgpr512 = V_FMA_F32_e64 0, undef $vgpr768, 0, undef $vgpr514, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0xae + ; GCN-NEXT: s_set_vgpr_msb 0xabae ; ASM-SAME: ; msbs: dst=2 src0=2 src1=3 src2=2 ; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v770*/, v3 /*v515*/ $vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr770, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0xba + ; GCN-NEXT: s_set_vgpr_msb 0xaeba ; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=3 ; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v514*/, v3 /*v771*/ $vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef $vgpr771, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0xea + ; GCN-NEXT: s_set_vgpr_msb 0xbaea ; ASM-SAME: ; msbs: dst=3 src0=2 src1=2 src2=2 ; GCN-NEXT: v_fma_f32 v255 /*v1023*/, v1 /*v513*/, v2 /*v514*/, v3 /*v515*/ $vgpr1023 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0xff + ; GCN-NEXT: s_set_vgpr_msb 0xeaff ; ASM-SAME: ; msbs: dst=3 src0=3 src1=3 src2=3 ; GCN-NEXT: v_fma_f32 v0 /*v768*/, v1 /*v769*/, v2 /*v770*/, v3 /*v771*/ $vgpr768 = V_FMA_F32_e64 0, undef $vgpr769, 0, undef $vgpr770, 0, undef $vgpr771, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0x42 + ; GCN-NEXT: s_set_vgpr_msb 0xff42 ; ASM-SAME: ; msbs: dst=1 src0=2 src1=0 src2=0 ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v0 /*v512*/ $vgpr256 = V_MOV_B32_e32 undef $vgpr512, implicit $exec ; Reset - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x4200 ; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0 ; GCN-NEXT: v_fma_f32 v0, v1, v2, v3 $vgpr0 = V_FMA_F32_e64 0, undef $vgpr1, 0, undef $vgpr2, 0, undef $vgpr3, 0, 0, implicit $exec, implicit $mode @@ -232,12 +232,12 @@ body: | ; GCN-NEXT: global_store_b96 v[0:1] /*v[512:513]*/, v[0:2] /*v[512:514]*/, off GLOBAL_STORE_DWORDX3 $vgpr512_vgpr513, $vgpr512_vgpr513_vgpr514, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 11 + ; GCN-NEXT: s_set_vgpr_msb 0xa0b ; ASM-SAME: ; msbs: dst=0 src0=3 src1=2 src2=0 ; GCN-NEXT: global_store_b64 v[254:255] /*v[1022:1023]*/, v[254:255] /*v[766:767]*/, off GLOBAL_STORE_DWORDX2 $vgpr1022_vgpr1023, $vgpr766_vgpr767, 0, 0, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0x55 + ; GCN-NEXT: s_set_vgpr_msb 0xb55 ; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=1 ; GCN-NEXT: v_wmma_f32_16x16x32_bf16 v[14:21] /*v[270:277]*/, v[26:33] /*v[282:289]*/, v[34:41] /*v[290:297]*/, v[14:21] /*v[270:277]*/ early-clobber $vgpr270_vgpr271_vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr 8, undef $vgpr282_vgpr283_vgpr284_vgpr285_vgpr286_vgpr287_vgpr288_vgpr289, 8, undef $vgpr290_vgpr291_vgpr292_vgpr293_vgpr294_vgpr295_vgpr296_vgpr297, 8, killed undef $vgpr270_vgpr271_vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277, 0, 0, 0, 0, implicit $exec @@ -247,6 +247,7 @@ body: | ... # ASM-LABEL: {{^}}vopd: + # DIS-LABEL: <vopd>: --- name: vopd @@ -262,35 +263,35 @@ body: | ; GCN-NEXT: v_dual_sub_f32 v244 /*v500*/, v1, v2 :: v_dual_mul_f32 v0 /*v256*/, v3, v4 $vgpr500, $vgpr256 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $vgpr1, undef $vgpr2, undef $vgpr3, undef $vgpr4, implicit $mode, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0x41 + ; GCN-NEXT: s_set_vgpr_msb 0x4041 ; GCN-NEXT: v_dual_sub_f32 v244 /*v500*/, s1, v2 :: v_dual_mul_f32 v0 /*v256*/, v44 /*v300*/, v4 $vgpr500, $vgpr256 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $sgpr1, undef $vgpr2, undef $vgpr300, undef $vgpr4, implicit $mode, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 4 + ; GCN-NEXT: s_set_vgpr_msb 0x4104 ; GCN-NEXT: v_dual_sub_f32 v255, v1, v44 /*v300*/ :: v_dual_mul_f32 v6, v0, v1 /*v257*/ $vgpr255, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $vgpr1, undef $vgpr300, undef $vgpr0, $vgpr257, implicit $mode, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 1 + ; GCN-NEXT: s_set_vgpr_msb 0x401 ; GCN-NEXT: v_dual_sub_f32 v255, 0, v1 :: v_dual_mul_f32 v6, v44 /*v300*/, v3 $vgpr255, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 0, undef $vgpr1, undef $vgpr300, undef $vgpr3, implicit $mode, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 64 + ; GCN-NEXT: s_set_vgpr_msb 0x140 ; GCN-NEXT: v_dual_fmamk_f32 v243 /*v499*/, v0, 0xa, v3 :: v_dual_fmac_f32 v0 /*v256*/, v1, v1 $vgpr499, $vgpr256 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr3, undef $vgpr1, undef $vgpr1, $vgpr256, implicit $mode, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 5 + ; GCN-NEXT: s_set_vgpr_msb 0x4005 ; GCN-NEXT: v_dual_mov_b32 v2, v3 /*v259*/ :: v_dual_add_f32 v3, v1 /*v257*/, v2 /*v258*/ $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx1250 undef $vgpr259, undef $vgpr257, undef $vgpr258, implicit $exec, implicit $mode - ; GCN-NEXT: s_set_vgpr_msb 0x44 + ; GCN-NEXT: s_set_vgpr_msb 0x544 ; GCN-NEXT: v_dual_fmamk_f32 v244 /*v500*/, v0, 0xa, v44 /*v300*/ :: v_dual_fmac_f32 v3 /*v259*/, v1, v1 /*v257*/ $vgpr500, $vgpr259 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr300, undef $vgpr1, undef $vgpr257, $vgpr259, implicit $mode, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 16 + ; GCN-NEXT: s_set_vgpr_msb 0x4410 ; GCN-NEXT: v_dual_fma_f32 v0, v6, v6, v44 /*v300*/ :: v_dual_fma_f32 v1, v4, v5, v45 /*v301*/ $vgpr0, $vgpr1 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr6, 0, undef $vgpr300, 0, undef $vgpr4, 0, undef $vgpr5, 0, undef $vgpr301, implicit $mode, implicit $exec - ; GCN-NEXT: s_set_vgpr_msb 0 + ; GCN-NEXT: s_set_vgpr_msb 0x1000 ; GCN-NEXT: v_dual_fmac_f32 v2, v6, v6 :: v_dual_fma_f32 v3, v4, v5, v3 $vgpr2, $vgpr3 = V_DUAL_FMAC_F32_e32_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr6, undef $vgpr2, 0, undef $vgpr4, 0, undef $vgpr5, 0, $vgpr3, implicit $mode, implicit $exec @@ -298,7 +299,7 @@ body: | ; GCN-NEXT: v_dual_fma_f32 ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/165035 _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
