https://github.com/rampitec updated 
https://github.com/llvm/llvm-project/pull/165035

>From 6bcab853c394f67ba4f0b317ac7c24030c20dd4d Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <[email protected]>
Date: Fri, 24 Oct 2025 13:06:11 -0700
Subject: [PATCH] [AMDGPU] Record old VGPR MSBs in the high bits of
 s_set_vgpr_msb

Fixes: SWDEV-562450
---
 .../Target/AMDGPU/AMDGPULowerVGPREncoding.cpp |  16 +-
 llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp  |   2 +-
 .../MCTargetDesc/AMDGPUMCTargetDesc.cpp       |   2 +-
 .../AMDGPU/vgpr-lowering-gfx1250-t16.mir      |   4 +-
 .../CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir  | 137 +++++++++---------
 .../CodeGen/AMDGPU/whole-wave-functions.ll    |  36 ++---
 6 files changed, 104 insertions(+), 93 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
index 9b932273b2216..d7d0292083e1c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
@@ -58,6 +58,8 @@ class AMDGPULowerVGPREncoding {
   static constexpr unsigned BitsPerField = 2;
   static constexpr unsigned NumFields = 4;
   static constexpr unsigned FieldMask = (1 << BitsPerField) - 1;
+  static constexpr unsigned ModeWidth = NumFields * BitsPerField;
+  static constexpr unsigned ModeMask = (1 << ModeWidth) - 1;
   using ModeType = PackedVector<unsigned, BitsPerField,
                                 std::bitset<BitsPerField * NumFields>>;
 
@@ -152,13 +154,21 @@ bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, 
ModeTy Mask,
     CurrentMode |= NewMode;
     CurrentMask |= Mask;
 
-    MostRecentModeSet->getOperand(0).setImm(CurrentMode);
+    MachineOperand &Op = MostRecentModeSet->getOperand(0);
+
+    // Carry old mode bits from the existing instruction.
+    int64_t OldModeBits = Op.getImm() & (ModeMask << ModeWidth);
+
+    Op.setImm(CurrentMode | OldModeBits);
     return true;
   }
 
+  // Record previous mode into high 8 bits of the immediate.
+  int64_t OldModeBits = CurrentMode << ModeWidth;
+
   I = handleClause(I);
-  MostRecentModeSet =
-      BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB)).addImm(NewMode);
+  MostRecentModeSet = BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))
+                          .addImm(NewMode | OldModeBits);
 
   CurrentMode = NewMode;
   CurrentMask = Mask;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 680e7eb3de6be..844649ebb9ae6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -412,7 +412,7 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr 
*MI) {
                              *OutStreamer);
 
     if (isVerbose() && MI->getOpcode() == AMDGPU::S_SET_VGPR_MSB) {
-      unsigned V = MI->getOperand(0).getImm();
+      unsigned V = MI->getOperand(0).getImm() & 0xff;
       OutStreamer->AddComment(
           " msbs: dst=" + Twine(V >> 6) + " src0=" + Twine(V & 3) +
           " src1=" + Twine((V >> 2) & 3) + " src2=" + Twine((V >> 4) & 3));
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp 
b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 013cfeb364048..28b4da8ab9ebb 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -168,7 +168,7 @@ bool AMDGPUMCInstrAnalysis::evaluateBranch(const MCInst 
&Inst, uint64_t Addr,
 
 void AMDGPUMCInstrAnalysis::updateState(const MCInst &Inst, uint64_t Addr) {
   if (Inst.getOpcode() == AMDGPU::S_SET_VGPR_MSB_gfx12)
-    VgprMSBs = Inst.getOperand(0).getImm();
+    VgprMSBs = Inst.getOperand(0).getImm() & 0xff;
   else if (isTerminator(Inst))
     VgprMSBs = 0;
 }
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir 
b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
index 8a70a8acd28d3..32cc398740d62 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
@@ -36,7 +36,7 @@ body:             |
     ; GCN-NEXT: v_add_f16_e64 v128.l /*v384.l*/, v129.l /*v385.l*/, v130.l 
/*v386.l*/
     $vgpr384_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr385_lo16, 0, undef 
$vgpr386_lo16, 0, 0, 0, implicit $exec, implicit $mode
 
-    ; GCN-NEXT: s_set_vgpr_msb 0x8a
+    ; GCN-NEXT: s_set_vgpr_msb 0x458a
     ; ASM-SAME:                                         ;  msbs: dst=2 src0=2 
src1=2 src2=0
     ; GCN-NEXT: v_add_f16_e64 v0.h /*v512.h*/, v1.h /*v513.h*/, v2.h /*v514.h*/
     $vgpr512_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr513_hi16, 0, undef 
$vgpr514_hi16, 0, 0, 0, implicit $exec, implicit $mode
@@ -50,7 +50,7 @@ body:             |
     ; GCN-NEXT: v_add_f16_e64 v128.l /*v640.l*/, v129.l /*v641.l*/, v130.l 
/*v642.l*/
     $vgpr640_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr641_lo16, 0, undef 
$vgpr642_lo16, 0, 0, 0, implicit $exec, implicit $mode
 
-    ; GCN-NEXT: s_set_vgpr_msb 0xcf
+    ; GCN-NEXT: s_set_vgpr_msb 0x8acf
     ; ASM-SAME:                                         ;  msbs: dst=3 src0=3 
src1=3 src2=0
     ; GCN-NEXT: v_add_f16_e64 v0.h /*v768.h*/, v1.h /*v769.h*/, v2.h /*v770.h*/
     $vgpr768_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr769_hi16, 0, undef 
$vgpr770_hi16, 0, 0, 0, implicit $exec, implicit $mode
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir 
b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
index 41a7b82913bb0..7e1c28f8e7bbb 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
@@ -22,13 +22,13 @@ body:             |
     $vgpr257 = V_MOV_B32_e32 undef $vgpr510, implicit $exec
 
     ; Single bit change
-    ; GCN-NEXT: s_set_vgpr_msb 1
+    ; GCN-NEXT: s_set_vgpr_msb 0x4101
     ; ASM-SAME:                                         ;  msbs: dst=0 src0=1 
src1=0 src2=0
     ; GCN-NEXT: v_rcp_f32_e64 v255, v2 /*v258*/
     $vgpr255 = V_RCP_F32_e64 0, undef $vgpr258, 0, 0, implicit $exec, implicit 
$mode
 
     ; Reset
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x100
     ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 
src1=0 src2=0
     ; GCN-NEXT: v_rcp_f32_e64 v255, v1
     $vgpr255 = V_RCP_F32_e64 0, undef $vgpr1, 0, 0, implicit $exec, implicit 
$mode
@@ -40,7 +40,7 @@ body:             |
     ; GCN-NEXT: v_add_nc_u32_e32 v0, v253 /*v509*/, v252 /*v508*/
     $vgpr0 = V_ADD_U32_e32 undef $vgpr509, undef $vgpr508, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 0x44
+    ; GCN-NEXT: s_set_vgpr_msb 0x544
     ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 
src1=1 src2=0
     ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
     ; GCN-NEXT: v_add_f32_e64 v2 /*v258*/, v0, v251 /*v507*/
@@ -48,7 +48,7 @@ body:             |
 
     ; VOP3
 
-    ; GCN-NEXT: s_set_vgpr_msb 0x55
+    ; GCN-NEXT: s_set_vgpr_msb 0x4455
     ; ASM-SAME:                                         ;  msbs: dst=1 src0=1 
src1=1 src2=1
     ; GCN-NEXT: v_fma_f32 v3 /*v259*/, v4 /*v260*/, v5 /*v261*/, v6 /*v262*/
     $vgpr259 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr261, 0, undef 
$vgpr262, 0, 0, implicit $exec, implicit $mode
@@ -58,32 +58,32 @@ body:             |
     $vgpr259 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr261, 0, undef 
$vgpr262, 0, 0, implicit $exec, implicit $mode
 
     ; Tuple crossing the 256 boundary
-    ; GCN-NEXT: s_set_vgpr_msb 17
+    ; GCN-NEXT: s_set_vgpr_msb 0x5511
     ; ASM-SAME:                                         ;  msbs: dst=0 src0=1 
src1=0 src2=1
     ; GCN-NEXT: v_mqsad_u32_u8 v[254:257], v[2:3] /*v[258:259]*/, v0, 
v[244:247] /*v[500:503]*/
     $vgpr254_vgpr255_vgpr256_vgpr257 = V_MQSAD_U32_U8_e64 $vgpr258_vgpr259, 
$vgpr0, undef $vgpr500_vgpr501_vgpr502_vgpr503, 0, implicit $exec
 
     ; DPP/tied operand
-    ; GCN-NEXT: s_set_vgpr_msb 0x45
+    ; GCN-NEXT: s_set_vgpr_msb 0x1145
     ; ASM-SAME:                                         ;  msbs: dst=1 src0=1 
src1=1 src2=0
     ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
     ; GCN-NEXT: v_add_nc_u16_e64_dpp v0 /*v256*/, v1 /*v257*/, v2 /*v258*/ 
quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
     $vgpr256 = V_ADD_NC_U16_fake16_e64_dpp $vgpr256, 0, $vgpr257, 0, undef 
$vgpr258, 0, 0, 1, 15, 15, 1, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 17
+    ; GCN-NEXT: s_set_vgpr_msb 0x4511
     ; ASM-SAME:                                         ;  msbs: dst=0 src0=1 
src1=0 src2=1
     ; GCN-NEXT: v_add3_u32_e64_dpp v0, v1 /*v257*/, v0, v2 /*v258*/ 
quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
     $vgpr0 = V_ADD3_U32_e64_dpp $vgpr0, $vgpr257, $vgpr0, undef $vgpr258, 1, 
15, 15, 1, implicit $exec
 
     ; DS (addr, data0, and data1 operands)
 
-    ; GCN-NEXT: s_set_vgpr_msb 20
+    ; GCN-NEXT: s_set_vgpr_msb 0x1114
     ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 
src1=1 src2=1
     ; GCN-NEXT: ds_store_2addr_b32 v0, v248 /*v504*/, v249 /*v505*/ offset1:1
     DS_WRITE2_B32_gfx9 $vgpr0, undef $vgpr504, undef $vgpr505, 0, 1, 0, 
implicit $exec
 
     ; Reset
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x1400
     ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 
src1=0 src2=0
     ; GCN-NEXT: ds_store_2addr_b32 v0, v248, v249 offset1:1
     DS_WRITE2_B32_gfx9 $vgpr0, undef $vgpr248, undef $vgpr249, 0, 1, 0, 
implicit $exec
@@ -93,13 +93,13 @@ body:             |
     ; GCN-NEXT: ds_load_b32 v0, v255 /*v511*/
     $vgpr0 = DS_READ_B32_gfx9 $vgpr511, 0, 0, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 0x44
+    ; GCN-NEXT: s_set_vgpr_msb 0x144
     ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 
src1=1 src2=0
     ; GCN-NEXT: ds_add_rtn_u32 v255 /*v511*/, v0, v248 /*v504*/
     $vgpr511 = DS_ADD_RTN_U32_gfx9 $vgpr0, undef $vgpr504, 0, 0, implicit $exec
 
     ; Reset
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x4400
     ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 
src1=0 src2=0
     ; GCN-NEXT: ds_add_rtn_u32 v0, v0, v0
     $vgpr0 = DS_ADD_RTN_U32_gfx9 $vgpr0, $vgpr0, 0, 0, implicit $exec
@@ -111,17 +111,17 @@ body:             |
     ; GCN-NEXT: global_load_b32 v2, v[2:3] /*v[258:259]*/, off
     $vgpr2 = GLOBAL_LOAD_DWORD undef $vgpr258_vgpr259, 0, 0, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 64
+    ; GCN-NEXT: s_set_vgpr_msb 0x140
     ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 
src1=0 src2=0
     ; GCN-NEXT: global_load_b32 v255 /*v511*/, v0, s[0:1]
     $vgpr511 = GLOBAL_LOAD_DWORD_SADDR undef $sgpr0_sgpr1, $vgpr0, 0, 0, 
implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 1
+    ; GCN-NEXT: s_set_vgpr_msb 0x4001
     ; ASM-SAME:                                         ;  msbs: dst=0 src0=1 
src1=0 src2=0
     ; GCN-NEXT: scratch_load_u8 v0, v255 /*v511*/, s0
     $vgpr0 = SCRATCH_LOAD_UBYTE_SVS $vgpr511, undef $sgpr0, 0, 0, implicit 
$exec, implicit $flat_scr
 
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x100
     ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 
src1=0 src2=0
     ; GCN-NEXT: global_store_b32 v[0:1], v2, off
     GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
@@ -135,13 +135,13 @@ body:             |
     ; GCN-NEXT: global_store_b96 v[0:1] /*v[256:257]*/, v[244:246] 
/*v[500:502]*/, off
     GLOBAL_STORE_DWORDX3 $vgpr256_vgpr257, $vgpr500_vgpr501_vgpr502, 0, 0, 
implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 0x44
+    ; GCN-NEXT: s_set_vgpr_msb 0x544
     ; ASM-SAME:                                         ;  msbs: dst=1 src0=0 
src1=1 src2=0
     ; GCN-NEXT: flat_atomic_add_u32 v254 /*v510*/, v[0:1], v255 /*v511*/ 
th:TH_ATOMIC_RETURN
     $vgpr510 = FLAT_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr511, 0, 1, implicit 
$exec, implicit $flat_scr
 
     ; Reset
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x4400
     ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 
src1=0 src2=0
     ; GCN-NEXT: flat_atomic_add_u32 v0, v[0:1], v255 th:TH_ATOMIC_RETURN
     $vgpr0 = FLAT_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr255, 0, 1, implicit $exec, 
implicit $flat_scr
@@ -156,12 +156,12 @@ body:             |
     ; GCN-NEXT: buffer_load_b32 v1 /*v257*/, v0, s[8:11], s3 offen
     $vgpr257 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN $vgpr0, undef 
$sgpr8_sgpr9_sgpr10_sgpr11, undef $sgpr3, 0, 0, 0, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 0x41
+    ; GCN-NEXT: s_set_vgpr_msb 0x4041
     ; ASM-SAME:                                         ;  msbs: dst=1 src0=1 
src1=0 src2=0
     ; GCN-NEXT: buffer_load_b32 v1 /*v257*/, v0 /*v256*/, s[8:11], s3 offen
     $vgpr257 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN $vgpr256, undef 
$sgpr8_sgpr9_sgpr10_sgpr11, undef $sgpr3, 0, 0, 0, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x4100
     ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 
src1=0 src2=0
     ; GCN-NEXT: buffer_store_b32 v0, v1, s[0:3], s3 offen
     BUFFER_STORE_DWORD_VBUFFER_OFFEN $vgpr0, $vgpr1, undef 
$sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, 0, implicit $exec
@@ -171,7 +171,7 @@ body:             |
     ; GCN-NEXT: buffer_store_b32 v0 /*v256*/, v1 /*v257*/, s[0:3], s3 offen
     BUFFER_STORE_DWORD_VBUFFER_OFFEN $vgpr256, $vgpr257, undef 
$sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, 0, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x4100
     ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 
src1=0 src2=0
     ; GCN-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s3 offen
     BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN $vgpr0, $vgpr1, undef 
$sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, implicit $exec
@@ -183,44 +183,44 @@ body:             |
 
     ; VGPRs above 512
 
-    ; GCN-NEXT: s_set_vgpr_msb 0xaa
+    ; GCN-NEXT: s_set_vgpr_msb 0x41aa
     ; ASM-SAME:                                         ;  msbs: dst=2 src0=2 
src1=2 src2=2
     ; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v514*/, v3 /*v515*/
     $vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef 
$vgpr515, 0, 0, implicit $exec, implicit $mode
 
-    ; GCN-NEXT: s_set_vgpr_msb 0xab
+    ; GCN-NEXT: s_set_vgpr_msb 0xaaab
     ; ASM-SAME:                                         ;  msbs: dst=2 src0=3 
src1=2 src2=2
     ; GCN-NEXT: v_fma_f32 v0 /*v512*/, v0 /*v768*/, v2 /*v514*/, v3 /*v515*/
     $vgpr512 = V_FMA_F32_e64 0, undef $vgpr768, 0, undef $vgpr514, 0, undef 
$vgpr515, 0, 0, implicit $exec, implicit $mode
 
-    ; GCN-NEXT: s_set_vgpr_msb 0xae
+    ; GCN-NEXT: s_set_vgpr_msb 0xabae
     ; ASM-SAME:                                         ;  msbs: dst=2 src0=2 
src1=3 src2=2
     ; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v770*/, v3 /*v515*/
     $vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr770, 0, undef 
$vgpr515, 0, 0, implicit $exec, implicit $mode
 
-    ; GCN-NEXT: s_set_vgpr_msb 0xba
+    ; GCN-NEXT: s_set_vgpr_msb 0xaeba
     ; ASM-SAME:                                         ;  msbs: dst=2 src0=2 
src1=2 src2=3
     ; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v514*/, v3 /*v771*/
     $vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef 
$vgpr771, 0, 0, implicit $exec, implicit $mode
 
-    ; GCN-NEXT: s_set_vgpr_msb 0xea
+    ; GCN-NEXT: s_set_vgpr_msb 0xbaea
     ; ASM-SAME:                                         ;  msbs: dst=3 src0=2 
src1=2 src2=2
     ; GCN-NEXT: v_fma_f32 v255 /*v1023*/, v1 /*v513*/, v2 /*v514*/, v3 /*v515*/
     $vgpr1023 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef 
$vgpr515, 0, 0, implicit $exec, implicit $mode
 
-    ; GCN-NEXT: s_set_vgpr_msb 0xff
+    ; GCN-NEXT: s_set_vgpr_msb 0xeaff
     ; ASM-SAME:                                         ;  msbs: dst=3 src0=3 
src1=3 src2=3
     ; GCN-NEXT: v_fma_f32 v0 /*v768*/, v1 /*v769*/, v2 /*v770*/, v3 /*v771*/
     $vgpr768 = V_FMA_F32_e64 0, undef $vgpr769, 0, undef $vgpr770, 0, undef 
$vgpr771, 0, 0, implicit $exec, implicit $mode
 
-    ; GCN-NEXT: s_set_vgpr_msb 0x42
+    ; GCN-NEXT: s_set_vgpr_msb 0xff42
     ; ASM-SAME:                                         ;  msbs: dst=1 src0=2 
src1=0 src2=0
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v0 /*v512*/
     $vgpr256 = V_MOV_B32_e32 undef $vgpr512, implicit $exec
 
     ; Reset
 
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x4200
     ; ASM-SAME:                                         ;  msbs: dst=0 src0=0 
src1=0 src2=0
     ; GCN-NEXT: v_fma_f32 v0, v1, v2, v3
     $vgpr0 = V_FMA_F32_e64 0, undef $vgpr1, 0, undef $vgpr2, 0, undef $vgpr3, 
0, 0, implicit $exec, implicit $mode
@@ -232,12 +232,12 @@ body:             |
     ; GCN-NEXT: global_store_b96 v[0:1] /*v[512:513]*/, v[0:2] /*v[512:514]*/, 
off
     GLOBAL_STORE_DWORDX3 $vgpr512_vgpr513, $vgpr512_vgpr513_vgpr514, 0, 0, 
implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 11
+    ; GCN-NEXT: s_set_vgpr_msb 0xa0b
     ; ASM-SAME:                                         ;  msbs: dst=0 src0=3 
src1=2 src2=0
     ; GCN-NEXT: global_store_b64 v[254:255] /*v[1022:1023]*/, v[254:255] 
/*v[766:767]*/, off
     GLOBAL_STORE_DWORDX2 $vgpr1022_vgpr1023, $vgpr766_vgpr767, 0, 0, implicit 
$exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 0x55
+    ; GCN-NEXT: s_set_vgpr_msb 0xb55
     ; ASM-SAME:                                         ;  msbs: dst=1 src0=1 
src1=1 src2=1
     ; GCN-NEXT: v_wmma_f32_16x16x32_bf16 v[14:21] /*v[270:277]*/, v[26:33] 
/*v[282:289]*/, v[34:41] /*v[290:297]*/, v[14:21] /*v[270:277]*/
     early-clobber 
$vgpr270_vgpr271_vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277 = 
V_WMMA_F32_16X16X32_BF16_w32_twoaddr 8, undef 
$vgpr282_vgpr283_vgpr284_vgpr285_vgpr286_vgpr287_vgpr288_vgpr289, 8, undef 
$vgpr290_vgpr291_vgpr292_vgpr293_vgpr294_vgpr295_vgpr296_vgpr297, 8, killed 
undef $vgpr270_vgpr271_vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277, 0, 0, 
0, 0, implicit $exec
@@ -247,6 +247,7 @@ body:             |
 ...
 
 # ASM-LABEL: {{^}}vopd:
+
 # DIS-LABEL: <vopd>:
 ---
 name:            vopd
@@ -262,35 +263,35 @@ body:             |
     ; GCN-NEXT: v_dual_sub_f32 v244 /*v500*/, v1, v2 :: v_dual_mul_f32 v0 
/*v256*/, v3, v4
     $vgpr500, $vgpr256 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef 
$vgpr1, undef $vgpr2, undef $vgpr3, undef $vgpr4, implicit $mode, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 0x41
+    ; GCN-NEXT: s_set_vgpr_msb 0x4041
     ; GCN-NEXT: v_dual_sub_f32 v244 /*v500*/, s1, v2 :: v_dual_mul_f32 v0 
/*v256*/, v44 /*v300*/, v4
     $vgpr500, $vgpr256 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef 
$sgpr1, undef $vgpr2, undef $vgpr300, undef $vgpr4, implicit $mode, implicit 
$exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 4
+    ; GCN-NEXT: s_set_vgpr_msb 0x4104
     ; GCN-NEXT: v_dual_sub_f32 v255, v1, v44 /*v300*/ :: v_dual_mul_f32 v6, 
v0, v1 /*v257*/
     $vgpr255, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $vgpr1, 
undef $vgpr300, undef $vgpr0, $vgpr257, implicit $mode, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 1
+    ; GCN-NEXT: s_set_vgpr_msb 0x401
     ; GCN-NEXT: v_dual_sub_f32 v255, 0, v1 :: v_dual_mul_f32 v6, v44 /*v300*/, 
v3
     $vgpr255, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 0, undef 
$vgpr1, undef $vgpr300, undef $vgpr3, implicit $mode, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 64
+    ; GCN-NEXT: s_set_vgpr_msb 0x140
     ; GCN-NEXT: v_dual_fmamk_f32 v243 /*v499*/, v0, 0xa, v3 :: v_dual_fmac_f32 
v0 /*v256*/, v1, v1
     $vgpr499, $vgpr256 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 
10, undef $vgpr3, undef $vgpr1, undef $vgpr1, $vgpr256, implicit $mode, 
implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 5
+    ; GCN-NEXT: s_set_vgpr_msb 0x4005
     ; GCN-NEXT: v_dual_mov_b32 v2, v3 /*v259*/ :: v_dual_add_f32 v3, v1 
/*v257*/, v2 /*v258*/
     $vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx1250 undef $vgpr259, 
undef $vgpr257, undef $vgpr258, implicit $exec, implicit $mode
 
-    ; GCN-NEXT: s_set_vgpr_msb 0x44
+    ; GCN-NEXT: s_set_vgpr_msb 0x544
     ; GCN-NEXT: v_dual_fmamk_f32 v244 /*v500*/, v0, 0xa, v44 /*v300*/ :: 
v_dual_fmac_f32 v3 /*v259*/, v1, v1 /*v257*/
     $vgpr500, $vgpr259 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 
10, undef $vgpr300, undef $vgpr1, undef $vgpr257, $vgpr259, implicit $mode, 
implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 16
+    ; GCN-NEXT: s_set_vgpr_msb 0x4410
     ; GCN-NEXT: v_dual_fma_f32 v0, v6, v6, v44 /*v300*/ :: v_dual_fma_f32 v1, 
v4, v5, v45 /*v301*/
     $vgpr0, $vgpr1 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 0, undef 
$vgpr6, 0, undef $vgpr6, 0, undef $vgpr300, 0, undef $vgpr4, 0, undef $vgpr5, 
0, undef $vgpr301, implicit $mode, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x1000
     ; GCN-NEXT: v_dual_fmac_f32 v2, v6, v6 :: v_dual_fma_f32 v3, v4, v5, v3
     $vgpr2, $vgpr3 = V_DUAL_FMAC_F32_e32_X_FMA_F32_e64_e96_gfx1250 0, undef 
$vgpr6, 0, undef $vgpr6, undef $vgpr2, 0, undef $vgpr4, 0, undef $vgpr5, 0, 
$vgpr3, implicit $mode, implicit $exec
 
@@ -298,7 +299,7 @@ body:             |
     ; GCN-NEXT: v_dual_fma_f32 v244 /*v500*/, v6, v7, v8 :: v_dual_add_f32 v3 
/*v259*/, v4, v5
     $vgpr500, $vgpr259 = V_DUAL_FMA_F32_e64_X_ADD_F32_e32_e96_gfx1250 0, undef 
$vgpr6, 0, undef $vgpr7, 0, undef $vgpr8, 0, undef $vgpr4, 0, undef $vgpr5, 
implicit $mode, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 0xae
+    ; GCN-NEXT: s_set_vgpr_msb 0x40ae
     ; GCN-NEXT: v_dual_fmac_f32 v2 /*v514*/, v6 /*v518*/, v8 /*v776*/ :: 
v_dual_fma_f32 v3 /*v515*/, v4 /*v516*/, v7 /*v775*/, v3 /*v515*/
     $vgpr514, $vgpr515 = V_DUAL_FMAC_F32_e32_X_FMA_F32_e64_e96_gfx1250 0, 
undef $vgpr518, 0, undef $vgpr776, undef $vgpr514, 0, undef $vgpr516, 0, undef 
$vgpr775, 0, $vgpr515, implicit $mode, implicit $exec
 
@@ -319,31 +320,31 @@ body:             |
     ; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1 /*v257*/, v2 /*v258*/, 0x1
     $vgpr256 = V_FMAAK_F32 undef $vgpr257, undef $vgpr258, 1, implicit $exec, 
implicit $mode
 
-    ; GCN-NEXT: s_set_vgpr_msb 5
+    ; GCN-NEXT: s_set_vgpr_msb 0x4505
     ; GCN-NEXT: v_fmaak_f32 v0, v1 /*v257*/, v2 /*v258*/, 0x1
     $vgpr0 = V_FMAAK_F32 undef $vgpr257, undef $vgpr258, 1, implicit $exec, 
implicit $mode
 
-    ; GCN-NEXT: s_set_vgpr_msb 0x41
+    ; GCN-NEXT: s_set_vgpr_msb 0x541
     ; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1 /*v257*/, v2, 0x1
     $vgpr256 = V_FMAAK_F32 undef $vgpr257, undef $vgpr2, 1, implicit $exec, 
implicit $mode
 
-    ; GCN-NEXT: s_set_vgpr_msb 0x44
+    ; GCN-NEXT: s_set_vgpr_msb 0x4144
     ; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1, v2 /*v258*/, 0x1
     $vgpr256 = V_FMAAK_F32 undef $vgpr1, undef $vgpr258, 1, implicit $exec, 
implicit $mode
 
-    ; GCN-NEXT: s_set_vgpr_msb 0x45
+    ; GCN-NEXT: s_set_vgpr_msb 0x4445
     ; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1 /*v257*/, 0x1, v2 /*v258*/
     $vgpr256 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr258, implicit $exec, 
implicit $mode
 
-    ; GCN-NEXT: s_set_vgpr_msb 5
+    ; GCN-NEXT: s_set_vgpr_msb 0x4505
     ; GCN-NEXT: v_fmamk_f32 v0, v1 /*v257*/, 0x1, v2 /*v258*/
     $vgpr0 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr258, implicit $exec, 
implicit $mode
 
-    ; GCN-NEXT: s_set_vgpr_msb 0x41
+    ; GCN-NEXT: s_set_vgpr_msb 0x541
     ; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1 /*v257*/, 0x1, v2
     $vgpr256 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr2, implicit $exec, 
implicit $mode
 
-    ; GCN-NEXT: s_set_vgpr_msb 0x44
+    ; GCN-NEXT: s_set_vgpr_msb 0x4144
     ; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1, 0x1, v2 /*v258*/
     $vgpr256 = V_FMAMK_F32 undef $vgpr1, 1, undef $vgpr258, implicit $exec, 
implicit $mode
 
@@ -389,15 +390,15 @@ body:             |
     ; GCN-NEXT: v_lshlrev_b32_e64 v0, v0 /*v256*/, v2
     $vgpr0 = V_LSHLREV_B32_e64 undef $vgpr256, undef $vgpr2, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 4
+    ; GCN-NEXT: s_set_vgpr_msb 0x104
     ; GCN-NEXT: v_lshlrev_b32_e64 v0, v1, v0 /*v256*/
     $vgpr0 = V_LSHLREV_B32_e64 undef $vgpr1, undef $vgpr256, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 1
+    ; GCN-NEXT: s_set_vgpr_msb 0x401
     ; GCN-NEXT: v_subrev_nc_u32_e32 v0, v0 /*v256*/, v2
     $vgpr0 = V_SUBREV_U32_e32 undef $vgpr256, undef $vgpr2, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 4
+    ; GCN-NEXT: s_set_vgpr_msb 0x104
     ; GCN-NEXT: v_subrev_nc_u32_e32 v0, v1, v0 /*v256*/
     $vgpr0 = V_SUBREV_U32_e32 undef $vgpr1, undef $vgpr256, implicit $exec
 
@@ -417,7 +418,7 @@ body:             |
     ; GCN-NEXT: v_fma_f32 v3 /*v259*/, v4 /*v260*/, v5 /*v261*/, v6 /*v262*/
     $vgpr259 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr261, 0, undef 
$vgpr262, 0, 0, implicit $exec, implicit $mode
 
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x5500
     ; GCN-NEXT: v_add_nc_u32_e32 v0, v1, v2
     $vgpr0 = V_ADD_U32_e32 undef $vgpr1, undef $vgpr2, implicit $exec
 
@@ -431,7 +432,7 @@ body:             |
     ; GCN-NEXT: v_add_nc_u32_e32 v0 /*v256*/, v1, v2
     $vgpr256 = V_ADD_U32_e32 undef $vgpr1, undef $vgpr2, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x4000
     ; GCN-NEXT: v_fma_f32 v3, v4, v5, s2
     $vgpr3 = V_FMA_F32_e64 0, undef $vgpr4, 0, undef $vgpr5, 0, undef $sgpr2, 
0, 0, implicit $exec, implicit $mode
 
@@ -439,17 +440,17 @@ body:             |
     ; GCN-NEXT: v_fma_f32 v3, v4 /*v260*/, v5, 1
     $vgpr3 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr5, 0, 1, 0, 0, 
implicit $exec, implicit $mode
 
-    ; GCN-NEXT: s_set_vgpr_msb 4
+    ; GCN-NEXT: s_set_vgpr_msb 0x104
     ; GCN-NEXT: v_mov_b32_e32 v0, v1
     $vgpr0 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
 
     ; GCN-NEXT: v_add_nc_u32_e32 v2, v1, v3 /*v259*/
     $vgpr2 = V_ADD_U32_e32 undef $vgpr1, undef $vgpr259, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 1
+    ; GCN-NEXT: s_set_vgpr_msb 0x401
     ; GCN-NEXT: v_mov_b32_e32 v0, v0 /*v256*/
     ; GCN-NEXT: v_add_nc_u32_e32 v1, v1 /*v257*/, v1
-    ; GCN-NEXT: s_set_vgpr_msb 5
+    ; GCN-NEXT: s_set_vgpr_msb 0x105
     ; GCN-NEXT: v_add_nc_u32_e32 v2, v2 /*v258*/, v2 /*v258*/
     $vgpr0 = V_MOV_B32_e32 undef $vgpr256, implicit $exec
     $vgpr1 = V_ADD_U32_e32 undef $vgpr257, undef $vgpr1, implicit $exec
@@ -478,7 +479,7 @@ body:             |
     ; ASM: .LBB{{.*_1}}:
     ; GCN-NEXT: s_set_vgpr_msb 64
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x4000
     $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
 
     ; Reset on fallthrough block end
@@ -487,7 +488,7 @@ body:             |
     ; ASM-NEXT: %bb.2:
     ; GCN-NEXT: s_set_vgpr_msb 64
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x4000
     ; GCN-NEXT: s_branch
     $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
     S_BRANCH %bb.3
@@ -498,7 +499,7 @@ body:             |
     ; ASM: .LBB{{.*_3}}:
     ; GCN-NEXT: s_set_vgpr_msb 64
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x4000
     ; GCN-NEXT: s_swap_pc_i64
     $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
     $exec = S_SWAPPC_B64 undef $sgpr0_sgpr1
@@ -520,7 +521,7 @@ body:             |
     ; GCN-NEXT: v_mov_b32_e32 v0, v1
     ; GCN-NEXT: s_set_vgpr_msb 64
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x4000
     ; GCN-NEXT: s_set_pc_i64
     $vgpr0 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
     $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
@@ -540,7 +541,7 @@ body:             |
     ; ASM-NEXT: %bb.7:
     ; GCN-NEXT: s_set_vgpr_msb 64
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x4000
     ; ASM-NEXT: ; return to shader part epilog
     $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
     SI_RETURN_TO_EPILOG undef $vgpr0, implicit-def $exec
@@ -558,7 +559,7 @@ body:             |
     ; ASM-NEXT: %bb.9:
     ; GCN-NEXT: s_set_vgpr_msb 64
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x4000
     ; GCN-NEXT: s_set_pc_i64
     $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
     S_SETPC_B64_return undef $sgpr0_sgpr1, implicit-def $exec
@@ -576,14 +577,14 @@ body:             |
     ; ASM: %bb.0:
     ; GCN-NEXT: s_set_vgpr_msb 64
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v0
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x4000
     $vgpr256 = V_MOV_B32_e32 undef $vgpr0, implicit $exec
 
   bb.1:
     ; ASM: .LBB{{[0-9]+}}_1:
     ; GCN-NEXT: s_set_vgpr_msb 64
     ; GCN-NEXT: v_mov_b32_e32 v1 /*v257*/, v1
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x4000
     ; GCN-NEXT: s_cbranch_scc0
     $vgpr257 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
     S_CBRANCH_SCC0 %bb.1, undef implicit $scc
@@ -607,7 +608,7 @@ body:             |
     ; ASM: %bb.0:
     ; GCN-NEXT: s_set_vgpr_msb 64
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x4000
     ; ASM:      def v0
     ; GCN-NOT:  s_set_vgpr_msb
     ; ASM:      use v0
@@ -641,7 +642,7 @@ body:             |
     ; GCN-NEXT: s_set_vgpr_msb 64
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
     ; GCN-NEXT: s_nop 0
-    ; GCN-NEXT: s_set_vgpr_msb 1
+    ; GCN-NEXT: s_set_vgpr_msb 0x4001
     ; GCN-NEXT: v_mov_b32_e32 v1, v0 /*v256*/
     BUNDLE implicit-def $vgpr256 {
       $vgpr256 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
@@ -683,7 +684,7 @@ body:             |
 
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
     ; GCN-NEXT: v_mov_b32_e32 v1 /*v257*/, v1
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x4000
     ; GCN-NEXT: v_mov_b32_e32 v2, v1
     ; GCN-NEXT: v_mov_b32_e32 v3, v1
     BUNDLE implicit-def $vgpr256, implicit-def $vgpr257, implicit-def $vgpr2, 
implicit-def $vgpr3, implicit undef $vgpr1 {
@@ -712,7 +713,7 @@ body:             |
 
     ; GCN-NEXT: s_clause 0x3e
     ; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v1
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x4000
     ; GCN-NEXT: v_mov_b32_e32 v1, v1
     ; GCN-NEXT: v_mov_b32_e32 v2, v1
     ; GCN-COUNT-60: v_mov_b32_e32 v1, v1
@@ -826,7 +827,7 @@ body:             |
     ; GCN-NEXT: v_wmma_scale_f32_16x16x128_f8f6f4 v[210:217], v[244:259] 
/*v[500:515]*/, v[244:259] /*v[500:515]*/, v[10:17], v1, v2
     $vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = 
V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef 
$vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515,
 undef 
$vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515,
 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef 
$vgpr1, undef $vgpr2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x500
     ; GCN-NEXT: v_wmma_scale_f32_16x16x128_f8f6f4 v[210:217], v[100:115], 
v[100:115], v[10:17], v1, v2
     $vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = 
V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef 
$vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115,
 undef 
$vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115,
 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef 
$vgpr1, undef $vgpr2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 
@@ -838,11 +839,11 @@ body:             |
     ; GCN-NEXT: v_wmma_ld_scale16_paired_b64 v[0:1], v[2:3]
     V_WMMA_LD_SCALE16_PAIRED_B64 undef $vgpr0_vgpr1, undef $vgpr2_vgpr3, 0, 0, 
0, 0, 0, 0, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 5
+    ; GCN-NEXT: s_set_vgpr_msb 0x105
     ; GCN-NEXT: v_wmma_scale16_f32_16x16x128_f8f6f4 v[210:217], v[244:259] 
/*v[500:515]*/, v[244:259] /*v[500:515]*/, v[10:17], v[0:1], v[2:3]
     $vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = 
V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef 
$vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515,
 undef 
$vgpr500_vgpr501_vgpr502_vgpr503_vgpr504_vgpr505_vgpr506_vgpr507_vgpr508_vgpr509_vgpr510_vgpr511_vgpr512_vgpr513_vgpr514_vgpr515,
 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef 
$vgpr0_vgpr1, undef $vgpr2_vgpr3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 
-    ; GCN-NEXT: s_set_vgpr_msb 0
+    ; GCN-NEXT: s_set_vgpr_msb 0x500
     ; GCN-NEXT: v_wmma_scale16_f32_16x16x128_f8f6f4 v[210:217], v[100:115], 
v[100:115], v[10:17], v[0:1], v[2:3]
     $vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217 = 
V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr undef 
$vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115,
 undef 
$vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115,
 0, undef $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, undef 
$vgpr0_vgpr1, undef $vgpr2_vgpr3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll 
b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
index a42c8ac706d27..75817105e74fd 100644
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
@@ -3182,7 +3182,7 @@ define amdgpu_gfx_whole_wave <2 x half> 
@call_gfx_from_whole_wave(i1 %active, <2
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v509*/, s33 
offset:1592
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v510*/, s33 
offset:1596
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v511*/, s33 
offset:1600
-; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 8 ; msbs: dst=0 src0=0 src1=2 src2=0
+; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x408 ; msbs: dst=0 src0=0 src1=2 
src2=0
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v512*/, s33 offset:1604
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v513*/, s33 offset:1608
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v514*/, s33 offset:1612
@@ -3443,7 +3443,7 @@ define amdgpu_gfx_whole_wave <2 x half> 
@call_gfx_from_whole_wave(i1 %active, <2
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v765*/, s33 
offset:2616
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v766*/, s33 
offset:2620
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v767*/, s33 
offset:2624
-; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 12 ; msbs: dst=0 src0=0 src1=3 src2=0
+; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x80c ; msbs: dst=0 src0=0 src1=3 
src2=0
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v768*/, s33 offset:2628
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v769*/, s33 offset:2632
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v770*/, s33 offset:2636
@@ -3706,7 +3706,7 @@ define amdgpu_gfx_whole_wave <2 x half> 
@call_gfx_from_whole_wave(i1 %active, <2
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v1023*/, s33 
offset:3648
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
-; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0 ; msbs: dst=0 src0=0 src1=0 src2=0
+; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0xc00 ; msbs: dst=0 src0=0 src1=0 
src2=0
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded 
Spill
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    v_writelane_b32 v40, s0, 3
@@ -4135,7 +4135,7 @@ define amdgpu_gfx_whole_wave <2 x half> 
@call_gfx_from_whole_wave(i1 %active, <2
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v509*/, off, s33 offset:1592
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v510*/, off, s33 offset:1596
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v511*/, off, s33 offset:1600
-; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x80 ; msbs: dst=2 src0=0 src1=0 
src2=0
+; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x4080 ; msbs: dst=2 src0=0 src1=0 
src2=0
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v512*/, off, s33 offset:1604
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v513*/, off, s33 offset:1608
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v514*/, off, s33 offset:1612
@@ -4396,7 +4396,7 @@ define amdgpu_gfx_whole_wave <2 x half> 
@call_gfx_from_whole_wave(i1 %active, <2
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v765*/, off, s33 offset:2616
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v766*/, off, s33 offset:2620
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v767*/, off, s33 offset:2624
-; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0xc0 ; msbs: dst=3 src0=0 src1=0 
src2=0
+; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x80c0 ; msbs: dst=3 src0=0 src1=0 
src2=0
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v768*/, off, s33 offset:2628
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v769*/, off, s33 offset:2632
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v770*/, off, s33 offset:2636
@@ -4661,7 +4661,7 @@ define amdgpu_gfx_whole_wave <2 x half> 
@call_gfx_from_whole_wave(i1 %active, <2
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, s4
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 s33, s0
 ; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0 ; msbs: dst=0 src0=0 src1=0 src2=0
+; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0xc000 ; msbs: dst=0 src0=0 src1=0 
src2=0
 ; GFX1250-DAGISEL-NEXT:    s_set_pc_i64 s[30:31]
   %ret = call amdgpu_gfx <2 x half>(<2 x half>, <2 x half>) @gfx_callee(<2 x 
half> %y, <2 x half> %x) convergent
   ret <2 x half> %ret
@@ -6346,7 +6346,7 @@ define amdgpu_gfx_whole_wave <2 x half> 
@tail_call_gfx_from_whole_wave(i1 %activ
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v509*/, s32 
offset:1588
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v510*/, s32 
offset:1592
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v511*/, s32 
offset:1596
-; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 8 ; msbs: dst=0 src0=0 src1=2 src2=0
+; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x408 ; msbs: dst=0 src0=0 src1=2 
src2=0
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v512*/, s32 offset:1600
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v513*/, s32 offset:1604
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v514*/, s32 offset:1608
@@ -6607,7 +6607,7 @@ define amdgpu_gfx_whole_wave <2 x half> 
@tail_call_gfx_from_whole_wave(i1 %activ
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v765*/, s32 
offset:2612
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v766*/, s32 
offset:2616
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v767*/, s32 
offset:2620
-; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 12 ; msbs: dst=0 src0=0 src1=3 src2=0
+; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x80c ; msbs: dst=0 src0=0 src1=3 
src2=0
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v768*/, s32 offset:2624
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v769*/, s32 offset:2628
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v770*/, s32 offset:2632
@@ -6872,7 +6872,7 @@ define amdgpu_gfx_whole_wave <2 x half> 
@tail_call_gfx_from_whole_wave(i1 %activ
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
 ; GFX1250-DAGISEL-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b64 s[36:37], gfx_callee@abs64
-; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0 ; msbs: dst=0 src0=0 src1=0 src2=0
+; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0xc00 ; msbs: dst=0 src0=0 src1=0 
src2=0
 ; GFX1250-DAGISEL-NEXT:    v_swap_b32 v0, v1
 ; GFX1250-DAGISEL-NEXT:    s_xor_b32 exec_lo, s0, -1
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e
@@ -7283,7 +7283,7 @@ define amdgpu_gfx_whole_wave <2 x half> 
@tail_call_gfx_from_whole_wave(i1 %activ
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v509*/, off, s32 offset:1588
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v510*/, off, s32 offset:1592
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v511*/, off, s32 offset:1596
-; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x80 ; msbs: dst=2 src0=0 src1=0 
src2=0
+; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x4080 ; msbs: dst=2 src0=0 src1=0 
src2=0
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v512*/, off, s32 offset:1600
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v513*/, off, s32 offset:1604
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v514*/, off, s32 offset:1608
@@ -7544,7 +7544,7 @@ define amdgpu_gfx_whole_wave <2 x half> 
@tail_call_gfx_from_whole_wave(i1 %activ
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v765*/, off, s32 offset:2612
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v766*/, off, s32 offset:2616
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v767*/, off, s32 offset:2620
-; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0xc0 ; msbs: dst=3 src0=0 src1=0 
src2=0
+; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x80c0 ; msbs: dst=3 src0=0 src1=0 
src2=0
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v768*/, off, s32 offset:2624
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v769*/, off, s32 offset:2628
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v770*/, off, s32 offset:2632
@@ -7807,7 +7807,7 @@ define amdgpu_gfx_whole_wave <2 x half> 
@tail_call_gfx_from_whole_wave(i1 %activ
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v1023*/, off, s32 
offset:3644
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, s0
-; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0 ; msbs: dst=0 src0=0 src1=0 src2=0
+; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0xc000 ; msbs: dst=0 src0=0 src1=0 
src2=0
 ; GFX1250-DAGISEL-NEXT:    s_set_pc_i64 s[36:37]
   %ret = tail call amdgpu_gfx <2 x half>(<2 x half>, <2 x half>) 
@gfx_callee(<2 x half> %y, <2 x half> %x) convergent
   ret <2 x half> %ret
@@ -9657,7 +9657,7 @@ define amdgpu_gfx_whole_wave void 
@call_from_whole_wave(i1 %unused, <8 x float>
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v509*/, s33 
offset:1600
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v510*/, s33 
offset:1604
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v511*/, s33 
offset:1608
-; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 8 ; msbs: dst=0 src0=0 src1=2 src2=0
+; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x408 ; msbs: dst=0 src0=0 src1=2 
src2=0
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v512*/, s33 offset:1612
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v513*/, s33 offset:1616
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v514*/, s33 offset:1620
@@ -9918,7 +9918,7 @@ define amdgpu_gfx_whole_wave void 
@call_from_whole_wave(i1 %unused, <8 x float>
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v765*/, s33 
offset:2624
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v766*/, s33 
offset:2628
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v767*/, s33 
offset:2632
-; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 12 ; msbs: dst=0 src0=0 src1=3 src2=0
+; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x80c ; msbs: dst=0 src0=0 src1=3 
src2=0
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v768*/, s33 offset:2636
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v769*/, s33 offset:2640
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v770*/, s33 offset:2644
@@ -10181,7 +10181,7 @@ define amdgpu_gfx_whole_wave void 
@call_from_whole_wave(i1 %unused, <8 x float>
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v1023*/, s33 
offset:3656
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
-; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0 ; msbs: dst=0 src0=0 src1=0 src2=0
+; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0xc00 ; msbs: dst=0 src0=0 src1=0 
src2=0
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x2
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42, s33
 ; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40, s33 offset:164
@@ -10616,7 +10616,7 @@ define amdgpu_gfx_whole_wave void 
@call_from_whole_wave(i1 %unused, <8 x float>
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v509*/, off, s33 offset:1600
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v510*/, off, s33 offset:1604
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v511*/, off, s33 offset:1608
-; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x80 ; msbs: dst=2 src0=0 src1=0 
src2=0
+; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x4080 ; msbs: dst=2 src0=0 src1=0 
src2=0
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v512*/, off, s33 offset:1612
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v513*/, off, s33 offset:1616
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v514*/, off, s33 offset:1620
@@ -10877,7 +10877,7 @@ define amdgpu_gfx_whole_wave void 
@call_from_whole_wave(i1 %unused, <8 x float>
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v765*/, off, s33 offset:2624
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v766*/, off, s33 offset:2628
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v767*/, off, s33 offset:2632
-; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0xc0 ; msbs: dst=3 src0=0 src1=0 
src2=0
+; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x80c0 ; msbs: dst=3 src0=0 src1=0 
src2=0
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v768*/, off, s33 offset:2636
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v769*/, off, s33 offset:2640
 ; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v770*/, off, s33 offset:2644
@@ -11142,7 +11142,7 @@ define amdgpu_gfx_whole_wave void 
@call_from_whole_wave(i1 %unused, <8 x float>
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, s4
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 s33, s0
 ; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0 ; msbs: dst=0 src0=0 src1=0 src2=0
+; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0xc000 ; msbs: dst=0 src0=0 src1=0 
src2=0
 ; GFX1250-DAGISEL-NEXT:    s_set_pc_i64 s[30:31]
   %ret = call float(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @callee, <8 x 
float> %x) convergent
   store float %ret, ptr %p

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to