https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/141591

>From 7c8f90225928c0dbffcfa03bd20da3419a80095f Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutr...@amd.com>
Date: Tue, 27 May 2025 12:29:02 +0200
Subject: [PATCH 1/2] [AMDGPU] Add KnownBits simplification combines to
 RegBankCombiner

---
 llvm/lib/Target/AMDGPU/AMDGPUCombine.td       |  3 +-
 llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll   | 59 ++++++++---------
 .../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll | 61 +++++++-----------
 .../test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll | 63 +++++++------------
 llvm/test/CodeGen/AMDGPU/div_i128.ll          | 30 ++++-----
 llvm/test/CodeGen/AMDGPU/itofp.i128.ll        | 11 ++--
 llvm/test/CodeGen/AMDGPU/lround.ll            | 18 +++---
 llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll   | 16 +----
 8 files changed, 104 insertions(+), 157 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 96be17c487130..df867aaa204b1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -210,5 +210,6 @@ def AMDGPURegBankCombiner : GICombiner<
    fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
    identity_combines, redundant_and, constant_fold_cast_op,
    cast_of_cast_combines, sext_trunc, zext_of_shift_amount_combines,
-   lower_uniform_sbfx, lower_uniform_ubfx, form_bitfield_extract]> {
+   lower_uniform_sbfx, lower_uniform_ubfx, form_bitfield_extract,
+   known_bits_simplifications]> {
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
index 6baa10bb48621..cc0f45681a3e2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
@@ -1744,63 +1744,64 @@ define i65 @v_lshr_i65_33(i65 %value) {
 ; GFX6-LABEL: v_lshr_i65_33:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v3, v1
-; GFX6-NEXT:    v_mov_b32_e32 v0, 1
+; GFX6-NEXT:    v_mov_b32_e32 v3, 1
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0
+; GFX6-NEXT:    v_and_b32_e32 v3, 1, v2
+; GFX6-NEXT:    v_lshl_b64 v[2:3], v[3:4], 31
+; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
+; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX6-NEXT:    v_mov_b32_e32 v1, 0
-; GFX6-NEXT:    v_and_b32_e32 v0, 1, v2
-; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 31
-; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 1, v3
-; GFX6-NEXT:    v_or_b32_e32 v0, v2, v0
 ; GFX6-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_lshr_i65_33:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v3, v1
-; GFX8-NEXT:    v_mov_b32_e32 v0, 1
+; GFX8-NEXT:    v_mov_b32_e32 v3, 1
+; GFX8-NEXT:    v_mov_b32_e32 v4, 0
+; GFX8-NEXT:    v_and_b32_e32 v3, 1, v2
+; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0
-; GFX8-NEXT:    v_and_b32_e32 v0, 1, v2
-; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 31, v[0:1]
-; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 1, v3
-; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
 ; GFX8-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_lshr_i65_33:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v3, v1
-; GFX9-NEXT:    v_mov_b32_e32 v0, 1
+; GFX9-NEXT:    v_mov_b32_e32 v3, 1
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-NEXT:    v_and_b32_e32 v3, 1, v2
+; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
+; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9-NEXT:    v_and_b32_e32 v0, 1, v2
-; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 31, v[0:1]
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 1, v3
-; GFX9-NEXT:    v_or_b32_e32 v0, v2, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_lshr_i65_33:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mov_b32_e32 v3, v1
-; GFX10-NEXT:    v_mov_b32_e32 v0, 1
+; GFX10-NEXT:    v_mov_b32_e32 v3, 1
+; GFX10-NEXT:    v_mov_b32_e32 v4, 0
+; GFX10-NEXT:    v_and_b32_e32 v3, 1, v2
+; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10-NEXT:    v_and_b32_e32 v0, 1, v2
-; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 1, v3
-; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 31, v[0:1]
-; GFX10-NEXT:    v_or_b32_e32 v0, v2, v0
+; GFX10-NEXT:    v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX10-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_lshr_i65_33:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v0, 1
-; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 1, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 1, v3
-; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 31, v[0:1]
-; GFX11-NEXT:    v_or_b32_e32 v0, v2, v0
+; GFX11-NEXT:    v_mov_b32_e32 v3, 1
+; GFX11-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_and_b32 v3, 1, v2
+; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
+; GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-NEXT:    v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX11-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX11-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %result = lshr i65 %value, 33
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index f317526e6de47..de7e7c7de38b0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -80,11 +80,10 @@ define amdgpu_ps i7 @s_saddsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
 ; GFX8-NEXT:    s_min_i32 s2, s2, 0
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 9
 ; GFX8-NEXT:    s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX8-NEXT:    s_max_i32 s1, s2, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s3
 ; GFX8-NEXT:    s_min_i32 s1, s1, s2
 ; GFX8-NEXT:    s_add_i32 s0, s0, s1
@@ -189,11 +188,10 @@ define amdgpu_ps i8 @s_saddsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
 ; GFX8-NEXT:    s_min_i32 s2, s2, 0
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
 ; GFX8-NEXT:    s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX8-NEXT:    s_max_i32 s1, s2, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s3
 ; GFX8-NEXT:    s_min_i32 s1, s1, s2
 ; GFX8-NEXT:    s_add_i32 s0, s0, s1
@@ -386,11 +384,10 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshr_b32 s3, s1, 8
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
 ; GFX8-NEXT:    s_sub_i32 s4, 0x8000, s4
+; GFX8-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX8-NEXT:    s_max_i32 s1, s4, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s5
 ; GFX8-NEXT:    s_min_i32 s1, s1, s4
 ; GFX8-NEXT:    s_add_i32 s0, s0, s1
@@ -400,11 +397,10 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_max_i32 s4, s3, 0
 ; GFX8-NEXT:    s_min_i32 s3, s3, 0
 ; GFX8-NEXT:    s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
-; GFX8-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX8-NEXT:    s_max_i32 s2, s3, s2
-; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s4
 ; GFX8-NEXT:    s_min_i32 s2, s2, s3
 ; GFX8-NEXT:    s_add_i32 s1, s1, s2
@@ -787,11 +783,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshr_b32 s7, s1, 24
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
 ; GFX8-NEXT:    s_sub_i32 s8, 0x8000, s8
+; GFX8-NEXT:    s_sub_i32 s9, 0x7fff, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s9, 0x7fff, s9
 ; GFX8-NEXT:    s_max_i32 s1, s8, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s9
 ; GFX8-NEXT:    s_min_i32 s1, s1, s8
 ; GFX8-NEXT:    s_add_i32 s0, s0, s1
@@ -801,11 +796,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_max_i32 s8, s5, 0
 ; GFX8-NEXT:    s_min_i32 s5, s5, 0
 ; GFX8-NEXT:    s_sub_i32 s5, 0x8000, s5
+; GFX8-NEXT:    s_sub_i32 s8, 0x7fff, s8
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
-; GFX8-NEXT:    s_sub_i32 s8, 0x7fff, s8
 ; GFX8-NEXT:    s_max_i32 s2, s5, s2
-; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s8
 ; GFX8-NEXT:    s_min_i32 s2, s2, s5
 ; GFX8-NEXT:    s_add_i32 s1, s1, s2
@@ -815,11 +809,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_max_i32 s6, s5, 0
 ; GFX8-NEXT:    s_min_i32 s5, s5, 0
 ; GFX8-NEXT:    s_sub_i32 s5, 0x8000, s5
+; GFX8-NEXT:    s_sub_i32 s6, 0x7fff, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
-; GFX8-NEXT:    s_sub_i32 s6, 0x7fff, s6
 ; GFX8-NEXT:    s_max_i32 s3, s5, s3
-; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s6
 ; GFX8-NEXT:    s_min_i32 s3, s3, s5
 ; GFX8-NEXT:    s_add_i32 s2, s2, s3
@@ -829,14 +822,13 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_min_i32 s5, s5, 0
 ; GFX8-NEXT:    s_lshl_b32 s4, s7, 8
 ; GFX8-NEXT:    s_sub_i32 s5, 0x8000, s5
-; GFX8-NEXT:    s_sext_i32_i16 s5, s5
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sub_i32 s6, 0x7fff, s6
-; GFX8-NEXT:    s_max_i32 s4, s5, s4
+; GFX8-NEXT:    s_sext_i32_i16 s5, s5
+; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s0, s0
 ; GFX8-NEXT:    s_ashr_i32 s1, s1, 8
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
+; GFX8-NEXT:    s_max_i32 s4, s5, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s6
 ; GFX8-NEXT:    s_ashr_i32 s0, s0, 8
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
@@ -2631,11 +2623,10 @@ define amdgpu_ps i16 @s_saddsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
 ; GFX8-NEXT:    s_max_i32 s3, s2, 0
 ; GFX8-NEXT:    s_min_i32 s2, s2, 0
 ; GFX8-NEXT:    s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX8-NEXT:    s_max_i32 s1, s2, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s3
 ; GFX8-NEXT:    s_min_i32 s1, s1, s2
 ; GFX8-NEXT:    s_add_i32 s0, s0, s1
@@ -2835,11 +2826,10 @@ define amdgpu_ps i32 @s_saddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 ; GFX8-NEXT:    s_max_i32 s4, s3, 0
 ; GFX8-NEXT:    s_min_i32 s3, s3, 0
 ; GFX8-NEXT:    s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s1
-; GFX8-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX8-NEXT:    s_max_i32 s3, s3, s5
-; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
 ; GFX8-NEXT:    s_min_i32 s3, s3, s4
@@ -3190,11 +3180,10 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s7, s6, 0
 ; GFX8-NEXT:    s_min_i32 s6, s6, 0
 ; GFX8-NEXT:    s_sub_i32 s6, 0x8000, s6
+; GFX8-NEXT:    s_sub_i32 s7, 0x7fff, s7
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s2
-; GFX8-NEXT:    s_sub_i32 s7, 0x7fff, s7
 ; GFX8-NEXT:    s_max_i32 s6, s6, s8
-; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s7, s7
 ; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
 ; GFX8-NEXT:    s_min_i32 s6, s6, s7
@@ -3215,11 +3204,10 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s6, s2, 0
 ; GFX8-NEXT:    s_min_i32 s2, s2, 0
 ; GFX8-NEXT:    s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT:    s_sub_i32 s6, 0x7fff, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s7, s3
-; GFX8-NEXT:    s_sub_i32 s6, 0x7fff, s6
 ; GFX8-NEXT:    s_max_i32 s2, s2, s7
-; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_lshr_b32 s5, s1, 16
 ; GFX8-NEXT:    s_min_i32 s2, s2, s6
@@ -3513,11 +3501,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s10, s9, 0
 ; GFX8-NEXT:    s_min_i32 s9, s9, 0
 ; GFX8-NEXT:    s_sub_i32 s9, 0x8000, s9
+; GFX8-NEXT:    s_sub_i32 s10, 0x7fff, s10
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s11, s3
-; GFX8-NEXT:    s_sub_i32 s10, 0x7fff, s10
 ; GFX8-NEXT:    s_max_i32 s9, s9, s11
-; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s10, s10
 ; GFX8-NEXT:    s_lshr_b32 s6, s0, 16
 ; GFX8-NEXT:    s_min_i32 s9, s9, s10
@@ -3538,11 +3525,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s9, s3, 0
 ; GFX8-NEXT:    s_min_i32 s3, s3, 0
 ; GFX8-NEXT:    s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT:    s_sub_i32 s9, 0x7fff, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s10, s4
-; GFX8-NEXT:    s_sub_i32 s9, 0x7fff, s9
 ; GFX8-NEXT:    s_max_i32 s3, s3, s10
-; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_lshr_b32 s7, s1, 16
 ; GFX8-NEXT:    s_min_i32 s3, s3, s9
@@ -3563,11 +3549,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s4, s3, 0
 ; GFX8-NEXT:    s_min_i32 s3, s3, 0
 ; GFX8-NEXT:    s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s5
-; GFX8-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX8-NEXT:    s_max_i32 s3, s3, s9
-; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_lshr_b32 s8, s2, 16
 ; GFX8-NEXT:    s_min_i32 s3, s3, s4
@@ -3924,11 +3909,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s13, s12, 0
 ; GFX8-NEXT:    s_min_i32 s12, s12, 0
 ; GFX8-NEXT:    s_sub_i32 s12, 0x8000, s12
+; GFX8-NEXT:    s_sub_i32 s13, 0x7fff, s13
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s12
 ; GFX8-NEXT:    s_sext_i32_i16 s14, s4
-; GFX8-NEXT:    s_sub_i32 s13, 0x7fff, s13
 ; GFX8-NEXT:    s_max_i32 s12, s12, s14
-; GFX8-NEXT:    s_sext_i32_i16 s12, s12
 ; GFX8-NEXT:    s_sext_i32_i16 s13, s13
 ; GFX8-NEXT:    s_lshr_b32 s8, s0, 16
 ; GFX8-NEXT:    s_min_i32 s12, s12, s13
@@ -3949,11 +3933,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s12, s4, 0
 ; GFX8-NEXT:    s_min_i32 s4, s4, 0
 ; GFX8-NEXT:    s_sub_i32 s4, 0x8000, s4
+; GFX8-NEXT:    s_sub_i32 s12, 0x7fff, s12
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s13, s5
-; GFX8-NEXT:    s_sub_i32 s12, 0x7fff, s12
 ; GFX8-NEXT:    s_max_i32 s4, s4, s13
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s12
 ; GFX8-NEXT:    s_lshr_b32 s9, s1, 16
 ; GFX8-NEXT:    s_min_i32 s4, s4, s12
@@ -3974,11 +3957,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s5, s4, 0
 ; GFX8-NEXT:    s_min_i32 s4, s4, 0
 ; GFX8-NEXT:    s_sub_i32 s4, 0x8000, s4
+; GFX8-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s6
-; GFX8-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX8-NEXT:    s_max_i32 s4, s4, s12
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_lshr_b32 s10, s2, 16
 ; GFX8-NEXT:    s_min_i32 s4, s4, s5
@@ -3999,11 +3981,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s5, s4, 0
 ; GFX8-NEXT:    s_min_i32 s4, s4, 0
 ; GFX8-NEXT:    s_sub_i32 s4, 0x8000, s4
+; GFX8-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s7
-; GFX8-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX8-NEXT:    s_max_i32 s4, s4, s6
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_lshr_b32 s11, s3, 16
 ; GFX8-NEXT:    s_min_i32 s4, s4, s5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index 6873c9e6b9b4e..9028cc2e44995 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -80,11 +80,10 @@ define amdgpu_ps i7 @s_ssubsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 9
 ; GFX8-NEXT:    s_add_i32 s3, s3, 0x8001
 ; GFX8-NEXT:    s_min_i32 s2, s2, -1
+; GFX8-NEXT:    s_add_i32 s2, s2, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_add_i32 s2, s2, 0x8000
 ; GFX8-NEXT:    s_max_i32 s1, s3, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_min_i32 s1, s1, s2
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s1
@@ -189,11 +188,10 @@ define amdgpu_ps i8 @s_ssubsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
 ; GFX8-NEXT:    s_add_i32 s3, s3, 0x8001
 ; GFX8-NEXT:    s_min_i32 s2, s2, -1
+; GFX8-NEXT:    s_add_i32 s2, s2, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_add_i32 s2, s2, 0x8000
 ; GFX8-NEXT:    s_max_i32 s1, s3, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_min_i32 s1, s1, s2
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s1
@@ -387,11 +385,10 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
 ; GFX8-NEXT:    s_add_i32 s5, s5, 0x8001
 ; GFX8-NEXT:    s_min_i32 s4, s4, -1
+; GFX8-NEXT:    s_add_i32 s4, s4, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_add_i32 s4, s4, 0x8000
 ; GFX8-NEXT:    s_max_i32 s1, s5, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_min_i32 s1, s1, s4
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s1
@@ -401,11 +398,10 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_max_i32 s4, s3, -1
 ; GFX8-NEXT:    s_add_i32 s4, s4, 0x8001
 ; GFX8-NEXT:    s_min_i32 s3, s3, -1
+; GFX8-NEXT:    s_add_i32 s3, s3, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
-; GFX8-NEXT:    s_add_i32 s3, s3, 0x8000
 ; GFX8-NEXT:    s_max_i32 s2, s4, s2
-; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_min_i32 s2, s2, s3
 ; GFX8-NEXT:    s_sub_i32 s1, s1, s2
@@ -788,11 +784,10 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
 ; GFX8-NEXT:    s_add_i32 s9, s9, 0x8001
 ; GFX8-NEXT:    s_min_i32 s8, s8, -1
+; GFX8-NEXT:    s_add_i32 s8, s8, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_add_i32 s8, s8, 0x8000
 ; GFX8-NEXT:    s_max_i32 s1, s9, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_min_i32 s1, s1, s8
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s1
@@ -802,11 +797,10 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_max_i32 s8, s5, -1
 ; GFX8-NEXT:    s_add_i32 s8, s8, 0x8001
 ; GFX8-NEXT:    s_min_i32 s5, s5, -1
+; GFX8-NEXT:    s_add_i32 s5, s5, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
-; GFX8-NEXT:    s_add_i32 s5, s5, 0x8000
 ; GFX8-NEXT:    s_max_i32 s2, s8, s2
-; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_min_i32 s2, s2, s5
 ; GFX8-NEXT:    s_sub_i32 s1, s1, s2
@@ -816,11 +810,10 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_max_i32 s6, s5, -1
 ; GFX8-NEXT:    s_add_i32 s6, s6, 0x8001
 ; GFX8-NEXT:    s_min_i32 s5, s5, -1
+; GFX8-NEXT:    s_add_i32 s5, s5, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
-; GFX8-NEXT:    s_add_i32 s5, s5, 0x8000
 ; GFX8-NEXT:    s_max_i32 s3, s6, s3
-; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_min_i32 s3, s3, s5
 ; GFX8-NEXT:    s_sub_i32 s2, s2, s3
@@ -830,14 +823,13 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshl_b32 s4, s7, 8
 ; GFX8-NEXT:    s_add_i32 s6, s6, 0x8001
 ; GFX8-NEXT:    s_min_i32 s5, s5, -1
-; GFX8-NEXT:    s_sext_i32_i16 s6, s6
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_add_i32 s5, s5, 0x8000
-; GFX8-NEXT:    s_max_i32 s4, s6, s4
+; GFX8-NEXT:    s_sext_i32_i16 s6, s6
+; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s0, s0
 ; GFX8-NEXT:    s_ashr_i32 s1, s1, 8
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
+; GFX8-NEXT:    s_max_i32 s4, s6, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_ashr_i32 s0, s0, 8
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
@@ -2634,11 +2626,10 @@ define amdgpu_ps i16 @s_ssubsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
 ; GFX8-NEXT:    s_max_i32 s3, s2, -1
 ; GFX8-NEXT:    s_add_i32 s3, s3, 0x8001
 ; GFX8-NEXT:    s_min_i32 s2, s2, -1
+; GFX8-NEXT:    s_add_i32 s2, s2, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_add_i32 s2, s2, 0x8000
 ; GFX8-NEXT:    s_max_i32 s1, s3, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_min_i32 s1, s1, s2
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s1
@@ -2839,11 +2830,10 @@ define amdgpu_ps i32 @s_ssubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 ; GFX8-NEXT:    s_max_i32 s4, s3, -1
 ; GFX8-NEXT:    s_add_i32 s4, s4, 0x8001
 ; GFX8-NEXT:    s_min_i32 s3, s3, -1
+; GFX8-NEXT:    s_add_i32 s3, s3, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s1
-; GFX8-NEXT:    s_add_i32 s3, s3, 0x8000
 ; GFX8-NEXT:    s_max_i32 s4, s4, s5
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
 ; GFX8-NEXT:    s_min_i32 s3, s4, s3
@@ -3196,11 +3186,10 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s7, s6, -1
 ; GFX8-NEXT:    s_add_i32 s7, s7, 0x8001
 ; GFX8-NEXT:    s_min_i32 s6, s6, -1
+; GFX8-NEXT:    s_add_i32 s6, s6, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s7, s7
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s2
-; GFX8-NEXT:    s_add_i32 s6, s6, 0x8000
 ; GFX8-NEXT:    s_max_i32 s7, s7, s8
-; GFX8-NEXT:    s_sext_i32_i16 s7, s7
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
 ; GFX8-NEXT:    s_min_i32 s6, s7, s6
@@ -3221,11 +3210,10 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s6, s4, -1
 ; GFX8-NEXT:    s_add_i32 s6, s6, 0x8001
 ; GFX8-NEXT:    s_min_i32 s4, s4, -1
+; GFX8-NEXT:    s_add_i32 s4, s4, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s7, s3
-; GFX8-NEXT:    s_add_i32 s4, s4, 0x8000
 ; GFX8-NEXT:    s_max_i32 s6, s6, s7
-; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_lshr_b32 s5, s1, 16
 ; GFX8-NEXT:    s_min_i32 s4, s6, s4
@@ -3519,11 +3507,10 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s10, s9, -1
 ; GFX8-NEXT:    s_add_i32 s10, s10, 0x8001
 ; GFX8-NEXT:    s_min_i32 s9, s9, -1
+; GFX8-NEXT:    s_add_i32 s9, s9, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s10, s10
 ; GFX8-NEXT:    s_sext_i32_i16 s11, s3
-; GFX8-NEXT:    s_add_i32 s9, s9, 0x8000
 ; GFX8-NEXT:    s_max_i32 s10, s10, s11
-; GFX8-NEXT:    s_sext_i32_i16 s10, s10
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_lshr_b32 s6, s0, 16
 ; GFX8-NEXT:    s_min_i32 s9, s10, s9
@@ -3544,11 +3531,10 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s9, s6, -1
 ; GFX8-NEXT:    s_add_i32 s9, s9, 0x8001
 ; GFX8-NEXT:    s_min_i32 s6, s6, -1
+; GFX8-NEXT:    s_add_i32 s6, s6, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s10, s4
-; GFX8-NEXT:    s_add_i32 s6, s6, 0x8000
 ; GFX8-NEXT:    s_max_i32 s9, s9, s10
-; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_lshr_b32 s7, s1, 16
 ; GFX8-NEXT:    s_min_i32 s6, s9, s6
@@ -3569,11 +3555,10 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s7, s6, -1
 ; GFX8-NEXT:    s_add_i32 s7, s7, 0x8001
 ; GFX8-NEXT:    s_min_i32 s6, s6, -1
+; GFX8-NEXT:    s_add_i32 s6, s6, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s7, s7
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s5
-; GFX8-NEXT:    s_add_i32 s6, s6, 0x8000
 ; GFX8-NEXT:    s_max_i32 s7, s7, s9
-; GFX8-NEXT:    s_sext_i32_i16 s7, s7
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_lshr_b32 s8, s2, 16
 ; GFX8-NEXT:    s_min_i32 s6, s7, s6
@@ -3930,11 +3915,10 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s13, s12, -1
 ; GFX8-NEXT:    s_add_i32 s13, s13, 0x8001
 ; GFX8-NEXT:    s_min_i32 s12, s12, -1
+; GFX8-NEXT:    s_add_i32 s12, s12, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s13, s13
 ; GFX8-NEXT:    s_sext_i32_i16 s14, s4
-; GFX8-NEXT:    s_add_i32 s12, s12, 0x8000
 ; GFX8-NEXT:    s_max_i32 s13, s13, s14
-; GFX8-NEXT:    s_sext_i32_i16 s13, s13
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s12
 ; GFX8-NEXT:    s_lshr_b32 s8, s0, 16
 ; GFX8-NEXT:    s_min_i32 s12, s13, s12
@@ -3955,11 +3939,10 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s12, s8, -1
 ; GFX8-NEXT:    s_add_i32 s12, s12, 0x8001
 ; GFX8-NEXT:    s_min_i32 s8, s8, -1
+; GFX8-NEXT:    s_add_i32 s8, s8, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s12
 ; GFX8-NEXT:    s_sext_i32_i16 s13, s5
-; GFX8-NEXT:    s_add_i32 s8, s8, 0x8000
 ; GFX8-NEXT:    s_max_i32 s12, s12, s13
-; GFX8-NEXT:    s_sext_i32_i16 s12, s12
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_lshr_b32 s9, s1, 16
 ; GFX8-NEXT:    s_min_i32 s8, s12, s8
@@ -3980,11 +3963,10 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s9, s8, -1
 ; GFX8-NEXT:    s_add_i32 s9, s9, 0x8001
 ; GFX8-NEXT:    s_min_i32 s8, s8, -1
+; GFX8-NEXT:    s_add_i32 s8, s8, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s6
-; GFX8-NEXT:    s_add_i32 s8, s8, 0x8000
 ; GFX8-NEXT:    s_max_i32 s9, s9, s12
-; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_lshr_b32 s10, s2, 16
 ; GFX8-NEXT:    s_min_i32 s8, s9, s8
@@ -4003,13 +3985,12 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s3
 ; GFX8-NEXT:    s_max_i32 s9, s8, -1
 ; GFX8-NEXT:    s_add_i32 s9, s9, 0x8001
-; GFX8-NEXT:    s_sub_i32 s6, s10, s6
 ; GFX8-NEXT:    s_min_i32 s8, s8, -1
+; GFX8-NEXT:    s_sub_i32 s6, s10, s6
+; GFX8-NEXT:    s_add_i32 s8, s8, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s10, s7
-; GFX8-NEXT:    s_add_i32 s8, s8, 0x8000
 ; GFX8-NEXT:    s_max_i32 s9, s9, s10
-; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_lshr_b32 s11, s3, 16
 ; GFX8-NEXT:    s_min_i32 s8, s9, s8
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index 06c0417211809..1e82b54c4031e 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -4501,21 +4501,19 @@ define i128 @v_sdiv_i128_v_pow2k(i128 %lhs) {
 ; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v4, s[6:7], v4, v5
 ; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v1, s[6:7], v1, v0, s[6:7]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s8
-; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v5, s[6:7], v2, v0, s[6:7]
+; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v2, s[6:7], v2, v0, s[6:7]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s5
 ; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v4, s[6:7], v3, v0, s[6:7]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v4
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v4
 ; GFX9-G-O0-NEXT:    s_mov_b32 s5, 1
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s5
 ; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v0, v0, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s4
-; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[5:6], v2, v[5:6]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v6
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v0, v0, v3
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v1, v1, v2
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s4
+; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[5:6], v1, v[2:3]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v6
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v0, v0, v2
 ; GFX9-G-O0-NEXT:    s_mov_b32 s4, 31
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v3, v2, v4
@@ -4590,14 +4588,12 @@ define i128 @v_udiv_i128_v_pow2k(i128 %lhs) {
 ; GFX9-G-O0-NEXT:    s_mov_b32 s4, 1
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v0, v0, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-G-O0-NEXT:    s_mov_b32 s4, 31
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s4
-; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[5:6], v2, v[4:5]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v6
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v0, v0, v4
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v1, v1, v2
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s4
+; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[4:5], v1, v[4:5]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v4
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v5
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v0, v0, v2
 ; GFX9-G-O0-NEXT:    s_mov_b32 s4, 1
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v2, v2, v3
diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
index c316ec71863d0..968471287dc4d 100644
--- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
@@ -797,12 +797,11 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GISEL-NEXT:  .LBB2_13: ; %Flow4
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
-; GISEL-NEXT:    v_and_b32_e32 v1, 0x80000000, v6
 ; GISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff00000
-; GISEL-NEXT:    v_mov_b32_e32 v3, 0xfffff
+; GISEL-NEXT:    v_and_b32_e32 v1, 0x80000000, v6
 ; GISEL-NEXT:    v_lshl_add_u32 v2, v8, 20, v2
-; GISEL-NEXT:    v_and_or_b32 v1, v10, v3, v1
-; GISEL-NEXT:    v_or3_b32 v1, v1, v2, 0
+; GISEL-NEXT:    v_and_b32_e32 v3, 0xfffff, v10
+; GISEL-NEXT:    v_or3_b32 v1, v3, v1, v2
 ; GISEL-NEXT:  .LBB2_14: ; %Flow5
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -1081,8 +1080,8 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0x3ff00000
 ; GISEL-NEXT:    v_lshl_add_u32 v0, v7, 20, v0
-; GISEL-NEXT:    v_and_b32_e32 v1, 0xfffff, v9
-; GISEL-NEXT:    v_or3_b32 v5, v1, v0, 0
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0xfffff
+; GISEL-NEXT:    v_and_or_b32 v5, v9, v1, v0
 ; GISEL-NEXT:  .LBB3_14: ; %Flow5
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GISEL-NEXT:    v_mov_b32_e32 v0, v4
diff --git a/llvm/test/CodeGen/AMDGPU/lround.ll b/llvm/test/CodeGen/AMDGPU/lround.ll
index 8036e32f90eb0..4c0774194d78f 100644
--- a/llvm/test/CodeGen/AMDGPU/lround.ll
+++ b/llvm/test/CodeGen/AMDGPU/lround.ll
@@ -116,7 +116,7 @@ define i32 @intrinsic_lround_i32_f64(double %arg) {
 ; GFX9-GISEL-NEXT:    v_mov_b32_e32 v6, 0x3ff00000
 ; GFX9-GISEL-NEXT:    s_brev_b32 s4, 1
 ; GFX9-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
-; GFX9-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-GISEL-NEXT:    v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5
 ; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
 ; GFX9-GISEL-NEXT:    v_and_or_b32 v1, v1, s4, v4
@@ -142,7 +142,7 @@ define i32 @intrinsic_lround_i32_f64(double %arg) {
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
 ; GFX10-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
-; GFX10-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-GISEL-NEXT:    v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5
 ; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4
 ; GFX10-GISEL-NEXT:    v_and_or_b32 v1, 0x80000000, v1, v4
@@ -172,7 +172,7 @@ define i32 @intrinsic_lround_i32_f64(double %arg) {
 ; GFX11-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
-; GFX11-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-GISEL-NEXT:    v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0
@@ -374,7 +374,7 @@ define i64 @intrinsic_lround_i64_f64(double %arg) {
 ; GFX9-GISEL-NEXT:    v_mov_b32_e32 v6, 0x3ff00000
 ; GFX9-GISEL-NEXT:    s_brev_b32 s4, 1
 ; GFX9-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
-; GFX9-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-GISEL-NEXT:    v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5
 ; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
 ; GFX9-GISEL-NEXT:    v_and_or_b32 v1, v1, s4, v4
@@ -414,7 +414,7 @@ define i64 @intrinsic_lround_i64_f64(double %arg) {
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
 ; GFX10-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
-; GFX10-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-GISEL-NEXT:    v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5
 ; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4
 ; GFX10-GISEL-NEXT:    v_and_or_b32 v1, 0x80000000, v1, v4
@@ -456,7 +456,7 @@ define i64 @intrinsic_lround_i64_f64(double %arg) {
 ; GFX11-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
-; GFX11-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-GISEL-NEXT:    v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0
@@ -665,7 +665,7 @@ define i64 @intrinsic_llround_i64_f64(double %arg) {
 ; GFX9-GISEL-NEXT:    v_mov_b32_e32 v6, 0x3ff00000
 ; GFX9-GISEL-NEXT:    s_brev_b32 s4, 1
 ; GFX9-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
-; GFX9-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-GISEL-NEXT:    v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5
 ; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
 ; GFX9-GISEL-NEXT:    v_and_or_b32 v1, v1, s4, v4
@@ -705,7 +705,7 @@ define i64 @intrinsic_llround_i64_f64(double %arg) {
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
 ; GFX10-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
-; GFX10-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-GISEL-NEXT:    v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5
 ; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4
 ; GFX10-GISEL-NEXT:    v_and_or_b32 v1, 0x80000000, v1, v4
@@ -747,7 +747,7 @@ define i64 @intrinsic_llround_i64_f64(double %arg) {
 ; GFX11-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
-; GFX11-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-GISEL-NEXT:    v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0
diff --git a/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll b/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
index 79c4cda2eeaef..88ac211453daa 100644
--- a/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
@@ -246,12 +246,8 @@ define amdgpu_kernel void @basic_smax_smin_sgpr(ptr addrspace(1) %out, i32 inreg
 ; GISEL-VI-NEXT:    s_sext_i32_i16 s2, s2
 ; GISEL-VI-NEXT:    s_max_i32 s3, s3, 0
 ; GISEL-VI-NEXT:    s_max_i32 s2, s2, 0
-; GISEL-VI-NEXT:    s_sext_i32_i16 s3, s3
-; GISEL-VI-NEXT:    s_sext_i32_i16 s2, s2
 ; GISEL-VI-NEXT:    s_min_i32 s3, s3, 0xff
 ; GISEL-VI-NEXT:    s_min_i32 s2, s2, 0xff
-; GISEL-VI-NEXT:    s_and_b32 s3, 0xffff, s3
-; GISEL-VI-NEXT:    s_and_b32 s2, 0xffff, s2
 ; GISEL-VI-NEXT:    s_lshl_b32 s3, s3, 16
 ; GISEL-VI-NEXT:    s_or_b32 s2, s2, s3
 ; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s0
@@ -269,8 +265,6 @@ define amdgpu_kernel void @basic_smax_smin_sgpr(ptr addrspace(1) %out, i32 inreg
 ; GISEL-GFX9-NEXT:    s_sext_i32_i16 s3, s3
 ; GISEL-GFX9-NEXT:    s_max_i32 s2, s2, 0
 ; GISEL-GFX9-NEXT:    s_max_i32 s3, s3, 0
-; GISEL-GFX9-NEXT:    s_sext_i32_i16 s2, s2
-; GISEL-GFX9-NEXT:    s_sext_i32_i16 s3, s3
 ; GISEL-GFX9-NEXT:    s_min_i32 s2, s2, 0xff
 ; GISEL-GFX9-NEXT:    s_min_i32 s3, s3, 0xff
 ; GISEL-GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s3
@@ -287,8 +281,6 @@ define amdgpu_kernel void @basic_smax_smin_sgpr(ptr addrspace(1) %out, i32 inreg
 ; GISEL-GFX11-NEXT:    s_sext_i32_i16 s3, s3
 ; GISEL-GFX11-NEXT:    s_max_i32 s2, s2, 0
 ; GISEL-GFX11-NEXT:    s_max_i32 s3, s3, 0
-; GISEL-GFX11-NEXT:    s_sext_i32_i16 s2, s2
-; GISEL-GFX11-NEXT:    s_sext_i32_i16 s3, s3
 ; GISEL-GFX11-NEXT:    s_min_i32 s2, s2, 0xff
 ; GISEL-GFX11-NEXT:    s_min_i32 s3, s3, 0xff
 ; GISEL-GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -306,8 +298,6 @@ define amdgpu_kernel void @basic_smax_smin_sgpr(ptr addrspace(1) %out, i32 inreg
 ; GISEL-GFX12-NEXT:    s_sext_i32_i16 s3, s3
 ; GISEL-GFX12-NEXT:    s_max_i32 s2, s2, 0
 ; GISEL-GFX12-NEXT:    s_max_i32 s3, s3, 0
-; GISEL-GFX12-NEXT:    s_sext_i32_i16 s2, s2
-; GISEL-GFX12-NEXT:    s_sext_i32_i16 s3, s3
 ; GISEL-GFX12-NEXT:    s_min_i32 s2, s2, 0xff
 ; GISEL-GFX12-NEXT:    s_min_i32 s3, s3, 0xff
 ; GISEL-GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -745,13 +735,11 @@ define amdgpu_kernel void @vec_smax_smin_sgpr(ptr addrspace(1) %out, <2 x i16> i
 ; GISEL-VI-NEXT:    s_sext_i32_i16 s3, s2
 ; GISEL-VI-NEXT:    s_bfe_i32 s2, s2, 0x100010
 ; GISEL-VI-NEXT:    s_max_i32 s2, s2, 0
-; GISEL-VI-NEXT:    s_max_i32 s3, s3, 0
 ; GISEL-VI-NEXT:    s_sext_i32_i16 s2, s2
-; GISEL-VI-NEXT:    s_sext_i32_i16 s3, s3
 ; GISEL-VI-NEXT:    s_min_i32 s2, s2, 0xff
-; GISEL-VI-NEXT:    s_min_i32 s3, s3, 0xff
+; GISEL-VI-NEXT:    s_max_i32 s3, s3, 0
 ; GISEL-VI-NEXT:    s_and_b32 s2, 0xffff, s2
-; GISEL-VI-NEXT:    s_and_b32 s3, 0xffff, s3
+; GISEL-VI-NEXT:    s_min_i32 s3, s3, 0xff
 ; GISEL-VI-NEXT:    s_lshl_b32 s2, s2, 16
 ; GISEL-VI-NEXT:    s_or_b32 s2, s3, s2
 ; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s0

>From 39ae19d11123de1b9f35f525dc9d0299eedca60a Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutr...@amd.com>
Date: Wed, 28 May 2025 11:12:27 +0200
Subject: [PATCH 2/2] Remove duplicate combines

---
 llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index df867aaa204b1..759d0b077bf21 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -205,10 +205,9 @@ def AMDGPUPostLegalizerCombiner: GICombiner<
 
 def AMDGPURegBankCombiner : GICombiner<
   "AMDGPURegBankCombinerImpl",
-  [unmerge_merge, unmerge_cst, unmerge_undef,
-   zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
-   fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
-   identity_combines, redundant_and, constant_fold_cast_op,
+  [unmerge_merge, unmerge_cst, unmerge_undef, int_minmax_to_med3,
+   ptr_add_immed_chain, fp_minmax_to_clamp, fp_minmax_to_med3,
+   fmed3_intrinsic_to_clamp, identity_combines, constant_fold_cast_op,
    cast_of_cast_combines, sext_trunc, zext_of_shift_amount_combines,
    lower_uniform_sbfx, lower_uniform_ubfx, form_bitfield_extract,
    known_bits_simplifications]> {

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to