https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/141591

>From 515092eec54b06b21c27a36d35b9f99448c436d8 Mon Sep 17 00:00:00 2001
From: pvanhout <[email protected]>
Date: Tue, 27 May 2025 12:29:02 +0200
Subject: [PATCH 1/3] [AMDGPU] Add KnownBits simplification combines to
 RegBankCombiner

---
 llvm/lib/Target/AMDGPU/AMDGPUCombine.td       |  3 +-
 llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll   | 30 ++++++---
 .../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll | 61 +++++++-----------
 .../test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll | 63 +++++++------------
 llvm/test/CodeGen/AMDGPU/div_i128.ll          |  6 +-
 llvm/test/CodeGen/AMDGPU/lround.ll            | 18 +++---
 llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll   | 16 +----
 7 files changed, 81 insertions(+), 116 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 3639e2b960e0a..57a3ee6d0ce04 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -250,5 +250,6 @@ def AMDGPURegBankCombiner : GICombiner<
    fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
    identity_combines, redundant_and, constant_fold_cast_op,
    cast_of_cast_combines, sext_trunc, zext_of_shift_amount_combines,
-   d16_load, lower_uniform_sbfx, lower_uniform_ubfx, form_bitfield_extract]> {
+   d16_load, lower_uniform_sbfx, lower_uniform_ubfx, form_bitfield_extract,
+   known_bits_simplifications]> {
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
index 518af70cbbf9f..1d8413b82fc6a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
@@ -1744,8 +1744,12 @@ define i65 @v_lshr_i65_33(i65 %value) {
 ; GFX6-LABEL: v_lshr_i65_33:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v3, v1
-; GFX6-NEXT:    v_mov_b32_e32 v0, 1
+; GFX6-NEXT:    v_mov_b32_e32 v3, 1
+; GFX6-NEXT:    v_mov_b32_e32 v4, 0
+; GFX6-NEXT:    v_and_b32_e32 v3, 1, v2
+; GFX6-NEXT:    v_lshl_b64 v[2:3], v[3:4], 31
+; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
+; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX6-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX6-NEXT:    v_and_b32_e32 v0, 1, v2
 ; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 31
@@ -1757,8 +1761,12 @@ define i65 @v_lshr_i65_33(i65 %value) {
 ; GFX8-LABEL: v_lshr_i65_33:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v3, v1
-; GFX8-NEXT:    v_mov_b32_e32 v0, 1
+; GFX8-NEXT:    v_mov_b32_e32 v3, 1
+; GFX8-NEXT:    v_mov_b32_e32 v4, 0
+; GFX8-NEXT:    v_and_b32_e32 v3, 1, v2
+; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX8-NEXT:    v_and_b32_e32 v0, 1, v2
 ; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 31, v[0:1]
@@ -1770,8 +1778,12 @@ define i65 @v_lshr_i65_33(i65 %value) {
 ; GFX9-LABEL: v_lshr_i65_33:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v3, v1
-; GFX9-NEXT:    v_mov_b32_e32 v0, 1
+; GFX9-NEXT:    v_mov_b32_e32 v3, 1
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-NEXT:    v_and_b32_e32 v3, 1, v2
+; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
+; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v2
 ; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 31, v[0:1]
@@ -1783,8 +1795,10 @@ define i65 @v_lshr_i65_33(i65 %value) {
 ; GFX10-LABEL: v_lshr_i65_33:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mov_b32_e32 v3, v1
-; GFX10-NEXT:    v_mov_b32_e32 v0, 1
+; GFX10-NEXT:    v_mov_b32_e32 v3, 1
+; GFX10-NEXT:    v_mov_b32_e32 v4, 0
+; GFX10-NEXT:    v_and_b32_e32 v3, 1, v2
+; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-NEXT:    v_and_b32_e32 v0, 1, v2
 ; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 1, v3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index 14332dfeaabd8..bf48ebb8242df 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -80,11 +80,10 @@ define amdgpu_ps i7 @s_saddsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
 ; GFX8-NEXT:    s_min_i32 s2, s2, 0
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 9
 ; GFX8-NEXT:    s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX8-NEXT:    s_max_i32 s1, s2, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s3
 ; GFX8-NEXT:    s_min_i32 s1, s1, s2
 ; GFX8-NEXT:    s_add_i32 s0, s0, s1
@@ -189,11 +188,10 @@ define amdgpu_ps i8 @s_saddsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
 ; GFX8-NEXT:    s_min_i32 s2, s2, 0
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
 ; GFX8-NEXT:    s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX8-NEXT:    s_max_i32 s1, s2, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s3
 ; GFX8-NEXT:    s_min_i32 s1, s1, s2
 ; GFX8-NEXT:    s_add_i32 s0, s0, s1
@@ -388,11 +386,10 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshr_b32 s3, s1, 8
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
 ; GFX8-NEXT:    s_sub_i32 s4, 0x8000, s4
+; GFX8-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX8-NEXT:    s_max_i32 s1, s4, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s5
 ; GFX8-NEXT:    s_min_i32 s1, s1, s4
 ; GFX8-NEXT:    s_add_i32 s0, s0, s1
@@ -402,11 +399,10 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_max_i32 s4, s3, 0
 ; GFX8-NEXT:    s_min_i32 s3, s3, 0
 ; GFX8-NEXT:    s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
-; GFX8-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX8-NEXT:    s_max_i32 s2, s3, s2
-; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s4
 ; GFX8-NEXT:    s_min_i32 s2, s2, s3
 ; GFX8-NEXT:    s_add_i32 s1, s1, s2
@@ -789,11 +785,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshr_b32 s7, s1, 24
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
 ; GFX8-NEXT:    s_sub_i32 s8, 0x8000, s8
+; GFX8-NEXT:    s_sub_i32 s9, 0x7fff, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s9, 0x7fff, s9
 ; GFX8-NEXT:    s_max_i32 s1, s8, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s9
 ; GFX8-NEXT:    s_min_i32 s1, s1, s8
 ; GFX8-NEXT:    s_add_i32 s0, s0, s1
@@ -803,11 +798,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_max_i32 s8, s5, 0
 ; GFX8-NEXT:    s_min_i32 s5, s5, 0
 ; GFX8-NEXT:    s_sub_i32 s5, 0x8000, s5
+; GFX8-NEXT:    s_sub_i32 s8, 0x7fff, s8
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
-; GFX8-NEXT:    s_sub_i32 s8, 0x7fff, s8
 ; GFX8-NEXT:    s_max_i32 s2, s5, s2
-; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s8
 ; GFX8-NEXT:    s_min_i32 s2, s2, s5
 ; GFX8-NEXT:    s_add_i32 s1, s1, s2
@@ -817,11 +811,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_max_i32 s6, s5, 0
 ; GFX8-NEXT:    s_min_i32 s5, s5, 0
 ; GFX8-NEXT:    s_sub_i32 s5, 0x8000, s5
+; GFX8-NEXT:    s_sub_i32 s6, 0x7fff, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
-; GFX8-NEXT:    s_sub_i32 s6, 0x7fff, s6
 ; GFX8-NEXT:    s_max_i32 s3, s5, s3
-; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s6
 ; GFX8-NEXT:    s_min_i32 s3, s3, s5
 ; GFX8-NEXT:    s_add_i32 s2, s2, s3
@@ -831,14 +824,13 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_min_i32 s5, s5, 0
 ; GFX8-NEXT:    s_lshl_b32 s4, s7, 8
 ; GFX8-NEXT:    s_sub_i32 s5, 0x8000, s5
-; GFX8-NEXT:    s_sext_i32_i16 s5, s5
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sub_i32 s6, 0x7fff, s6
-; GFX8-NEXT:    s_max_i32 s4, s5, s4
+; GFX8-NEXT:    s_sext_i32_i16 s5, s5
+; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s0, s0
 ; GFX8-NEXT:    s_ashr_i32 s1, s1, 8
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
+; GFX8-NEXT:    s_max_i32 s4, s5, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s6
 ; GFX8-NEXT:    s_ashr_i32 s0, s0, 8
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
@@ -2633,11 +2625,10 @@ define amdgpu_ps i16 @s_saddsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
 ; GFX8-NEXT:    s_max_i32 s3, s2, 0
 ; GFX8-NEXT:    s_min_i32 s2, s2, 0
 ; GFX8-NEXT:    s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_sub_i32 s3, 0x7fff, s3
 ; GFX8-NEXT:    s_max_i32 s1, s2, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s3
 ; GFX8-NEXT:    s_min_i32 s1, s1, s2
 ; GFX8-NEXT:    s_add_i32 s0, s0, s1
@@ -2837,11 +2828,10 @@ define amdgpu_ps i32 @s_saddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 ; GFX8-NEXT:    s_max_i32 s4, s3, 0
 ; GFX8-NEXT:    s_min_i32 s3, s3, 0
 ; GFX8-NEXT:    s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s1
-; GFX8-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX8-NEXT:    s_max_i32 s3, s3, s5
-; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
 ; GFX8-NEXT:    s_min_i32 s3, s3, s4
@@ -3194,11 +3184,10 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s7, s6, 0
 ; GFX8-NEXT:    s_min_i32 s6, s6, 0
 ; GFX8-NEXT:    s_sub_i32 s6, 0x8000, s6
+; GFX8-NEXT:    s_sub_i32 s7, 0x7fff, s7
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s2
-; GFX8-NEXT:    s_sub_i32 s7, 0x7fff, s7
 ; GFX8-NEXT:    s_max_i32 s6, s6, s8
-; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s7, s7
 ; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
 ; GFX8-NEXT:    s_min_i32 s6, s6, s7
@@ -3219,11 +3208,10 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s6, s2, 0
 ; GFX8-NEXT:    s_min_i32 s2, s2, 0
 ; GFX8-NEXT:    s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT:    s_sub_i32 s6, 0x7fff, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s7, s3
-; GFX8-NEXT:    s_sub_i32 s6, 0x7fff, s6
 ; GFX8-NEXT:    s_max_i32 s2, s2, s7
-; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_lshr_b32 s5, s1, 16
 ; GFX8-NEXT:    s_min_i32 s2, s2, s6
@@ -3517,11 +3505,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s10, s9, 0
 ; GFX8-NEXT:    s_min_i32 s9, s9, 0
 ; GFX8-NEXT:    s_sub_i32 s9, 0x8000, s9
+; GFX8-NEXT:    s_sub_i32 s10, 0x7fff, s10
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s11, s3
-; GFX8-NEXT:    s_sub_i32 s10, 0x7fff, s10
 ; GFX8-NEXT:    s_max_i32 s9, s9, s11
-; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s10, s10
 ; GFX8-NEXT:    s_lshr_b32 s6, s0, 16
 ; GFX8-NEXT:    s_min_i32 s9, s9, s10
@@ -3542,11 +3529,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s9, s3, 0
 ; GFX8-NEXT:    s_min_i32 s3, s3, 0
 ; GFX8-NEXT:    s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT:    s_sub_i32 s9, 0x7fff, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s10, s4
-; GFX8-NEXT:    s_sub_i32 s9, 0x7fff, s9
 ; GFX8-NEXT:    s_max_i32 s3, s3, s10
-; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_lshr_b32 s7, s1, 16
 ; GFX8-NEXT:    s_min_i32 s3, s3, s9
@@ -3567,11 +3553,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s4, s3, 0
 ; GFX8-NEXT:    s_min_i32 s3, s3, 0
 ; GFX8-NEXT:    s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s5
-; GFX8-NEXT:    s_sub_i32 s4, 0x7fff, s4
 ; GFX8-NEXT:    s_max_i32 s3, s3, s9
-; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_lshr_b32 s8, s2, 16
 ; GFX8-NEXT:    s_min_i32 s3, s3, s4
@@ -3928,11 +3913,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s13, s12, 0
 ; GFX8-NEXT:    s_min_i32 s12, s12, 0
 ; GFX8-NEXT:    s_sub_i32 s12, 0x8000, s12
+; GFX8-NEXT:    s_sub_i32 s13, 0x7fff, s13
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s12
 ; GFX8-NEXT:    s_sext_i32_i16 s14, s4
-; GFX8-NEXT:    s_sub_i32 s13, 0x7fff, s13
 ; GFX8-NEXT:    s_max_i32 s12, s12, s14
-; GFX8-NEXT:    s_sext_i32_i16 s12, s12
 ; GFX8-NEXT:    s_sext_i32_i16 s13, s13
 ; GFX8-NEXT:    s_lshr_b32 s8, s0, 16
 ; GFX8-NEXT:    s_min_i32 s12, s12, s13
@@ -3953,11 +3937,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s12, s4, 0
 ; GFX8-NEXT:    s_min_i32 s4, s4, 0
 ; GFX8-NEXT:    s_sub_i32 s4, 0x8000, s4
+; GFX8-NEXT:    s_sub_i32 s12, 0x7fff, s12
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s13, s5
-; GFX8-NEXT:    s_sub_i32 s12, 0x7fff, s12
 ; GFX8-NEXT:    s_max_i32 s4, s4, s13
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s12
 ; GFX8-NEXT:    s_lshr_b32 s9, s1, 16
 ; GFX8-NEXT:    s_min_i32 s4, s4, s12
@@ -3978,11 +3961,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s5, s4, 0
 ; GFX8-NEXT:    s_min_i32 s4, s4, 0
 ; GFX8-NEXT:    s_sub_i32 s4, 0x8000, s4
+; GFX8-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s6
-; GFX8-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX8-NEXT:    s_max_i32 s4, s4, s12
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_lshr_b32 s10, s2, 16
 ; GFX8-NEXT:    s_min_i32 s4, s4, s5
@@ -4003,11 +3985,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s5, s4, 0
 ; GFX8-NEXT:    s_min_i32 s4, s4, 0
 ; GFX8-NEXT:    s_sub_i32 s4, 0x8000, s4
+; GFX8-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s7
-; GFX8-NEXT:    s_sub_i32 s5, 0x7fff, s5
 ; GFX8-NEXT:    s_max_i32 s4, s4, s6
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_lshr_b32 s11, s3, 16
 ; GFX8-NEXT:    s_min_i32 s4, s4, s5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index 3b9ff6c2f1741..7b2ca1f98b151 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -80,11 +80,10 @@ define amdgpu_ps i7 @s_ssubsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 9
 ; GFX8-NEXT:    s_add_i32 s3, s3, 0x8001
 ; GFX8-NEXT:    s_min_i32 s2, s2, -1
+; GFX8-NEXT:    s_add_i32 s2, s2, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_add_i32 s2, s2, 0x8000
 ; GFX8-NEXT:    s_max_i32 s1, s3, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_min_i32 s1, s1, s2
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s1
@@ -189,11 +188,10 @@ define amdgpu_ps i8 @s_ssubsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
 ; GFX8-NEXT:    s_add_i32 s3, s3, 0x8001
 ; GFX8-NEXT:    s_min_i32 s2, s2, -1
+; GFX8-NEXT:    s_add_i32 s2, s2, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_add_i32 s2, s2, 0x8000
 ; GFX8-NEXT:    s_max_i32 s1, s3, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_min_i32 s1, s1, s2
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s1
@@ -388,11 +386,10 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
 ; GFX8-NEXT:    s_add_i32 s5, s5, 0x8001
 ; GFX8-NEXT:    s_min_i32 s4, s4, -1
+; GFX8-NEXT:    s_add_i32 s4, s4, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_add_i32 s4, s4, 0x8000
 ; GFX8-NEXT:    s_max_i32 s1, s5, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_min_i32 s1, s1, s4
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s1
@@ -402,11 +399,10 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_max_i32 s4, s3, -1
 ; GFX8-NEXT:    s_add_i32 s4, s4, 0x8001
 ; GFX8-NEXT:    s_min_i32 s3, s3, -1
+; GFX8-NEXT:    s_add_i32 s3, s3, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
-; GFX8-NEXT:    s_add_i32 s3, s3, 0x8000
 ; GFX8-NEXT:    s_max_i32 s2, s4, s2
-; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_min_i32 s2, s2, s3
 ; GFX8-NEXT:    s_sub_i32 s1, s1, s2
@@ -789,11 +785,10 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 8
 ; GFX8-NEXT:    s_add_i32 s9, s9, 0x8001
 ; GFX8-NEXT:    s_min_i32 s8, s8, -1
+; GFX8-NEXT:    s_add_i32 s8, s8, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_add_i32 s8, s8, 0x8000
 ; GFX8-NEXT:    s_max_i32 s1, s9, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_min_i32 s1, s1, s8
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s1
@@ -803,11 +798,10 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_max_i32 s8, s5, -1
 ; GFX8-NEXT:    s_add_i32 s8, s8, 0x8001
 ; GFX8-NEXT:    s_min_i32 s5, s5, -1
+; GFX8-NEXT:    s_add_i32 s5, s5, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
-; GFX8-NEXT:    s_add_i32 s5, s5, 0x8000
 ; GFX8-NEXT:    s_max_i32 s2, s8, s2
-; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_min_i32 s2, s2, s5
 ; GFX8-NEXT:    s_sub_i32 s1, s1, s2
@@ -817,11 +811,10 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_max_i32 s6, s5, -1
 ; GFX8-NEXT:    s_add_i32 s6, s6, 0x8001
 ; GFX8-NEXT:    s_min_i32 s5, s5, -1
+; GFX8-NEXT:    s_add_i32 s5, s5, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
-; GFX8-NEXT:    s_add_i32 s5, s5, 0x8000
 ; GFX8-NEXT:    s_max_i32 s3, s6, s3
-; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_min_i32 s3, s3, s5
 ; GFX8-NEXT:    s_sub_i32 s2, s2, s3
@@ -831,14 +824,13 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX8-NEXT:    s_lshl_b32 s4, s7, 8
 ; GFX8-NEXT:    s_add_i32 s6, s6, 0x8001
 ; GFX8-NEXT:    s_min_i32 s5, s5, -1
-; GFX8-NEXT:    s_sext_i32_i16 s6, s6
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_add_i32 s5, s5, 0x8000
-; GFX8-NEXT:    s_max_i32 s4, s6, s4
+; GFX8-NEXT:    s_sext_i32_i16 s6, s6
+; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s0, s0
 ; GFX8-NEXT:    s_ashr_i32 s1, s1, 8
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
+; GFX8-NEXT:    s_max_i32 s4, s6, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX8-NEXT:    s_ashr_i32 s0, s0, 8
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
@@ -2637,11 +2629,10 @@ define amdgpu_ps i16 @s_ssubsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
 ; GFX8-NEXT:    s_max_i32 s3, s2, -1
 ; GFX8-NEXT:    s_add_i32 s3, s3, 0x8001
 ; GFX8-NEXT:    s_min_i32 s2, s2, -1
+; GFX8-NEXT:    s_add_i32 s2, s2, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_sext_i32_i16 s1, s1
-; GFX8-NEXT:    s_add_i32 s2, s2, 0x8000
 ; GFX8-NEXT:    s_max_i32 s1, s3, s1
-; GFX8-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX8-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX8-NEXT:    s_min_i32 s1, s1, s2
 ; GFX8-NEXT:    s_sub_i32 s0, s0, s1
@@ -2842,11 +2833,10 @@ define amdgpu_ps i32 @s_ssubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 ; GFX8-NEXT:    s_max_i32 s4, s3, -1
 ; GFX8-NEXT:    s_add_i32 s4, s4, 0x8001
 ; GFX8-NEXT:    s_min_i32 s3, s3, -1
+; GFX8-NEXT:    s_add_i32 s3, s3, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s5, s1
-; GFX8-NEXT:    s_add_i32 s3, s3, 0x8000
 ; GFX8-NEXT:    s_max_i32 s4, s4, s5
-; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_sext_i32_i16 s3, s3
 ; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
 ; GFX8-NEXT:    s_min_i32 s3, s4, s3
@@ -3199,11 +3189,10 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s7, s6, -1
 ; GFX8-NEXT:    s_add_i32 s7, s7, 0x8001
 ; GFX8-NEXT:    s_min_i32 s6, s6, -1
+; GFX8-NEXT:    s_add_i32 s6, s6, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s7, s7
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s2
-; GFX8-NEXT:    s_add_i32 s6, s6, 0x8000
 ; GFX8-NEXT:    s_max_i32 s7, s7, s8
-; GFX8-NEXT:    s_sext_i32_i16 s7, s7
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
 ; GFX8-NEXT:    s_min_i32 s6, s7, s6
@@ -3224,11 +3213,10 @@ define amdgpu_ps <2 x i32> @s_ssubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s6, s4, -1
 ; GFX8-NEXT:    s_add_i32 s6, s6, 0x8001
 ; GFX8-NEXT:    s_min_i32 s4, s4, -1
+; GFX8-NEXT:    s_add_i32 s4, s4, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s7, s3
-; GFX8-NEXT:    s_add_i32 s4, s4, 0x8000
 ; GFX8-NEXT:    s_max_i32 s6, s6, s7
-; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_sext_i32_i16 s4, s4
 ; GFX8-NEXT:    s_lshr_b32 s5, s1, 16
 ; GFX8-NEXT:    s_min_i32 s4, s6, s4
@@ -3522,11 +3510,10 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s10, s9, -1
 ; GFX8-NEXT:    s_add_i32 s10, s10, 0x8001
 ; GFX8-NEXT:    s_min_i32 s9, s9, -1
+; GFX8-NEXT:    s_add_i32 s9, s9, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s10, s10
 ; GFX8-NEXT:    s_sext_i32_i16 s11, s3
-; GFX8-NEXT:    s_add_i32 s9, s9, 0x8000
 ; GFX8-NEXT:    s_max_i32 s10, s10, s11
-; GFX8-NEXT:    s_sext_i32_i16 s10, s10
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_lshr_b32 s6, s0, 16
 ; GFX8-NEXT:    s_min_i32 s9, s10, s9
@@ -3547,11 +3534,10 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s9, s6, -1
 ; GFX8-NEXT:    s_add_i32 s9, s9, 0x8001
 ; GFX8-NEXT:    s_min_i32 s6, s6, -1
+; GFX8-NEXT:    s_add_i32 s6, s6, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s10, s4
-; GFX8-NEXT:    s_add_i32 s6, s6, 0x8000
 ; GFX8-NEXT:    s_max_i32 s9, s9, s10
-; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_lshr_b32 s7, s1, 16
 ; GFX8-NEXT:    s_min_i32 s6, s9, s6
@@ -3572,11 +3558,10 @@ define amdgpu_ps <3 x i32> @s_ssubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s7, s6, -1
 ; GFX8-NEXT:    s_add_i32 s7, s7, 0x8001
 ; GFX8-NEXT:    s_min_i32 s6, s6, -1
+; GFX8-NEXT:    s_add_i32 s6, s6, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s7, s7
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s5
-; GFX8-NEXT:    s_add_i32 s6, s6, 0x8000
 ; GFX8-NEXT:    s_max_i32 s7, s7, s9
-; GFX8-NEXT:    s_sext_i32_i16 s7, s7
 ; GFX8-NEXT:    s_sext_i32_i16 s6, s6
 ; GFX8-NEXT:    s_lshr_b32 s8, s2, 16
 ; GFX8-NEXT:    s_min_i32 s6, s7, s6
@@ -3933,11 +3918,10 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s13, s12, -1
 ; GFX8-NEXT:    s_add_i32 s13, s13, 0x8001
 ; GFX8-NEXT:    s_min_i32 s12, s12, -1
+; GFX8-NEXT:    s_add_i32 s12, s12, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s13, s13
 ; GFX8-NEXT:    s_sext_i32_i16 s14, s4
-; GFX8-NEXT:    s_add_i32 s12, s12, 0x8000
 ; GFX8-NEXT:    s_max_i32 s13, s13, s14
-; GFX8-NEXT:    s_sext_i32_i16 s13, s13
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s12
 ; GFX8-NEXT:    s_lshr_b32 s8, s0, 16
 ; GFX8-NEXT:    s_min_i32 s12, s13, s12
@@ -3958,11 +3942,10 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s12, s8, -1
 ; GFX8-NEXT:    s_add_i32 s12, s12, 0x8001
 ; GFX8-NEXT:    s_min_i32 s8, s8, -1
+; GFX8-NEXT:    s_add_i32 s8, s8, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s12
 ; GFX8-NEXT:    s_sext_i32_i16 s13, s5
-; GFX8-NEXT:    s_add_i32 s8, s8, 0x8000
 ; GFX8-NEXT:    s_max_i32 s12, s12, s13
-; GFX8-NEXT:    s_sext_i32_i16 s12, s12
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_lshr_b32 s9, s1, 16
 ; GFX8-NEXT:    s_min_i32 s8, s12, s8
@@ -3983,11 +3966,10 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_max_i32 s9, s8, -1
 ; GFX8-NEXT:    s_add_i32 s9, s9, 0x8001
 ; GFX8-NEXT:    s_min_i32 s8, s8, -1
+; GFX8-NEXT:    s_add_i32 s8, s8, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s12, s6
-; GFX8-NEXT:    s_add_i32 s8, s8, 0x8000
 ; GFX8-NEXT:    s_max_i32 s9, s9, s12
-; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_lshr_b32 s10, s2, 16
 ; GFX8-NEXT:    s_min_i32 s8, s9, s8
@@ -4006,13 +3988,12 @@ define amdgpu_ps <4 x i32> @s_ssubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s3
 ; GFX8-NEXT:    s_max_i32 s9, s8, -1
 ; GFX8-NEXT:    s_add_i32 s9, s9, 0x8001
-; GFX8-NEXT:    s_sub_i32 s6, s10, s6
 ; GFX8-NEXT:    s_min_i32 s8, s8, -1
+; GFX8-NEXT:    s_sub_i32 s6, s10, s6
+; GFX8-NEXT:    s_add_i32 s8, s8, 0x8000
 ; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s10, s7
-; GFX8-NEXT:    s_add_i32 s8, s8, 0x8000
 ; GFX8-NEXT:    s_max_i32 s9, s9, s10
-; GFX8-NEXT:    s_sext_i32_i16 s9, s9
 ; GFX8-NEXT:    s_sext_i32_i16 s8, s8
 ; GFX8-NEXT:    s_lshr_b32 s11, s3, 16
 ; GFX8-NEXT:    s_min_i32 s8, s9, s8
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index dd9a013d37203..efbca0c1b812f 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -4355,11 +4355,11 @@ define i128 @v_sdiv_i128_v_pow2k(i128 %lhs) {
 ; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v4, s[6:7], v4, v5
 ; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v1, s[6:7], v1, v0, s[6:7]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s8
-; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v5, s[6:7], v2, v0, s[6:7]
+; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v2, s[6:7], v2, v0, s[6:7]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s5
 ; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v4, s[6:7], v3, v0, s[6:7]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v4
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v4
 ; GFX9-G-O0-NEXT:    s_mov_b32 s5, 1
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s5
 ; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v2, v0, v1
diff --git a/llvm/test/CodeGen/AMDGPU/lround.ll b/llvm/test/CodeGen/AMDGPU/lround.ll
index d8d8308f6cd8a..e09c9775fe0aa 100644
--- a/llvm/test/CodeGen/AMDGPU/lround.ll
+++ b/llvm/test/CodeGen/AMDGPU/lround.ll
@@ -116,7 +116,7 @@ define i32 @intrinsic_lround_i32_f64(double %arg) {
 ; GFX9-GISEL-NEXT:    v_mov_b32_e32 v6, 0x3ff00000
 ; GFX9-GISEL-NEXT:    s_brev_b32 s4, 1
 ; GFX9-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
-; GFX9-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-GISEL-NEXT:    v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5
 ; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
 ; GFX9-GISEL-NEXT:    v_and_or_b32 v1, v1, s4, v4
@@ -142,7 +142,7 @@ define i32 @intrinsic_lround_i32_f64(double %arg) {
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
 ; GFX10-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
-; GFX10-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-GISEL-NEXT:    v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5
 ; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4
 ; GFX10-GISEL-NEXT:    v_and_or_b32 v1, 0x80000000, v1, v4
@@ -172,7 +172,7 @@ define i32 @intrinsic_lround_i32_f64(double %arg) {
 ; GFX11-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
-; GFX11-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-GISEL-NEXT:    v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0
@@ -374,7 +374,7 @@ define i64 @intrinsic_lround_i64_f64(double %arg) {
 ; GFX9-GISEL-NEXT:    v_mov_b32_e32 v6, 0x3ff00000
 ; GFX9-GISEL-NEXT:    s_brev_b32 s4, 1
 ; GFX9-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
-; GFX9-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-GISEL-NEXT:    v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5
 ; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
 ; GFX9-GISEL-NEXT:    v_and_or_b32 v1, v1, s4, v4
@@ -414,7 +414,7 @@ define i64 @intrinsic_lround_i64_f64(double %arg) {
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
 ; GFX10-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
-; GFX10-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-GISEL-NEXT:    v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5
 ; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4
 ; GFX10-GISEL-NEXT:    v_and_or_b32 v1, 0x80000000, v1, v4
@@ -456,7 +456,7 @@ define i64 @intrinsic_lround_i64_f64(double %arg) {
 ; GFX11-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
-; GFX11-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-GISEL-NEXT:    v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0
@@ -665,7 +665,7 @@ define i64 @intrinsic_llround_i64_f64(double %arg) {
 ; GFX9-GISEL-NEXT:    v_mov_b32_e32 v6, 0x3ff00000
 ; GFX9-GISEL-NEXT:    s_brev_b32 s4, 1
 ; GFX9-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
-; GFX9-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-GISEL-NEXT:    v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5
 ; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v6, vcc
 ; GFX9-GISEL-NEXT:    v_and_or_b32 v1, v1, s4, v4
@@ -705,7 +705,7 @@ define i64 @intrinsic_llround_i64_f64(double %arg) {
 ; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
 ; GFX10-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
-; GFX10-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-GISEL-NEXT:    v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5
 ; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4
 ; GFX10-GISEL-NEXT:    v_and_or_b32 v1, 0x80000000, v1, v4
@@ -747,7 +747,7 @@ define i64 @intrinsic_llround_i64_f64(double %arg) {
 ; GFX11-GISEL-NEXT:    v_trunc_f64_e32 v[2:3], v[0:1]
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
 ; GFX11-GISEL-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
-; GFX11-GISEL-NEXT:    v_and_or_b32 v0, v0, 0, 0
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-GISEL-NEXT:    v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0
diff --git a/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll b/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
index 38f2310b5c62d..3d8688345f639 100644
--- a/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
@@ -240,12 +240,8 @@ define amdgpu_kernel void @basic_smax_smin_sgpr(ptr addrspace(1) %out, i32 inreg
 ; GISEL-VI-NEXT:    s_sext_i32_i16 s2, s2
 ; GISEL-VI-NEXT:    s_max_i32 s3, s3, 0
 ; GISEL-VI-NEXT:    s_max_i32 s2, s2, 0
-; GISEL-VI-NEXT:    s_sext_i32_i16 s3, s3
-; GISEL-VI-NEXT:    s_sext_i32_i16 s2, s2
 ; GISEL-VI-NEXT:    s_min_i32 s3, s3, 0xff
 ; GISEL-VI-NEXT:    s_min_i32 s2, s2, 0xff
-; GISEL-VI-NEXT:    s_and_b32 s3, 0xffff, s3
-; GISEL-VI-NEXT:    s_and_b32 s2, 0xffff, s2
 ; GISEL-VI-NEXT:    s_lshl_b32 s3, s3, 16
 ; GISEL-VI-NEXT:    s_or_b32 s2, s2, s3
 ; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s0
@@ -263,8 +259,6 @@ define amdgpu_kernel void @basic_smax_smin_sgpr(ptr addrspace(1) %out, i32 inreg
 ; GISEL-GFX9-NEXT:    s_sext_i32_i16 s3, s3
 ; GISEL-GFX9-NEXT:    s_max_i32 s2, s2, 0
 ; GISEL-GFX9-NEXT:    s_max_i32 s3, s3, 0
-; GISEL-GFX9-NEXT:    s_sext_i32_i16 s2, s2
-; GISEL-GFX9-NEXT:    s_sext_i32_i16 s3, s3
 ; GISEL-GFX9-NEXT:    s_min_i32 s2, s2, 0xff
 ; GISEL-GFX9-NEXT:    s_min_i32 s3, s3, 0xff
 ; GISEL-GFX9-NEXT:    s_pack_ll_b32_b16 s2, s2, s3
@@ -281,8 +275,6 @@ define amdgpu_kernel void @basic_smax_smin_sgpr(ptr addrspace(1) %out, i32 inreg
 ; GISEL-GFX11-NEXT:    s_sext_i32_i16 s3, s3
 ; GISEL-GFX11-NEXT:    s_max_i32 s2, s2, 0
 ; GISEL-GFX11-NEXT:    s_max_i32 s3, s3, 0
-; GISEL-GFX11-NEXT:    s_sext_i32_i16 s2, s2
-; GISEL-GFX11-NEXT:    s_sext_i32_i16 s3, s3
 ; GISEL-GFX11-NEXT:    s_min_i32 s2, s2, 0xff
 ; GISEL-GFX11-NEXT:    s_min_i32 s3, s3, 0xff
 ; GISEL-GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -300,8 +292,6 @@ define amdgpu_kernel void @basic_smax_smin_sgpr(ptr addrspace(1) %out, i32 inreg
 ; GISEL-GFX12-NEXT:    s_sext_i32_i16 s3, s3
 ; GISEL-GFX12-NEXT:    s_max_i32 s2, s2, 0
 ; GISEL-GFX12-NEXT:    s_max_i32 s3, s3, 0
-; GISEL-GFX12-NEXT:    s_sext_i32_i16 s2, s2
-; GISEL-GFX12-NEXT:    s_sext_i32_i16 s3, s3
 ; GISEL-GFX12-NEXT:    s_min_i32 s2, s2, 0xff
 ; GISEL-GFX12-NEXT:    s_min_i32 s3, s3, 0xff
 ; GISEL-GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -739,13 +729,11 @@ define amdgpu_kernel void @vec_smax_smin_sgpr(ptr addrspace(1) %out, <2 x i16> i
 ; GISEL-VI-NEXT:    s_sext_i32_i16 s3, s2
 ; GISEL-VI-NEXT:    s_bfe_i32 s2, s2, 0x100010
 ; GISEL-VI-NEXT:    s_max_i32 s2, s2, 0
-; GISEL-VI-NEXT:    s_max_i32 s3, s3, 0
 ; GISEL-VI-NEXT:    s_sext_i32_i16 s2, s2
-; GISEL-VI-NEXT:    s_sext_i32_i16 s3, s3
 ; GISEL-VI-NEXT:    s_min_i32 s2, s2, 0xff
-; GISEL-VI-NEXT:    s_min_i32 s3, s3, 0xff
+; GISEL-VI-NEXT:    s_max_i32 s3, s3, 0
 ; GISEL-VI-NEXT:    s_and_b32 s2, 0xffff, s2
-; GISEL-VI-NEXT:    s_and_b32 s3, 0xffff, s3
+; GISEL-VI-NEXT:    s_min_i32 s3, s3, 0xff
 ; GISEL-VI-NEXT:    s_lshl_b32 s2, s2, 16
 ; GISEL-VI-NEXT:    s_or_b32 s2, s3, s2
 ; GISEL-VI-NEXT:    v_mov_b32_e32 v0, s0

>From 6c22e571b06591afbf6cd66bbf02906916b51a9a Mon Sep 17 00:00:00 2001
From: pvanhout <[email protected]>
Date: Wed, 28 May 2025 11:12:27 +0200
Subject: [PATCH 2/3] Remove duplicate combines

---
 llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 57a3ee6d0ce04..4b2da02329d00 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -245,10 +245,9 @@ def AMDGPUPostLegalizerCombiner: GICombiner<
 
 def AMDGPURegBankCombiner : GICombiner<
   "AMDGPURegBankCombinerImpl",
-  [unmerge_merge, unmerge_cst, unmerge_undef,
-   zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
-   fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
-   identity_combines, redundant_and, constant_fold_cast_op,
+  [unmerge_merge, unmerge_cst, unmerge_undef, int_minmax_to_med3,
+   ptr_add_immed_chain, fp_minmax_to_clamp, fp_minmax_to_med3,
+   fmed3_intrinsic_to_clamp, identity_combines, constant_fold_cast_op,
    cast_of_cast_combines, sext_trunc, zext_of_shift_amount_combines,
    d16_load, lower_uniform_sbfx, lower_uniform_ubfx, form_bitfield_extract,
    known_bits_simplifications]> {

>From 3c71ffaea001e4eecb5e9265e13e6cc4e31ccc50 Mon Sep 17 00:00:00 2001
From: pvanhout <[email protected]>
Date: Thu, 4 Dec 2025 10:48:59 +0100
Subject: [PATCH 3/3] rebase

---
 llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll |  6 -----
 llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll | 30 ++++++---------------
 llvm/test/CodeGen/AMDGPU/div_i128.ll        |  6 ++---
 3 files changed, 11 insertions(+), 31 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll
index a43bfb5d45679..a58a0e3d0c951 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll
@@ -773,7 +773,6 @@ define amdgpu_kernel void @frem_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1
 ; CI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; CI-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v8
 ; CI-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
-; CI-NEXT:    v_or_b32_e32 v0, s6, v0
 ; CI-NEXT:    v_or_b32_e32 v1, s7, v1
 ; CI-NEXT:  .LBB6_8: ; %Flow17
 ; CI-NEXT:    v_cmp_nlg_f64_e64 vcc, s[4:5], 0
@@ -882,7 +881,6 @@ define amdgpu_kernel void @frem_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1
 ; VI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v8
 ; VI-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
-; VI-NEXT:    v_or_b32_e32 v0, s6, v0
 ; VI-NEXT:    v_or_b32_e32 v1, s7, v1
 ; VI-NEXT:  .LBB6_8: ; %Flow17
 ; VI-NEXT:    v_cmp_nlg_f64_e64 vcc, s[4:5], 0
@@ -3247,7 +3245,6 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
 ; CI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; CI-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v8
 ; CI-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
-; CI-NEXT:    v_or_b32_e32 v0, s2, v0
 ; CI-NEXT:    v_or_b32_e32 v1, s3, v1
 ; CI-NEXT:  .LBB13_8: ; %Flow54
 ; CI-NEXT:    v_mov_b32_e32 v2, s10
@@ -3334,7 +3331,6 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
 ; CI-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
 ; CI-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v10
 ; CI-NEXT:    v_and_b32_e32 v3, 0x7fffffff, v3
-; CI-NEXT:    v_or_b32_e32 v2, s2, v2
 ; CI-NEXT:    v_or_b32_e32 v3, s3, v3
 ; CI-NEXT:  .LBB13_16: ; %Flow50
 ; CI-NEXT:    v_cmp_nlg_f64_e64 vcc, s[8:9], 0
@@ -3449,7 +3445,6 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
 ; VI-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
 ; VI-NEXT:    v_ldexp_f64 v[0:1], v[0:1], v8
 ; VI-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
-; VI-NEXT:    v_or_b32_e32 v0, s2, v0
 ; VI-NEXT:    v_or_b32_e32 v1, s3, v1
 ; VI-NEXT:  .LBB13_8: ; %Flow54
 ; VI-NEXT:    v_mov_b32_e32 v2, s10
@@ -3536,7 +3531,6 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
 ; VI-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
 ; VI-NEXT:    v_ldexp_f64 v[2:3], v[2:3], v10
 ; VI-NEXT:    v_and_b32_e32 v3, 0x7fffffff, v3
-; VI-NEXT:    v_or_b32_e32 v2, s2, v2
 ; VI-NEXT:    v_or_b32_e32 v3, s3, v3
 ; VI-NEXT:  .LBB13_16: ; %Flow50
 ; VI-NEXT:    v_cmp_nlg_f64_e64 vcc, s[8:9], 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
index 1d8413b82fc6a..518af70cbbf9f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
@@ -1744,12 +1744,8 @@ define i65 @v_lshr_i65_33(i65 %value) {
 ; GFX6-LABEL: v_lshr_i65_33:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v3, 1
-; GFX6-NEXT:    v_mov_b32_e32 v4, 0
-; GFX6-NEXT:    v_and_b32_e32 v3, 1, v2
-; GFX6-NEXT:    v_lshl_b64 v[2:3], v[3:4], 31
-; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
-; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX6-NEXT:    v_mov_b32_e32 v3, v1
+; GFX6-NEXT:    v_mov_b32_e32 v0, 1
 ; GFX6-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX6-NEXT:    v_and_b32_e32 v0, 1, v2
 ; GFX6-NEXT:    v_lshl_b64 v[0:1], v[0:1], 31
@@ -1761,12 +1757,8 @@ define i65 @v_lshr_i65_33(i65 %value) {
 ; GFX8-LABEL: v_lshr_i65_33:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v3, 1
-; GFX8-NEXT:    v_mov_b32_e32 v4, 0
-; GFX8-NEXT:    v_and_b32_e32 v3, 1, v2
-; GFX8-NEXT:    v_lshlrev_b64 v[2:3], 31, v[3:4]
-; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
-; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX8-NEXT:    v_mov_b32_e32 v3, v1
+; GFX8-NEXT:    v_mov_b32_e32 v0, 1
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX8-NEXT:    v_and_b32_e32 v0, 1, v2
 ; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 31, v[0:1]
@@ -1778,12 +1770,8 @@ define i65 @v_lshr_i65_33(i65 %value) {
 ; GFX9-LABEL: v_lshr_i65_33:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v3, 1
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0
-; GFX9-NEXT:    v_and_b32_e32 v3, 1, v2
-; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 31, v[3:4]
-; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
-; GFX9-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX9-NEXT:    v_mov_b32_e32 v3, v1
+; GFX9-NEXT:    v_mov_b32_e32 v0, 1
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-NEXT:    v_and_b32_e32 v0, 1, v2
 ; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 31, v[0:1]
@@ -1795,10 +1783,8 @@ define i65 @v_lshr_i65_33(i65 %value) {
 ; GFX10-LABEL: v_lshr_i65_33:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mov_b32_e32 v3, 1
-; GFX10-NEXT:    v_mov_b32_e32 v4, 0
-; GFX10-NEXT:    v_and_b32_e32 v3, 1, v2
-; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 1, v1
+; GFX10-NEXT:    v_mov_b32_e32 v3, v1
+; GFX10-NEXT:    v_mov_b32_e32 v0, 1
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-NEXT:    v_and_b32_e32 v0, 1, v2
 ; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 1, v3
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index efbca0c1b812f..dd9a013d37203 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -4355,11 +4355,11 @@ define i128 @v_sdiv_i128_v_pow2k(i128 %lhs) {
 ; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v4, s[6:7], v4, v5
 ; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v1, s[6:7], v1, v0, s[6:7]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s8
-; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v2, s[6:7], v2, v0, s[6:7]
+; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v5, s[6:7], v2, v0, s[6:7]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s5
 ; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v4, s[6:7], v3, v0, s[6:7]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v4
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v4
 ; GFX9-G-O0-NEXT:    s_mov_b32 s5, 1
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s5
 ; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v2, v0, v1
