https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/168167

Handle this for consistency with the zext case.

>From 41140cee8808f0703509a43fd9f8a51b52c6ddfc Mon Sep 17 00:00:00 2001
From: Matt Arsenault <[email protected]>
Date: Fri, 14 Nov 2025 16:32:25 -0800
Subject: [PATCH] AMDGPU: Use vgpr to implement divergent i32->i64 anyext

Handle this for consistency with the zext case.
---
 llvm/lib/Target/AMDGPU/SIInstructions.td      |  7 ++++-
 llvm/test/CodeGen/AMDGPU/rem_i128.ll          | 30 +++++++------------
 .../test/CodeGen/AMDGPU/wwm-reserved-spill.ll |  3 +-
 llvm/test/CodeGen/AMDGPU/wwm-reserved.ll      |  6 ++--
 4 files changed, 19 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td 
b/llvm/lib/Target/AMDGPU/SIInstructions.td
index b7256b81ee826..6cc9b3cc67530 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2951,10 +2951,15 @@ def : GCNPat <
 >;
 
 def : GCNPat <
-  (i64 (anyext i32:$src)),
+  (i64 (UniformUnaryFrag<anyext> i32:$src)),
   (REG_SEQUENCE SReg_64, $src, sub0, (i32 (IMPLICIT_DEF)), sub1)
 >;
 
+def : GCNPat <
+  (i64 (anyext i32:$src)),
+  (REG_SEQUENCE VReg_64, $src, sub0, (i32 (IMPLICIT_DEF)), sub1)
+>;
+
 class ZExt_i64_i1_Pat <SDNode ext> : GCNPat <
   (i64 (ext i1:$src)),
     (REG_SEQUENCE VReg_64,
diff --git a/llvm/test/CodeGen/AMDGPU/rem_i128.ll 
b/llvm/test/CodeGen/AMDGPU/rem_i128.ll
index d36d95d182ab7..32862f73d2f29 100644
--- a/llvm/test/CodeGen/AMDGPU/rem_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/rem_i128.ll
@@ -1161,9 +1161,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_add3_u32 v8, v0, v2, v8
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v0, s5
 ; GFX9-O0-NEXT:    ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed 
$exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v0
+; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s5
 ; GFX9-O0-NEXT:    v_lshlrev_b64 v[8:9], s4, v[8:9]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v9
 ; GFX9-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 killed $vgpr17_vgpr18 
killed $exec
@@ -1190,9 +1189,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_add3_u32 v14, v9, v14, v15
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s5
 ; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 
killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v9
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s5
 ; GFX9-O0-NEXT:    v_lshlrev_b64 v[14:15], s4, v[14:15]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v15
 ; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 killed $vgpr19_vgpr20 
killed $exec
@@ -1221,9 +1219,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v15
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s5
 ; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 
killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v16
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s5
 ; GFX9-O0-NEXT:    v_lshlrev_b64 v[15:16], s4, v[14:15]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v16
 ; GFX9-O0-NEXT:    v_or_b32_e64 v9, v9, v14
@@ -1240,9 +1237,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v14, v15
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v9, s5
 ; GFX9-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 
killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v9
+; GFX9-O0-NEXT:    v_mov_b32_e32 v15, s5
 ; GFX9-O0-NEXT:    v_lshlrev_b64 v[14:15], s4, v[14:15]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v9, v15
 ; GFX9-O0-NEXT:    v_or_b32_e64 v8, v8, v9
@@ -1280,9 +1276,8 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v23, v24
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX9-O0-NEXT:    ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 
killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v24, v1
+; GFX9-O0-NEXT:    v_mov_b32_e32 v24, s5
 ; GFX9-O0-NEXT:    v_lshlrev_b64 v[23:24], s4, v[23:24]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v24
 ; GFX9-O0-NEXT:    v_or_b32_e64 v0, v0, v1
@@ -2447,9 +2442,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_add3_u32 v2, v2, v3, v10
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v10, s5
 ; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed 
$exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v10
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s5
 ; GFX9-O0-NEXT:    v_lshlrev_b64 v[17:18], s4, v[2:3]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v18
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v13, v4
@@ -2476,9 +2470,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_add3_u32 v2, v2, v3, v11
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v11, s5
 ; GFX9-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed 
$exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v11
+; GFX9-O0-NEXT:    v_mov_b32_e32 v3, s5
 ; GFX9-O0-NEXT:    v_lshlrev_b64 v[2:3], s4, v[2:3]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v3
 ; GFX9-O0-NEXT:    ; kill: def $vgpr15 killed $vgpr15 killed $vgpr15_vgpr16 
killed $exec
@@ -2507,9 +2500,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v16
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s5
 ; GFX9-O0-NEXT:    ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 
killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s5
 ; GFX9-O0-NEXT:    v_lshlrev_b64 v[15:16], s4, v[15:16]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v16
 ; GFX9-O0-NEXT:    v_or_b32_e64 v11, v11, v12
@@ -2526,9 +2518,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v15, v16
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v17, s5
 ; GFX9-O0-NEXT:    ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 
killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v16, v17
+; GFX9-O0-NEXT:    v_mov_b32_e32 v16, s5
 ; GFX9-O0-NEXT:    v_lshlrev_b64 v[15:16], s4, v[15:16]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v17, v16
 ; GFX9-O0-NEXT:    v_or_b32_e64 v10, v10, v17
@@ -2566,9 +2557,8 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v19, v20
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v12, s5
 ; GFX9-O0-NEXT:    ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 
killed $exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v20, v12
+; GFX9-O0-NEXT:    v_mov_b32_e32 v20, s5
 ; GFX9-O0-NEXT:    v_lshlrev_b64 v[19:20], s4, v[19:20]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v12, v20
 ; GFX9-O0-NEXT:    v_or_b32_e64 v5, v5, v12
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll 
b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
index fe183287c46c2..81e17400973a4 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
@@ -475,9 +475,8 @@ define amdgpu_gfx i64 @strict_wwm_called_i64(i64 %a) 
noinline {
 ; GFX9-O0-NEXT:    v_add3_u32 v0, v0, v1, v2
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr35
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr36
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s35
 ; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed 
$exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s35
 ; GFX9-O0-NEXT:    v_lshlrev_b64 v[1:2], s34, v[0:1]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
 ; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed 
$exec
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll 
b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
index 2aacb96ca4306..72672c8b6efad 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
@@ -479,9 +479,8 @@ define i64 @called_i64(i64 %a) noinline {
 ; GFX9-O0-NEXT:    v_add3_u32 v0, v0, v1, v2
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s5
 ; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed 
$exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX9-O0-NEXT:    v_lshlrev_b64 v[1:2], s4, v[0:1]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
 ; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed 
$exec
@@ -1310,9 +1309,8 @@ define i64 @strict_wwm_called_i64(i64 %a) noinline {
 ; GFX9-O0-NEXT:    v_add3_u32 v0, v0, v1, v2
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr5
 ; GFX9-O0-NEXT:    ; implicit-def: $sgpr6
-; GFX9-O0-NEXT:    v_mov_b32_e32 v2, s5
 ; GFX9-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed 
$exec
-; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX9-O0-NEXT:    v_lshlrev_b64 v[1:2], s4, v[0:1]
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v3, v2
 ; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed 
$exec

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to