[llvm-branch-commits] [llvm] [GlobalISel] Combine redundant sext_inreg (PR #131624)

via llvm-branch-commits Mon, 17 Mar 2025 07:45:01 -0700

llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Pierre van Houtryve (Pierre-vh)

<details>
<summary>Changes</summary>



---
Full diff: https://github.com/llvm/llvm-project/pull/131624.diff


6 Files Affected:

- (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+3) 
- (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+8-1) 
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp (+27) 
- (added) llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir (+164) 
- (added) llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir (+87) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll (-5) 


``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9b78342c8fc39..5778377d125a8 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -994,6 +994,9 @@ class CombinerHelper {
   // overflow sub
   bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const;
 
+  // (sext_inreg (sext_inreg x, K0), K1)
+  void applyRedundantSextInReg(MachineInstr &Root, MachineInstr &Other) const;
+
 private:
   /// Checks for legality of an indexed variant of \p LdSt.
   bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 660b03080f92e..6a0ff683a4647 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1849,6 +1849,12 @@ def anyext_of_anyext : ext_of_ext_opcodes<G_ANYEXT, G_ANYEXT>;
 def anyext_of_zext : ext_of_ext_opcodes<G_ANYEXT, G_ZEXT>;
 def anyext_of_sext : ext_of_ext_opcodes<G_ANYEXT, G_SEXT>;
 
+def sext_inreg_of_sext_inreg : GICombineRule<
+   (defs root:$dst),
+   (match (G_SEXT_INREG $x, $src, $a):$other,
+          (G_SEXT_INREG $dst, $x, $b):$root),
+   (apply [{ Helper.applyRedundantSextInReg(*${root}, *${other}); }])>;
+
 // Push cast through build vector.
 class buildvector_of_opcode<Instruction castOpcode> : GICombineRule <
   (defs root:$root, build_fn_matchinfo:$matchinfo),
@@ -1896,7 +1902,8 @@ def cast_of_cast_combines: GICombineGroup<[
   sext_of_anyext,
   anyext_of_anyext,
   anyext_of_zext,
-  anyext_of_sext
+  anyext_of_sext,
+  sext_inreg_of_sext_inreg,
 ]>;
 
 def cast_combines: GICombineGroup<[
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
index 182484754d091..ffc2384fc14fd 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
@@ -372,3 +372,30 @@ bool CombinerHelper::matchCastOfInteger(const MachineInstr &CastMI,
     return false;
   }
 }
+
+void CombinerHelper::applyRedundantSextInReg(MachineInstr &Root,
+                                             MachineInstr &Other) const {
+  assert(Root.getOpcode() == TargetOpcode::G_SEXT_INREG &&
+         Other.getOpcode() == TargetOpcode::G_SEXT_INREG);
+
+  unsigned RootWidth = Root.getOperand(2).getImm();
+  unsigned OtherWidth = Other.getOperand(2).getImm();
+
+  Register Dst = Root.getOperand(0).getReg();
+  Register OtherDst = Other.getOperand(0).getReg();
+  Register Src = Other.getOperand(1).getReg();
+
+  if (RootWidth >= OtherWidth) {
+    // The root sext_inreg is entirely redundant because the other one
+    // is narrower.
+    Observer.changingAllUsesOfReg(MRI, Dst);
+    MRI.replaceRegWith(Dst, OtherDst);
+    Observer.finishedChangingAllUsesOfReg();
+  } else {
+    // RootWidth < OtherWidth, rewrite this G_SEXT_INREG with the source of the
+    // other G_SEXT_INREG.
+    Builder.buildSExtInReg(Dst, Src, RootWidth);
+  }
+
+  Root.eraseFromParent();
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir
new file mode 100644
index 0000000000000..566ee8e6c338d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir
@@ -0,0 +1,164 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: inreg8_inreg16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: inreg8_inreg16
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
+    ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+    %copy:_(s32) = COPY $vgpr0
+    %inreg:_(s32) = G_SEXT_INREG %copy, 8
+    %inreg1:_(s32) = G_SEXT_INREG %inreg, 16
+    $vgpr0 = COPY %inreg1
+...
+
+---
+name: inreg16_inreg16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: inreg16_inreg16
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 16
+    ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+    %copy:_(s32) = COPY $vgpr0
+    %inreg:_(s32) = G_SEXT_INREG %copy, 16
+    %inreg1:_(s32) = G_SEXT_INREG %inreg, 16
+    $vgpr0 = COPY %inreg1
+...
+
+---
+name: inreg16_inreg8
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: inreg16_inreg8
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %inreg1:_(s32) = G_SEXT_INREG %copy, 8
+    ; CHECK-NEXT: $vgpr0 = COPY %inreg1(s32)
+    %copy:_(s32) = COPY $vgpr0
+    %inreg:_(s32) = G_SEXT_INREG %copy, 16
+    %inreg1:_(s32) = G_SEXT_INREG %inreg, 8
+    $vgpr0 = COPY %inreg1
+...
+
+---
+name: inreg16_inreg32_64bit
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: inreg16_inreg32_64bit
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: %inreg:_(s64) = G_SEXT_INREG %copy, 16
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(s64)
+    %copy:_(s64) = COPY $vgpr0_vgpr1
+    %inreg:_(s64) = G_SEXT_INREG %copy, 16
+    %inreg1:_(s64) = G_SEXT_INREG %inreg, 32
+    $vgpr0_vgpr1 = COPY %inreg1
+...
+
+---
+name: inreg32_inreg32_64bit
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: inreg32_inreg32_64bit
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: %inreg:_(s64) = G_SEXT_INREG %copy, 32
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(s64)
+    %copy:_(s64) = COPY $vgpr0_vgpr1
+    %inreg:_(s64) = G_SEXT_INREG %copy, 32
+    %inreg1:_(s64) = G_SEXT_INREG %inreg, 32
+    $vgpr0_vgpr1 = COPY %inreg1
+...
+
+---
+name: inreg32_inreg16_64bit
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: inreg32_inreg16_64bit
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s64) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: %inreg1:_(s64) = G_SEXT_INREG %copy, 16
+    ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg1(s64)
+    %copy:_(s64) = COPY $vgpr0_vgpr1
+    %inreg:_(s64) = G_SEXT_INREG %copy, 32
+    %inreg1:_(s64) = G_SEXT_INREG %inreg, 16
+    $vgpr0_vgpr1 = COPY %inreg1
+...
+
+---
+name: vector_inreg8_inreg16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK-LABEL: name: vector_inreg8_inreg16
+    ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK-NEXT: %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 8
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg(<4 x s32>)
+    %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 8
+    %inreg1:_(<4 x s32>) = G_SEXT_INREG %inreg, 16
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1
+...
+
+---
+name: vector_inreg16_inreg16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK-LABEL: name: vector_inreg16_inreg16
+    ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK-NEXT: %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 16
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg(<4 x s32>)
+    %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 16
+    %inreg1:_(<4 x s32>) = G_SEXT_INREG %inreg, 16
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1
+...
+
+---
+name: vector_inreg16_inreg8
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK-LABEL: name: vector_inreg16_inreg8
+    ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK-NEXT: %inreg1:_(<4 x s32>) = G_SEXT_INREG %copy, 8
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1(<4 x s32>)
+    %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 16
+    %inreg1:_(<4 x s32>) = G_SEXT_INREG %inreg, 8
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir
new file mode 100644
index 0000000000000..c60c137b17f84
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir
@@ -0,0 +1,87 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+# Check (sext (trunc (sext_inreg x))) can be folded, as it's a pattern that can arise when
+# CGP widening of uniform i16 ops is disabled.
+# Two separate combines make it happen (sext_trunc and sext_inreg_of_sext_inreg).
+
+---
+name: trunc_s16_inreg_8
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: trunc_s16_inreg_8
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
+    ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+    %copy:_(s32) = COPY $vgpr0
+    %inreg:_(s32) = G_SEXT_INREG %copy, 8
+    %trunc:_(s16) = G_TRUNC %inreg
+    %sext:_(s32) = G_SEXT %trunc
+    $vgpr0 = COPY %sext
+...
+
+---
+name: trunc_s16_inreg_16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: trunc_s16_inreg_16
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 16
+    ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+    %copy:_(s32) = COPY $vgpr0
+    %inreg:_(s32) = G_SEXT_INREG %copy, 16
+    %trunc:_(s16) = G_TRUNC %inreg
+    %sext:_(s32) = G_SEXT %trunc
+    $vgpr0 = COPY %sext
+...
+
+---
+name: trunc_s8_inreg_16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: trunc_s8_inreg_16
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %sext:_(s32) = G_SEXT_INREG %copy, 8
+    ; CHECK-NEXT: $vgpr0 = COPY %sext(s32)
+    %copy:_(s32) = COPY $vgpr0
+    %inreg:_(s32) = G_SEXT_INREG %copy, 16
+    %trunc:_(s8) = G_TRUNC %inreg
+    %sext:_(s32) = G_SEXT %trunc
+    $vgpr0 = COPY %sext
+...
+
+# TODO?: We could handle this by inserting a trunc, but I'm not sure how useful that'd be.
+---
+name: mismatching_types
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: mismatching_types
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
+    ; CHECK-NEXT: %trunc:_(s8) = G_TRUNC %inreg(s32)
+    ; CHECK-NEXT: %sext:_(s16) = G_SEXT %trunc(s8)
+    ; CHECK-NEXT: %anyext:_(s32) = G_ANYEXT %sext(s16)
+    ; CHECK-NEXT: $vgpr0 = COPY %anyext(s32)
+    %copy:_(s32) = COPY $vgpr0
+    %inreg:_(s32) = G_SEXT_INREG %copy, 8
+    %trunc:_(s8) = G_TRUNC %inreg
+    %sext:_(s16) = G_SEXT %trunc
+    %anyext:_(s32) = G_ANYEXT %sext
+    $vgpr0 = COPY %anyext
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
index 41e915a4c1011..18a222e56fd0c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
@@ -223,8 +223,6 @@ define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_sext_i32_i8 s0, s0
 ; GFX10-NEXT:    s_sext_i32_i8 s1, s1
-; GFX10-NEXT:    s_sext_i32_i16 s0, s0
-; GFX10-NEXT:    s_sext_i32_i16 s1, s1
 ; GFX10-NEXT:    s_abs_i32 s0, s0
 ; GFX10-NEXT:    s_abs_i32 s1, s1
 ; GFX10-NEXT:    ; return to shader part epilog
@@ -308,9 +306,6 @@ define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
 ; GFX10-NEXT:    s_sext_i32_i8 s0, s0
 ; GFX10-NEXT:    s_sext_i32_i8 s1, s1
 ; GFX10-NEXT:    s_sext_i32_i8 s2, s2
-; GFX10-NEXT:    s_sext_i32_i16 s0, s0
-; GFX10-NEXT:    s_sext_i32_i16 s1, s1
-; GFX10-NEXT:    s_sext_i32_i16 s2, s2
 ; GFX10-NEXT:    s_abs_i32 s0, s0
 ; GFX10-NEXT:    s_abs_i32 s1, s1
 ; GFX10-NEXT:    s_abs_i32 s2, s2

``````````

</details>


https://github.com/llvm/llvm-project/pull/131624
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [GlobalISel] Combine redundant sext_inreg (PR #131624)

Reply via email to