https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/131312
>From 782153a9a47d4a0fdb897e811033179fa67c5060 Mon Sep 17 00:00:00 2001 From: pvanhout <pierre.vanhoutr...@amd.com> Date: Fri, 14 Mar 2025 10:34:51 +0100 Subject: [PATCH] [AMDGPU][GlobalISel] Combine (sext (trunc (sext_in_reg x))) This is a bit of an awkward pattern that can come up as a result of legalization and then widening of i16 operations to i32 in RegBankSelect on AMDGPU. This quick combine avoids redundant patterns like ``` s_sext_i32_i8 s0, s0 s_sext_i32_i16 s0, s0 s_ashr_i32 s0, s0, s1 ``` With this the second sext is removed as it's redundant. --- .../include/llvm/Target/GlobalISel/Combine.td | 12 ++- .../combine-sext-trunc-sextinreg.mir | 86 +++++++++++++++++++ .../CodeGen/AMDGPU/GlobalISel/llvm.abs.ll | 78 ++++------------- 3 files changed, 113 insertions(+), 63 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 3590ab221ad44..9727b86b4be8b 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -258,6 +258,14 @@ def sext_trunc_sextload : GICombineRule< [{ return Helper.matchSextTruncSextLoad(*${d}); }]), (apply [{ Helper.applySextTruncSextLoad(*${d}); }])>; +def sext_trunc_sextinreg : GICombineRule< + (defs root:$dst), + (match (G_SEXT_INREG $sir, $src, $width), + (G_TRUNC $trunc, $sir), + (G_SEXT $dst, $trunc), + [{ return (MRI.getType(${trunc}.getReg()).getScalarSizeInBits() >= ${width}.getImm()); }]), + (apply (GIReplaceReg $dst, $sir))>; + def sext_inreg_of_load_matchdata : GIDefMatchData<"std::tuple<Register, unsigned>">; def sext_inreg_of_load : GICombineRule< (defs root:$root, sext_inreg_of_load_matchdata:$matchinfo), @@ -1896,7 +1904,9 @@ def cast_of_cast_combines: GICombineGroup<[ sext_of_anyext, anyext_of_anyext, anyext_of_zext, - anyext_of_sext + anyext_of_sext, + + sext_trunc_sextinreg ]>; def cast_combines: 
GICombineGroup<[ diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir new file mode 100644 index 0000000000000..d41e5b172efc2 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir @@ -0,0 +1,86 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: trunc_s16_inreg_8 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: trunc_s16_inreg_8 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8 + ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32) + %copy:_(s32) = COPY $vgpr0 + %inreg:_(s32) = G_SEXT_INREG %copy, 8 + %trunc:_(s16) = G_TRUNC %inreg + %sext:_(s32) = G_SEXT %trunc + $vgpr0 = COPY %sext +... + +--- +name: trunc_s16_inreg_16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: trunc_s16_inreg_16 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 16 + ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32) + %copy:_(s32) = COPY $vgpr0 + %inreg:_(s32) = G_SEXT_INREG %copy, 16 + %trunc:_(s16) = G_TRUNC %inreg + %sext:_(s32) = G_SEXT %trunc + $vgpr0 = COPY %sext +... 
+ +--- +name: trunc_s8_inreg_16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: trunc_s8_inreg_16 + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 16 + ; CHECK-NEXT: %trunc:_(s8) = G_TRUNC %inreg(s32) + ; CHECK-NEXT: %sext:_(s32) = G_SEXT %trunc(s8) + ; CHECK-NEXT: $vgpr0 = COPY %sext(s32) + %copy:_(s32) = COPY $vgpr0 + %inreg:_(s32) = G_SEXT_INREG %copy, 16 + %trunc:_(s8) = G_TRUNC %inreg + %sext:_(s32) = G_SEXT %trunc + $vgpr0 = COPY %sext +... + +# TODO?: We could handle this by inserting a trunc, but I'm not sure how useful that'd be. +--- +name: mismatching_types +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: mismatching_types + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8 + ; CHECK-NEXT: %trunc:_(s8) = G_TRUNC %inreg(s32) + ; CHECK-NEXT: %sext:_(s16) = G_SEXT %trunc(s8) + ; CHECK-NEXT: %anyext:_(s32) = G_ANYEXT %sext(s16) + ; CHECK-NEXT: $vgpr0 = COPY %anyext(s32) + %copy:_(s32) = COPY $vgpr0 + %inreg:_(s32) = G_SEXT_INREG %copy, 8 + %trunc:_(s8) = G_TRUNC %inreg + %sext:_(s16) = G_SEXT %trunc + %anyext:_(s32) = G_ANYEXT %sext + $vgpr0 = COPY %anyext +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll index 8c687d85ac24b..7ec27f47578c2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll @@ -197,33 +197,13 @@ define amdgpu_cs <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) { } define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) { -; GFX6-LABEL: abs_sgpr_v2i8: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_sext_i32_i8 s0, s0 -; GFX6-NEXT: s_sext_i32_i8 s1, s1 -; GFX6-NEXT: s_abs_i32 s0, s0 -; GFX6-NEXT: s_abs_i32 s1, s1 -; GFX6-NEXT: ; return to shader part epilog -; -; GFX8-LABEL: abs_sgpr_v2i8: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_sext_i32_i8 s0, s0 -; GFX8-NEXT: s_sext_i32_i8 s1, s1 -; GFX8-NEXT: s_sext_i32_i16 s0, s0 -; GFX8-NEXT: s_sext_i32_i16 s1, s1 -; GFX8-NEXT: s_abs_i32 s0, s0 -; GFX8-NEXT: s_abs_i32 s1, s1 -; GFX8-NEXT: ; return to shader part epilog -; -; GFX10-LABEL: abs_sgpr_v2i8: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_sext_i32_i8 s0, s0 -; GFX10-NEXT: s_sext_i32_i8 s1, s1 -; GFX10-NEXT: s_sext_i32_i16 s0, s0 -; GFX10-NEXT: s_sext_i32_i16 s1, s1 -; GFX10-NEXT: s_abs_i32 s0, s0 -; GFX10-NEXT: s_abs_i32 s1, s1 -; GFX10-NEXT: ; return to shader part epilog +; GFX-LABEL: abs_sgpr_v2i8: +; GFX: ; %bb.0: +; GFX-NEXT: s_sext_i32_i8 s0, s0 +; GFX-NEXT: s_sext_i32_i8 s1, s1 +; GFX-NEXT: s_abs_i32 s0, s0 +; GFX-NEXT: s_abs_i32 s1, s1 +; GFX-NEXT: ; return to shader part epilog %res = call <2 x i8> @llvm.abs.v2i8(<2 x i8> %arg, i1 false) ret <2 x i8> %res } @@ -268,41 +248,15 @@ define amdgpu_cs <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) { } define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) { -; GFX6-LABEL: abs_sgpr_v3i8: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_sext_i32_i8 s0, s0 -; GFX6-NEXT: s_sext_i32_i8 s1, s1 -; GFX6-NEXT: s_sext_i32_i8 s2, s2 -; GFX6-NEXT: s_abs_i32 s0, s0 -; GFX6-NEXT: s_abs_i32 s1, s1 -; GFX6-NEXT: s_abs_i32 s2, s2 -; GFX6-NEXT: ; return to shader part epilog -; -; GFX8-LABEL: 
abs_sgpr_v3i8: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_sext_i32_i8 s0, s0 -; GFX8-NEXT: s_sext_i32_i8 s1, s1 -; GFX8-NEXT: s_sext_i32_i8 s2, s2 -; GFX8-NEXT: s_sext_i32_i16 s0, s0 -; GFX8-NEXT: s_sext_i32_i16 s1, s1 -; GFX8-NEXT: s_sext_i32_i16 s2, s2 -; GFX8-NEXT: s_abs_i32 s0, s0 -; GFX8-NEXT: s_abs_i32 s1, s1 -; GFX8-NEXT: s_abs_i32 s2, s2 -; GFX8-NEXT: ; return to shader part epilog -; -; GFX10-LABEL: abs_sgpr_v3i8: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_sext_i32_i8 s0, s0 -; GFX10-NEXT: s_sext_i32_i8 s1, s1 -; GFX10-NEXT: s_sext_i32_i8 s2, s2 -; GFX10-NEXT: s_sext_i32_i16 s0, s0 -; GFX10-NEXT: s_sext_i32_i16 s1, s1 -; GFX10-NEXT: s_sext_i32_i16 s2, s2 -; GFX10-NEXT: s_abs_i32 s0, s0 -; GFX10-NEXT: s_abs_i32 s1, s1 -; GFX10-NEXT: s_abs_i32 s2, s2 -; GFX10-NEXT: ; return to shader part epilog +; GFX-LABEL: abs_sgpr_v3i8: +; GFX: ; %bb.0: +; GFX-NEXT: s_sext_i32_i8 s0, s0 +; GFX-NEXT: s_sext_i32_i8 s1, s1 +; GFX-NEXT: s_sext_i32_i8 s2, s2 +; GFX-NEXT: s_abs_i32 s0, s0 +; GFX-NEXT: s_abs_i32 s1, s1 +; GFX-NEXT: s_abs_i32 s2, s2 +; GFX-NEXT: ; return to shader part epilog %res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %arg, i1 false) ret <3 x i8> %res } _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits