https://github.com/vangthao95 created https://github.com/llvm/llvm-project/pull/176596
These opcodes are created together for the i64->i16 signed clamp pattern. >From a93734b78b8968b34695873f5c7277a10b2b0ea5 Mon Sep 17 00:00:00 2001 From: Vang Thao <[email protected]> Date: Sat, 17 Jan 2026 14:04:56 -0800 Subject: [PATCH] [AMDGPU][GlobalISel] Add RegBankLegalize rules for SMED3 and CVT_PK_I16_I32 These opcodes are created together for the i64->i16 signed clamp pattern. --- .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 8 ++++ .../AMDGPU/GlobalISel/combine-short-clamp.ll | 8 ++-- .../regbankselect-amdgpu-cvt-pk-i16-i32.mir | 41 +++++++++++++++++ .../GlobalISel/regbankselect-amdgpu-smed3.mir | 46 +++++++++++++++++++ 4 files changed, 99 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-cvt-pk-i16-i32.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-smed3.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index ce80a94f29222..def076525c470 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -1060,6 +1060,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}}) .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}}); + addRulesForGOpcs({G_AMDGPU_SMED3}, Standard) + .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}}) + .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}}); + + addRulesForGOpcs({G_AMDGPU_CVT_PK_I16_I32}, Standard) + .Uni(V2S16, {{UniInVgprV2S16}, {Vgpr32, Vgpr32}}) + .Div(V2S16, {{VgprV2S16}, {Vgpr32, Vgpr32}}); + // FNEG and FABS are either folded as source modifiers or can be selected as // bitwise XOR and AND with Mask. XOR and AND are available on SALU but for // targets without SALU float we still select them as VGPR since there would diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll index 7db49bca36062..d356153fe7360 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc -global-isel -mcpu=tahiti -mtriple=amdgcn-amd-amdhsa < %s | FileCheck --check-prefixes=GCN,GFX678 %s -; RUN: llc -global-isel -mcpu=gfx900 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck --check-prefixes=GCN,GFX9 %s -; RUN: llc -global-isel -mcpu=gfx1010 -mtriple=amdgcn < %s | FileCheck --check-prefixes=GCN,GFX10 %s -; RUN: llc -global-isel -mcpu=gfx1100 -mtriple=amdgcn < %s | FileCheck --check-prefixes=GCN,GFX11 %s +; RUN: llc -global-isel -new-reg-bank-select -mcpu=tahiti -mtriple=amdgcn-amd-amdhsa < %s | FileCheck --check-prefixes=GCN,GFX678 %s +; RUN: llc -global-isel -new-reg-bank-select -mcpu=gfx900 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck --check-prefixes=GCN,GFX9 %s +; RUN: llc -global-isel -new-reg-bank-select -mcpu=gfx1010 -mtriple=amdgcn < %s | FileCheck --check-prefixes=GCN,GFX10 %s +; RUN: llc -global-isel -new-reg-bank-select -mcpu=gfx1100 -mtriple=amdgcn < %s | FileCheck --check-prefixes=GCN,GFX11 %s declare i64 @llvm.smax.i64(i64, i64) declare i64 @llvm.smin.i64(i64, i64) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-cvt-pk-i16-i32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-cvt-pk-i16-i32.mir new file mode 100644 index 0000000000000..656f5a2fc1e05 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-cvt-pk-i16-i32.mir @@ -0,0 +1,41 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' %s -o - | FileCheck %s + +--- +name: cvt_pk_i16_i32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: cvt_pk_i16_i32_ss + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[AMDGPU_CVT_PK_I16_I32_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_CVT_PK_I16_I32 [[COPY2]], [[COPY3]] + ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(<2 x s16>) = G_AMDGPU_READANYLANE [[AMDGPU_CVT_PK_I16_I32_]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(<2 x s16>) = G_AMDGPU_CVT_PK_I16_I32 %0, %1 +... + +--- +name: cvt_pk_i16_i32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: cvt_pk_i16_i32_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[AMDGPU_CVT_PK_I16_I32_:%[0-9]+]]:vgpr(<2 x s16>) = G_AMDGPU_CVT_PK_I16_I32 [[COPY]], [[COPY1]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(<2 x s16>) = G_AMDGPU_CVT_PK_I16_I32 %0, %1 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-smed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-smed3.mir new file mode 100644 index 0000000000000..a29d0bc9063e8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-smed3.mir @@ -0,0 +1,46 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' %s -o - | FileCheck %s + +--- +name: smed3_sss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-LABEL: name: smed3_sss + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY3]], [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[AMDGPU_SMED3_]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = G_AMDGPU_SMED3 %0, %1, %2 +... + +--- +name: smed3_vvv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-LABEL: name: smed3_vvv + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY1]], [[COPY2]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = G_AMDGPU_SMED3 %0, %1, %2 +... _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
