https://github.com/vangthao95 created https://github.com/llvm/llvm-project/pull/175888
Patch 3 of 4 patches to implement full G_MUL support in regbanklegalize. Current mul.ll test is only partially updated and expected to fail. It will be updated in the fourth patch. From fcdae3795584468953f6b9ea54f58db24dd24c2d Mon Sep 17 00:00:00 2001 From: Vang Thao <[email protected]> Date: Tue, 13 Jan 2026 20:36:55 -0800 Subject: [PATCH] [AMDGPU][GlobalISel] Add RegBankLegalize support for G_AMDGPU_S_MUL_* Patch 3 of 4 patches to implement full G_MUL support in regbanklegalize. Current mul.ll test is only partially updated and expected to fail. It will be updated in the fourth patch. --- .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 19 ++++ .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 4 + .../AMDGPU/AMDGPURegBankLegalizeRules.h | 2 + .../AMDGPU/GlobalISel/regbankselect-smul.mir | 92 +++++++++++++++++++ 4 files changed, 117 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smul.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 8cea1fa98cd02..1a8bd6d8de261 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -985,6 +985,25 @@ bool RegBankLegalizeHelper::lower(MachineInstr &MI, return lowerS_BFE(MI); case UniMAD64: return lowerUniMAD64(MI); + case S_Mul64: { + B.buildMul(MI.getOperand(0), MI.getOperand(1), MI.getOperand(2)); + MI.eraseFromParent(); + return true; + } + case S_Mul64Div: { + auto Op1 = B.buildTrunc(VgprRB_S32, MI.getOperand(1)); + auto Op2 = B.buildTrunc(VgprRB_S32, MI.getOperand(2)); + auto Zero = B.buildConstant({VgprRB, S64}, 0); + + unsigned NewOpc = MI.getOpcode() == AMDGPU::G_AMDGPU_S_MUL_U64_U32 + ? 
AMDGPU::G_AMDGPU_MAD_U64_U32 + : AMDGPU::G_AMDGPU_MAD_I64_I32; + + B.buildInstr(NewOpc, {MI.getOperand(0).getReg(), {SgprRB, S32}}, + {Op1, Op2, Zero}); + MI.eraseFromParent(); + return true; + } case SplitTo32: return lowerSplitTo32(MI); case SplitTo32Select: diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 991a85b670a76..5a03f6b5463ad 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -505,6 +505,10 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Div(S64, {{Vgpr64, Vcc}, {Vgpr32, Vgpr32, Vgpr64}}) .Uni(S64, {{Sgpr64, SgprS1}, {Sgpr32, Sgpr32, Sgpr64}, UniMAD64}); + addRulesForGOpcs({G_AMDGPU_S_MUL_U64_U32, G_AMDGPU_S_MUL_I64_I32}, Standard) + .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}, S_Mul64}) + .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}, S_Mul64Div}); + addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB) .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}}) .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}}) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h index 77ed0b7fe7920..b5fd6683d319b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h @@ -226,6 +226,8 @@ enum LoweringMethodID { V_BFE, VgprToVccCopy, UniMAD64, + S_Mul64, + S_Mul64Div, SplitTo32, ScalarizeToS16, SplitTo32Select, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smul.mir new file mode 100644 index 0000000000000..ffec314968f85 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smul.mir @@ -0,0 +1,92 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' %s -o - | FileCheck %s + 
+--- +name: s_mul_u64_u32_ss +legalized: true +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: s_mul_u64_u32_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:sgpr(s64) = G_MUL [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MUL]](s64) + ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1 + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_AMDGPU_S_MUL_U64_U32 %0, %1 + $vgpr0_vgpr1 = COPY %2 + S_ENDPGM 0, implicit $vgpr0_vgpr1 +... + +--- +name: s_mul_u64_u32_vv +legalized: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: s_mul_u64_u32_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_MAD_U64_U32 [[TRUNC]](s32), [[TRUNC1]], [[C]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AMDGPU_MAD_U64_U32_]](s64) + ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1 + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_AMDGPU_S_MUL_U64_U32 %0, %1 + $vgpr0_vgpr1 = COPY %2 + S_ENDPGM 0, implicit $vgpr0_vgpr1 +... 
+ +--- +name: s_mul_i64_i32_ss +legalized: true +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: s_mul_i64_i32_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:sgpr(s64) = G_MUL [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MUL]](s64) + ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1 + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_AMDGPU_S_MUL_I64_I32 %0, %1 + $vgpr0_vgpr1 = COPY %2 + S_ENDPGM 0, implicit $vgpr0_vgpr1 +... + +--- +name: s_mul_i64_i32_vv +legalized: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: s_mul_i64_i32_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_MAD_I64_I32 [[TRUNC]](s32), [[TRUNC1]], [[C]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AMDGPU_MAD_I64_I32_]](s64) + ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1 + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_AMDGPU_S_MUL_I64_I32 %0, %1 + $vgpr0_vgpr1 = COPY %2 + S_ENDPGM 0, implicit $vgpr0_vgpr1 +... _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
