llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-globalisel Author: None (vangthao95) <details> <summary>Changes</summary> Patch 2 of 4 patches to implement full G_MUL support in regbanklegalize. Current mul.ll test is only partially updated and expected to fail. It will be updated in the fourth patch. --- Patch is 30.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/175887.diff 5 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp (+131) - (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h (+1) - (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp (+4) - (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h (+2) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir (+47-162) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index f21b87c8f92f0..8cea1fa98cd02 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -19,6 +19,7 @@ #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineUniformityAnalysis.h" @@ -716,6 +717,131 @@ bool RegBankLegalizeHelper::lowerSplitTo16(MachineInstr &MI) { return true; } +bool RegBankLegalizeHelper::lowerUniMAD64(MachineInstr &MI) { + Register Dst0 = MI.getOperand(0).getReg(); + Register Dst1 = MI.getOperand(1).getReg(); + Register Src0 = MI.getOperand(2).getReg(); + Register Src1 = MI.getOperand(3).getReg(); + Register Src2 = MI.getOperand(4).getReg(); + + bool IsUnsigned = MI.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32; + + bool DstOnValu = MRI.getRegBankOrNull(Src2) == VgprRB; + bool 
Accumulate = true; + + if (!DstOnValu) { + if (mi_match(Src2, MRI, MIPatternMatch::m_ZeroInt())) + Accumulate = false; + } + + // Keep the multiplication on the SALU. + Register DstHi; + Register DstLo = B.buildMul({SgprRB, S32}, Src0, Src1).getReg(0); + bool MulHiInVgpr = false; + + const GCNSubtarget &ST = B.getMF().getSubtarget<GCNSubtarget>(); + unsigned MulHOpc = IsUnsigned ? AMDGPU::G_UMULH : AMDGPU::G_SMULH; + + if (ST.hasScalarMulHiInsts()) { + DstHi = B.buildInstr(MulHOpc, {{SgprRB, S32}}, {Src0, Src1}).getReg(0); + } else { + Register VSrc0 = B.buildCopy({VgprRB, S32}, Src0).getReg(0); + Register VSrc1 = B.buildCopy({VgprRB, S32}, Src1).getReg(0); + + DstHi = B.buildInstr(MulHOpc, {{VgprRB, S32}}, {VSrc0, VSrc1}).getReg(0); + + if (!DstOnValu) { + Register DstHiSgpr = + MRI.createVirtualRegister({SgprRB, MRI.getType(DstHi)}); + buildReadAnyLane(B, DstHiSgpr, DstHi, RBI); + DstHi = DstHiSgpr; + } else { + MulHiInVgpr = true; + } + } + + // Accumulate and produce the "carry-out" bit. + + // The "carry-out" is defined as bit 64 of the result when computed as a + // big integer. For unsigned multiply-add, this matches the usual + // definition of carry-out. For signed multiply-add, bit 64 is the sign + // bit of the result, which is determined as: + // sign(Src0 * Src1) + sign(Src2) + carry-out from unsigned 64-bit add + LLT CarryType = DstOnValu ? S1 : S32; + const RegisterBank &CarryBank = DstOnValu ? *VccRB : *SgprRB; + const RegisterBank &DstBank = DstOnValu ? 
*VgprRB : *SgprRB; + Register Carry; + Register Zero; + + if (!IsUnsigned) { + // Register Zero, Carry; + + if (MulHiInVgpr) { + Zero = MRI.createVirtualRegister({VgprRB, S32}); + Carry = MRI.createVirtualRegister({VccRB, S1}); + } else { + Zero = MRI.createVirtualRegister({SgprRB, S32}); + Carry = MRI.createVirtualRegister({SgprRB, S32}); + } + + B.buildConstant(Zero, 0); + B.buildICmp(CmpInst::ICMP_SLT, Carry, DstHi, Zero); + + if (DstOnValu && !MulHiInVgpr) { + Carry = B.buildTrunc({VccRB, S1}, Carry).getReg(0); + } + } + + if (Accumulate) { + if (DstOnValu) { + DstLo = B.buildCopy({VgprRB, S32}, DstLo).getReg(0); + DstHi = B.buildCopy({VgprRB, S32}, DstHi).getReg(0); + } + + Register Src2Lo = MRI.createVirtualRegister({&DstBank, S32}); + Register Src2Hi = MRI.createVirtualRegister({&DstBank, S32}); + B.buildUnmerge({Src2Lo, Src2Hi}, Src2); + + if (!IsUnsigned) { + Register Src2Sign = MRI.createVirtualRegister({&CarryBank, CarryType}); + Register XorCarry = MRI.createVirtualRegister({&CarryBank, CarryType}); + B.buildICmp(CmpInst::ICMP_SLT, Src2Sign, Src2Hi, Zero); + Carry = B.buildXor(XorCarry, Carry, Src2Sign).getReg(0); + } + Register AddLo = MRI.createVirtualRegister({&DstBank, S32}); + Register CarryLo = MRI.createVirtualRegister({&CarryBank, CarryType}); + DstLo = B.buildUAddo(AddLo, CarryLo, DstLo, Src2Lo).getReg(0); + + Register AddHi = MRI.createVirtualRegister({&DstBank, S32}); + Register CarryHi = MRI.createVirtualRegister({&CarryBank, CarryType}); + + DstHi = B.buildUAdde(AddHi, CarryHi, DstHi, Src2Hi, CarryLo).getReg(0); + + if (IsUnsigned) { + Carry = CarryHi; + } else { + Register CarryXor = MRI.createVirtualRegister({&CarryBank, CarryType}); + Carry = B.buildXor(CarryXor, Carry, CarryHi).getReg(0); + } + } else { + if (IsUnsigned) { + Register CarryZero = MRI.createVirtualRegister({&CarryBank, CarryType}); + Carry = B.buildConstant(CarryZero, 0).getReg(0); + } + } + + B.buildMergeLikeInstr(Dst0, {DstLo, DstHi}); + + if (DstOnValu) { + 
B.buildCopy(Dst1, Carry); + } else { + if (!MRI.use_empty(Dst1)) + B.buildTrunc(Dst1, Carry); + } + MI.eraseFromParent(); + return true; +} + bool RegBankLegalizeHelper::lowerSplitTo32Select(MachineInstr &MI) { Register Dst = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(Dst); @@ -857,6 +983,8 @@ bool RegBankLegalizeHelper::lower(MachineInstr &MI, return lowerV_BFE(MI); case S_BFE: return lowerS_BFE(MI); + case UniMAD64: + return lowerUniMAD64(MI); case SplitTo32: return lowerSplitTo32(MI); case SplitTo32Select: @@ -933,6 +1061,7 @@ bool RegBankLegalizeHelper::lower(MachineInstr &MI, LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) { switch (ID) { + case SgprS1: case Vcc: case UniInVcc: return LLT::scalar(1); @@ -1057,6 +1186,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) { switch (ID) { case Vcc: return VccRB; + case SgprS1: case Sgpr16: case Sgpr32: case Sgpr32_WF: @@ -1144,6 +1274,7 @@ bool RegBankLegalizeHelper::applyMappingDst( switch (MethodIDs[OpIdx]) { // vcc, sgpr and vgpr scalars, pointers and vectors case Vcc: + case SgprS1: case Sgpr16: case Sgpr32: case Sgpr64: diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h index 1dc0278d6d90d..f92ed3de6cf27 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h @@ -125,6 +125,7 @@ class RegBankLegalizeHelper { bool lowerUnpackBitShift(MachineInstr &MI); bool lowerV_BFE(MachineInstr &MI); bool lowerS_BFE(MachineInstr &MI); + bool lowerUniMAD64(MachineInstr &MI); bool lowerSplitTo32(MachineInstr &MI); bool lowerSplitTo16(MachineInstr &MI); bool lowerSplitTo32Select(MachineInstr &MI); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 6636af46ec879..991a85b670a76 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -501,6 +501,10 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasMulHi) .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasMulHi); + addRulesForGOpcs({G_AMDGPU_MAD_U64_U32, G_AMDGPU_MAD_I64_I32}, Standard) + .Div(S64, {{Vgpr64, Vcc}, {Vgpr32, Vgpr32, Vgpr64}}) + .Uni(S64, {{Sgpr64, SgprS1}, {Sgpr32, Sgpr32, Sgpr64}, UniMAD64}); + addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB) .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}}) .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}}) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h index 1ac117304b76f..77ed0b7fe7920 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h @@ -134,6 +134,7 @@ enum RegBankLLTMappingApplyID { Vcc, // sgpr scalars, pointers, vectors and B-types + SgprS1, Sgpr16, Sgpr32, Sgpr64, @@ -224,6 +225,7 @@ enum LoweringMethodID { S_BFE, V_BFE, VgprToVccCopy, + UniMAD64, SplitTo32, ScalarizeToS16, SplitTo32Select, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir index cce4beacafdb2..3d7b94613d15b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mad_64_32.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck --check-prefixes=CHECK,GFX8 %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck --check-prefixes=CHECK,GFX9MI %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | 
FileCheck --check-prefixes=CHECK,GFX10 %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' %s -o - | FileCheck --check-prefixes=CHECK,GFX8 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' %s -o - | FileCheck --check-prefixes=CHECK,GFX9MI %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' %s -o - | FileCheck --check-prefixes=CHECK,GFX10 %s --- name: mad_u64_u32_sss @@ -10,8 +10,6 @@ legalized: true body: | bb.0: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 - ; - ; ; GFX8-LABEL: name: mad_u64_u32_sss ; GFX8: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GFX8-NEXT: {{ $}} @@ -23,13 +21,13 @@ body: | ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(s32) = G_UMULH [[COPY4]], [[COPY5]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UMULH]](s32), implicit $exec + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY4]], [[COPY5]] + ; GFX8-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UMULH]] ; GFX8-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; GFX8-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[V_READFIRSTLANE_B32_]], [[UV1]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[AMDGPU_READANYLANE]], [[UV1]], [[UADDO1]] ; GFX8-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32) + ; ; GFX9MI-LABEL: name: mad_u64_u32_sss ; GFX9MI: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; 
GFX9MI-NEXT: {{ $}} @@ -44,7 +42,7 @@ body: | ; GFX9MI-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] ; GFX9MI-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[UMULH]], [[UV1]], [[UADDO1]] ; GFX9MI-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32) + ; ; GFX10-LABEL: name: mad_u64_u32_sss ; GFX10: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GFX10-NEXT: {{ $}} @@ -59,7 +57,6 @@ body: | ; GFX10-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] ; GFX10-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[UMULH]], [[UV1]], [[UADDO1]] ; GFX10-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 @@ -75,55 +72,17 @@ legalized: true body: | bb.0: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 - ; - ; - ; GFX8-LABEL: name: mad_u64_u32_ssv - ; GFX8: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 - ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY4]], [[COPY5]] - ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) - ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UMULH]](s32) - ; GFX8-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = 
G_UNMERGE_VALUES [[MV]](s64) - ; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY6]], [[UV]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY7]], [[UV1]], [[UADDO1]] - ; GFX8-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vcc(s1) = COPY [[UADDE1]](s1) - ; GFX9MI-LABEL: name: mad_u64_u32_ssv - ; GFX9MI: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 - ; GFX9MI-NEXT: {{ $}} - ; GFX9MI-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9MI-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY5]], [[MV]] - ; GFX10-LABEL: name: mad_u64_u32_ssv - ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 - ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32) - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UMULH]](s32) - ; GFX10-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), 
[[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY4]], [[UV]] - ; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY5]], [[UV1]], [[UADDO1]] - ; GFX10-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:vcc(s1) = COPY [[UADDE1]](s1) + ; CHECK-LABEL: name: mad_u64_u32_ssv + ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY5]], [[MV]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $vgpr0 @@ -139,8 +98,6 @@ legalized: true body: | bb.0: liveins: $sgpr0, $vgpr0, $sgpr1, $sgpr2 - ; - ; ; CHECK-LABEL: name: mad_u64_u32_svs ; CHECK: liveins: $sgpr0, $vgpr0, $sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} @@ -167,8 +124,6 @@ legalized: true body: | bb.0: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2 - ; - ; ; CHECK-LABEL: name: mad_u64_u32_svv ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} @@ -194,8 +149,6 @@ legalized: true body: | bb.0: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2 - ; - ; ; CHECK-LABEL: name: mad_u64_u32_vss ; CHECK: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2 ; CHECK-NEXT: {{ $}} @@ -222,8 +175,6 @@ legalized: true body: | bb.0: liveins: $vgpr0, $sgpr0, $vgpr1, $vgpr2 - ; - ; ; CHECK-LABEL: name: mad_u64_u32_vsv ; CHECK: liveins: $vgpr0, $sgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} @@ -249,8 +200,6 @@ legalized: true body: | bb.0: liveins: 
$vgpr0, $vgpr1, $sgpr0, $sgpr1 - ; - ; ; CHECK-LABEL: name: mad_u64_u32_vvs ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1 ; CHECK-NEXT: {{ $}} @@ -276,8 +225,6 @@ legalized: true body: | bb.0: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; - ; ; CHECK-LABEL: name: mad_u64_u32_vvv ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} @@ -302,8 +249,6 @@ legalized: true body: | bb.0: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 - ; - ; ; GFX8-LABEL: name: mad_i64_i32_sss ; GFX8: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GFX8-NEXT: {{ $}} @@ -315,18 +260,18 @@ body: | ; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]] ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(s32) = G_SMULH [[COPY4]], [[COPY5]] - ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[SMULH]](s32), implicit $exec + ; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY4]], [[COPY5]] + ; GFX8-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[SMULH]] ; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[V_READFIRSTLANE_B32_]](s32), [[C]] + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[AMDGPU_READANYLANE]](s32), [[C]] ; GFX8-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[UV1]](s32), [[C]] ; GFX8-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ICMP]], [[ICMP1]] ; GFX8-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]] - ; GFX8-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[V_READFIRSTLANE_B32_]], [[UV1]], [[UADDO1]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[AMDGPU_READANYLANE]], [[UV1]], [[UADDO1]] ; 
GFX8-NEXT: [[XOR1:%[0-9]+]]:sgpr(s32) = G_XOR [[XOR]], [[UADDE1]] ; GFX8-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR1]](s32) + ; ; GFX9MI-LABEL: name: mad_i64_i32_sss ; GFX9MI: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 ; GFX9MI-NEXT: {{ $}} @@ -346,7 +291,7 @@ body: | ; GFX9MI-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[SMULH]], [[UV1]], [[UADDO1]] ; GFX9MI-NEXT: [[XOR1:%[0-9]+]]:sgpr(s32) = G_XOR [[XOR]], [[UADDE1]] ; GFX9MI-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR1]](s32) + ; ; GFX10... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/175887 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
