llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-loongarch Author: wanglei (wangleiat) <details> <summary>Changes</summary> --- Full diff: https://github.com/llvm/llvm-project/pull/106941.diff 3 Files Affected: - (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+41) - (modified) llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td (+4) - (modified) llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll (+41-148) ``````````diff diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 95c1b150722f64..0e17ce7ea02bb4 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -283,6 +283,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, ISD::SETUGE, ISD::SETUGT}, VT, Expand); } + setOperationAction(ISD::CTPOP, GRLenVT, Legal); } // Set operations for 'LASX' feature. @@ -4488,6 +4489,44 @@ emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, return BB; } +static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI, + MachineBasicBlock *BB, + const LoongArchSubtarget &Subtarget) { + assert(Subtarget.hasExtLSX()); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + const TargetRegisterClass *RC = &LoongArch::LSX128RegClass; + DebugLoc DL = MI.getDebugLoc(); + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register ScratchReg1 = MRI.createVirtualRegister(RC); + Register ScratchReg2 = MRI.createVirtualRegister(RC); + Register ScratchReg3 = MRI.createVirtualRegister(RC); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0); + BuildMI(*BB, MI, DL, + TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D + : LoongArch::VINSGR2VR_W), + ScratchReg2) + .addReg(ScratchReg1) + .addReg(Src) + .addImm(0); + BuildMI( + *BB, MI, DL, + TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W), + ScratchReg3) + .addReg(ScratchReg2); + BuildMI(*BB, MI, DL, + TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D + : LoongArch::VPICKVE2GR_W), + Dst) + .addReg(ScratchReg3) + .addImm(0); + + MI.eraseFromParent(); + return BB; +} + MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = Subtarget.getInstrInfo(); @@ -4546,6 +4585,8 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( case LoongArch::PseudoXVINSGR2VR_B: case LoongArch::PseudoXVINSGR2VR_H: return emitPseudoXVINSGR2VR(MI, BB, Subtarget); + case LoongArch::PseudoCTPOP: + return emitPseudoCTPOP(MI, BB, Subtarget); } } diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 659ba38c695d33..e7ac9f3bd04cbf 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -1238,6 +1238,10 @@ def PseudoVBZ_W : VecCond<loongarch_vall_zero, v4i32>; def PseudoVBZ_D : VecCond<loongarch_vall_zero, v2i64>; def PseudoVBZ : VecCond<loongarch_vany_zero, v16i8>; +let usesCustomInserter = 1 in +def PseudoCTPOP : Pseudo<(outs GPR:$rd), (ins GPR:$rj), + [(set GPR:$rd, (ctpop GPR:$rj))]>; + } // Predicates = [HasExtLSX] multiclass PatVr<SDPatternOperator OpNode, string Inst> { diff --git a/llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll b/llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll index a5cffb29eec614..c01f3cdb405682 100644 --- a/llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll +++ b/llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll @@ -10,30 +10,20 @@ declare i64 @llvm.ctpop.i64(i64) define i8 @test_ctpop_i8(i8 %a) nounwind { ; LA32-LABEL: test_ctpop_i8: ; LA32: # %bb.0: -; LA32-NEXT: srli.w $a1, $a0, 1 -; LA32-NEXT: andi $a1, $a1, 85 -; LA32-NEXT: sub.w $a0, $a0, $a1 -; LA32-NEXT: andi $a1, $a0, 51 -; LA32-NEXT: srli.w $a0, $a0, 2 -; LA32-NEXT: andi $a0, $a0, 51 -; LA32-NEXT: add.w $a0, $a1, $a0 -; LA32-NEXT: srli.w $a1, $a0, 4 -; LA32-NEXT: add.w $a0, $a0, $a1 -; LA32-NEXT: andi $a0, $a0, 15 +; LA32-NEXT: andi $a0, $a0, 255 +; LA32-NEXT: vldi $vr0, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 0 +; LA32-NEXT: vpcnt.w $vr0, $vr0 +; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0 ; LA32-NEXT: ret ; ; LA64-LABEL: test_ctpop_i8: ; LA64: # %bb.0: -; LA64-NEXT: srli.d $a1, $a0, 1 -; LA64-NEXT: andi $a1, $a1, 85 -; LA64-NEXT: sub.d $a0, $a0, $a1 -; LA64-NEXT: andi $a1, $a0, 51 -; LA64-NEXT: srli.d $a0, $a0, 2 -; LA64-NEXT: andi $a0, $a0, 51 -; LA64-NEXT: add.d $a0, $a1, $a0 -; LA64-NEXT: srli.d $a1, $a0, 4 -; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: andi $a0, $a0, 15 +; LA64-NEXT: andi $a0, $a0, 255 +; LA64-NEXT: vldi $vr0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vpcnt.d $vr0, $vr0 +; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0 ; LA64-NEXT: ret %1 = call i8 @llvm.ctpop.i8(i8 %a) ret i8 %1 @@ -42,42 +32,20 @@ define i8 @test_ctpop_i8(i8 %a) nounwind { define i16 @test_ctpop_i16(i16 %a) nounwind { ; LA32-LABEL: test_ctpop_i16: ; LA32: # %bb.0: -; LA32-NEXT: srli.w $a1, $a0, 1 -; LA32-NEXT: lu12i.w $a2, 5 -; LA32-NEXT: ori $a2, $a2, 1365 -; LA32-NEXT: and $a1, $a1, $a2 -; LA32-NEXT: sub.w $a0, $a0, $a1 -; LA32-NEXT: lu12i.w $a1, 3 -; LA32-NEXT: ori $a1, $a1, 819 -; LA32-NEXT: and $a2, $a0, $a1 -; LA32-NEXT: srli.w $a0, $a0, 2 -; LA32-NEXT: and $a0, $a0, $a1 -; LA32-NEXT: add.w $a0, $a2, $a0 -; LA32-NEXT: srli.w $a1, $a0, 4 -; LA32-NEXT: add.w $a0, $a0, $a1 -; LA32-NEXT: bstrpick.w $a1, $a0, 11, 8 -; LA32-NEXT: andi $a0, $a0, 15 -; LA32-NEXT: add.w $a0, $a0, $a1 +; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 +; LA32-NEXT: vldi $vr0, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 0 +; LA32-NEXT: vpcnt.w $vr0, $vr0 +; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0 ; LA32-NEXT: ret ; ; LA64-LABEL: test_ctpop_i16: ; LA64: # %bb.0: -; LA64-NEXT: srli.d $a1, $a0, 1 -; LA64-NEXT: lu12i.w $a2, 5 -; LA64-NEXT: ori $a2, $a2, 1365 -; LA64-NEXT: and $a1, $a1, $a2 -; LA64-NEXT: sub.d $a0, $a0, $a1 -; LA64-NEXT: lu12i.w $a1, 3 -; LA64-NEXT: ori $a1, $a1, 819 -; LA64-NEXT: and $a2, $a0, $a1 -; LA64-NEXT: srli.d $a0, $a0, 2 -; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: add.d $a0, $a2, $a0 -; LA64-NEXT: srli.d $a1, $a0, 4 -; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: bstrpick.d $a1, $a0, 11, 8 -; LA64-NEXT: andi $a0, $a0, 15 -; LA64-NEXT: add.d $a0, $a0, $a1 +; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 +; LA64-NEXT: vldi $vr0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vpcnt.d $vr0, $vr0 +; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0 ; LA64-NEXT: ret %1 = call i16 @llvm.ctpop.i16(i16 %a) ret i16 %1 @@ -86,50 +54,19 @@ define i16 @test_ctpop_i16(i16 %a) nounwind { define i32 @test_ctpop_i32(i32 %a) nounwind { ; LA32-LABEL: test_ctpop_i32: ; LA32: # %bb.0: -; LA32-NEXT: srli.w $a1, $a0, 1 -; LA32-NEXT: lu12i.w $a2, 349525 -; LA32-NEXT: ori $a2, $a2, 1365 -; LA32-NEXT: and $a1, $a1, $a2 -; LA32-NEXT: sub.w $a0, $a0, $a1 -; LA32-NEXT: lu12i.w $a1, 209715 -; LA32-NEXT: ori $a1, $a1, 819 -; LA32-NEXT: and $a2, $a0, $a1 -; LA32-NEXT: srli.w $a0, $a0, 2 -; LA32-NEXT: and $a0, $a0, $a1 -; LA32-NEXT: add.w $a0, $a2, $a0 -; LA32-NEXT: srli.w $a1, $a0, 4 -; LA32-NEXT: add.w $a0, $a0, $a1 -; LA32-NEXT: lu12i.w $a1, 61680 -; LA32-NEXT: ori $a1, $a1, 3855 -; LA32-NEXT: and $a0, $a0, $a1 -; LA32-NEXT: lu12i.w $a1, 4112 -; LA32-NEXT: ori $a1, $a1, 257 -; LA32-NEXT: mul.w $a0, $a0, $a1 -; LA32-NEXT: srli.w $a0, $a0, 24 +; LA32-NEXT: vldi $vr0, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 0 +; LA32-NEXT: vpcnt.w $vr0, $vr0 +; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0 ; LA32-NEXT: ret ; ; LA64-LABEL: test_ctpop_i32: ; LA64: # %bb.0: -; LA64-NEXT: srli.d $a1, $a0, 1 -; LA64-NEXT: lu12i.w $a2, 349525 -; LA64-NEXT: ori $a2, $a2, 1365 -; LA64-NEXT: and $a1, $a1, $a2 -; LA64-NEXT: sub.d $a0, $a0, $a1 -; LA64-NEXT: lu12i.w $a1, 209715 -; LA64-NEXT: ori $a1, $a1, 819 -; LA64-NEXT: and $a2, $a0, $a1 -; LA64-NEXT: srli.d $a0, $a0, 2 -; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: add.d $a0, $a2, $a0 -; LA64-NEXT: srli.d $a1, $a0, 4 -; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: lu12i.w $a1, 61680 -; LA64-NEXT: ori $a1, $a1, 3855 -; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: lu12i.w $a1, 4112 -; LA64-NEXT: ori $a1, $a1, 257 -; LA64-NEXT: mul.d $a0, $a0, $a1 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 24 +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +; LA64-NEXT: vldi $vr0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vpcnt.d $vr0, $vr0 +; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0 ; LA64-NEXT: ret %1 = call i32 @llvm.ctpop.i32(i32 %a) ret i32 %1 @@ -138,68 +75,24 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { define i64 @test_ctpop_i64(i64 %a) nounwind { ; LA32-LABEL: test_ctpop_i64: ; LA32: # %bb.0: -; LA32-NEXT: srli.w $a2, $a1, 1 -; LA32-NEXT: lu12i.w $a3, 349525 -; LA32-NEXT: ori $a3, $a3, 1365 -; LA32-NEXT: and $a2, $a2, $a3 -; LA32-NEXT: sub.w $a1, $a1, $a2 -; LA32-NEXT: lu12i.w $a2, 209715 -; LA32-NEXT: ori $a2, $a2, 819 -; LA32-NEXT: and $a4, $a1, $a2 -; LA32-NEXT: srli.w $a1, $a1, 2 -; LA32-NEXT: and $a1, $a1, $a2 -; LA32-NEXT: add.w $a1, $a4, $a1 -; LA32-NEXT: srli.w $a4, $a1, 4 -; LA32-NEXT: add.w $a1, $a1, $a4 -; LA32-NEXT: lu12i.w $a4, 61680 -; LA32-NEXT: ori $a4, $a4, 3855 -; LA32-NEXT: and $a1, $a1, $a4 -; LA32-NEXT: lu12i.w $a5, 4112 -; LA32-NEXT: ori $a5, $a5, 257 -; LA32-NEXT: mul.w $a1, $a1, $a5 -; LA32-NEXT: srli.w $a1, $a1, 24 -; LA32-NEXT: srli.w $a6, $a0, 1 -; LA32-NEXT: and $a3, $a6, $a3 -; LA32-NEXT: sub.w $a0, $a0, $a3 -; LA32-NEXT: and $a3, $a0, $a2 -; LA32-NEXT: srli.w $a0, $a0, 2 -; LA32-NEXT: and $a0, $a0, $a2 -; LA32-NEXT: add.w $a0, $a3, $a0 -; LA32-NEXT: srli.w $a2, $a0, 4 -; LA32-NEXT: add.w $a0, $a0, $a2 -; LA32-NEXT: and $a0, $a0, $a4 -; LA32-NEXT: mul.w $a0, $a0, $a5 -; LA32-NEXT: srli.w $a0, $a0, 24 +; LA32-NEXT: vldi $vr0, 0 +; LA32-NEXT: vldi $vr1, 0 +; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0 +; LA32-NEXT: vpcnt.w $vr1, $vr1 +; LA32-NEXT: vpickve2gr.w $a1, $vr1, 0 +; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 0 +; LA32-NEXT: vpcnt.w $vr0, $vr0 +; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0 ; LA32-NEXT: add.w $a0, $a0, $a1 ; LA32-NEXT: move $a1, $zero ; LA32-NEXT: ret ; ; LA64-LABEL: test_ctpop_i64: ; LA64: # %bb.0: -; LA64-NEXT: srli.d $a1, $a0, 1 -; LA64-NEXT: lu12i.w $a2, 349525 -; LA64-NEXT: ori $a2, $a2, 1365 -; LA64-NEXT: bstrins.d $a2, $a2, 62, 32 -; LA64-NEXT: and $a1, $a1, $a2 -; LA64-NEXT: sub.d $a0, $a0, $a1 -; LA64-NEXT: lu12i.w $a1, 209715 -; LA64-NEXT: ori $a1, $a1, 819 -; LA64-NEXT: bstrins.d $a1, $a1, 61, 32 -; LA64-NEXT: and $a2, $a0, $a1 -; LA64-NEXT: srli.d $a0, $a0, 2 -; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: add.d $a0, $a2, $a0 -; LA64-NEXT: srli.d $a1, $a0, 4 -; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: lu12i.w $a1, 61680 -; LA64-NEXT: ori $a1, $a1, 3855 -; LA64-NEXT: bstrins.d $a1, $a1, 59, 32 -; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: lu12i.w $a1, 4112 -; LA64-NEXT: ori $a1, $a1, 257 -; LA64-NEXT: bstrins.d $a1, $a1, 56, 32 -; LA64-NEXT: mul.d $a0, $a0, $a1 -; LA64-NEXT: srli.d $a0, $a0, 56 +; LA64-NEXT: vldi $vr0, 0 +; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; LA64-NEXT: vpcnt.d $vr0, $vr0 +; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0 ; LA64-NEXT: ret %1 = call i64 @llvm.ctpop.i64(i64 %a) ret i64 %1 `````````` </details> https://github.com/llvm/llvm-project/pull/106941 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits