https://github.com/ylzsx created https://github.com/llvm/llvm-project/pull/161154
None

>From f2affc005ca009009a9cfbb190f7dc0d9c661520 Mon Sep 17 00:00:00 2001
From: yangzhaoxin <[email protected]>
Date: Mon, 29 Sep 2025 16:26:03 +0800
Subject: [PATCH 1/2] [LoongArch] Make rotl/rotr custom for lsx/lasx

---
 .../LoongArch/LoongArchISelLowering.cpp       | 60 +++++++++++++++++++
 .../Target/LoongArch/LoongArchISelLowering.h  |  1 +
 .../LoongArch/LoongArchLASXInstrInfo.td       |  5 ++
 .../Target/LoongArch/LoongArchLSXInstrInfo.td |  5 ++
 4 files changed, 71 insertions(+)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 94f53d5b85f10..073b2ddcd049e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -346,6 +346,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::SSUBSAT, VT, Legal);
       setOperationAction(ISD::UADDSAT, VT, Legal);
       setOperationAction(ISD::USUBSAT, VT, Legal);
+      setOperationAction(ISD::ROTL, VT, Custom);
+      setOperationAction(ISD::ROTR, VT, Custom);
     }
     for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
       setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -426,6 +428,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::UADDSAT, VT, Legal);
       setOperationAction(ISD::USUBSAT, VT, Legal);
      setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
+      setOperationAction(ISD::ROTL, VT, Custom);
+      setOperationAction(ISD::ROTR, VT, Custom);
     }
     for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
       setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -580,6 +584,9 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
     return lowerBF16_TO_FP(Op, DAG);
   case ISD::VECREDUCE_ADD:
     return lowerVECREDUCE_ADD(Op, DAG);
+  case ISD::ROTL:
+  case ISD::ROTR:
+    return lowerRotate(Op, DAG);
   case ISD::VECREDUCE_AND:
   case ISD::VECREDUCE_OR:
   case ISD::VECREDUCE_XOR:
@@ -753,6 +760,59 @@ SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
   return Op;
 }
 
+SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  MVT VT = Op.getSimpleValueType();
+  if (!VT.isVector())
+    return Op;
+
+  SDLoc DL(Op);
+  SDValue R = Op.getOperand(0);
+  SDValue Amt = Op.getOperand(1);
+  unsigned Opcode = Op.getOpcode();
+  unsigned EltSizeInBits = VT.getScalarSizeInBits();
+
+  auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
+    if (V.getOpcode() != ISD::BUILD_VECTOR)
+      return false;
+    if (SDValue SplatValue =
+            cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
+      if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
+        CstSplatValue = C->getAPIntValue();
+        return true;
+      }
+    }
+    return false;
+  };
+
+  // Check for a constant splat rotation amount.
+  APInt CstSplatValue;
+  bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
+  bool isROTL = Opcode == ISD::ROTL;
+
+  // Check for splat rotate by zero.
+  if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
+    return R;
+
+  // LoongArch targets always prefer ISD::ROTR.
+  if (isROTL) {
+    SDValue Zero = DAG.getConstant(0, DL, VT);
+    return DAG.getNode(ISD::ROTR, DL, VT, R,
+                       DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
+  }
+
+  // Rotate by an immediate.
+  if (IsCstSplat) {
+    // ISD::ROTR: Attempt to rotate by a positive immediate.
+    SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
+    if (SDValue Urem =
+            DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
+      return DAG.getNode(Op.getOpcode(), DL, VT, R, Urem);
+  }
+
+  return Op;
+}
+
 // Return true if Val is equal to (setcc LHS, RHS, CC).
 // Return false if Val is the inverse of (setcc LHS, RHS, CC).
 // Otherwise, return std::nullopt.
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 3c00296116ac2..d782498019914 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -411,6 +411,7 @@ class LoongArchTargetLowering : public TargetLowering {
   SDValue lowerVECREDUCE_ADD(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerRotate(SDValue Op, SelectionDAG &DAG) const;
 
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index adfe990ba1234..bd6ab2a789b26 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1442,6 +1442,11 @@ defm : PatXrXr<sra, "XVSRA">;
 defm : PatShiftXrXr<sra, "XVSRA">;
 defm : PatShiftXrSplatUimm<sra, "XVSRAI">;
 
+// XVROTR[I]_{B/H/W/D}
+defm : PatXrXr<rotr, "XVROTR">;
+defm : PatShiftXrXr<rotr, "XVROTR">;
+defm : PatShiftXrSplatUimm<rotr, "XVROTRI">;
+
 // XVCLZ_{B/H/W/D}
 defm : PatXr<ctlz, "XVCLZ">;
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 2c36099f8eb71..a159954140fb9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1633,6 +1633,11 @@ defm : PatVrVr<sra, "VSRA">;
 defm : PatShiftVrVr<sra, "VSRA">;
 defm : PatShiftVrSplatUimm<sra, "VSRAI">;
 
+// VROTR[I]_{B/H/W/D}
+defm : PatVrVr<rotr, "VROTR">;
+defm : PatShiftVrVr<rotr, "VROTR">;
+defm : PatShiftVrSplatUimm<rotr, "VROTRI">;
+
 // VCLZ_{B/H/W/D}
 defm : PatVr<ctlz, "VCLZ">;
 

>From e8a821835c84788639a7d8ba6b2c7061fa187eb8 Mon Sep 17 00:00:00 2001
From: yangzhaoxin <[email protected]>
Date: Mon, 29 Sep 2025 17:10:54 +0800
Subject: [PATCH 2/2] add tests

---
 llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll | 107 ++++++------------
 llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll  | 105 ++++++-----------
 2 files changed, 70 insertions(+), 142 deletions(-)

diff --git a/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll
index f9f024dda973c..6b8ab2cdb94e1 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll
@@ -7,11 +7,8 @@ define void @rotl_v32i8(ptr %dst, ptr %src, i8 signext %a0) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
 ; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a2
-; CHECK-NEXT:    xvrepli.b $xr2, 8
-; CHECK-NEXT:    xvsub.b $xr2, $xr2, $xr1
-; CHECK-NEXT:    xvsll.b $xr1, $xr0, $xr1
-; CHECK-NEXT:    xvsrl.b $xr0, $xr0, $xr2
-; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvneg.b $xr1, $xr1
+; CHECK-NEXT:    xvrotr.b $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <32 x i8>, ptr %src
@@ -30,11 +27,7 @@ define void @rotr_v32i8(ptr %dst, ptr %src, i8 signext %a0) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
 ; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a2
-; CHECK-NEXT:    xvrepli.b $xr2, 8
-; CHECK-NEXT:    xvsub.b $xr2, $xr2, $xr1
-; CHECK-NEXT:    xvsrl.b $xr1, $xr0, $xr1
-; CHECK-NEXT:    xvsll.b $xr0, $xr0, $xr2
-; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvrotr.b $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <32 x i8>, ptr %src
@@ -52,9 +45,7 @@ define void @rotr_v32i8_imm(ptr %dst, ptr %src) nounwind {
 ; CHECK-LABEL: rotr_v32i8_imm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvsrli.b $xr1, $xr0, 2
-; CHECK-NEXT:    xvslli.b $xr0, $xr0, 6
-; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvrotri.b $xr0, $xr0, 2
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <32 x i8>, ptr %src
@@ -70,11 +61,8 @@ define void @rotl_v16i16(ptr %dst, ptr %src, i16 signext %a0) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
 ; CHECK-NEXT:    xvreplgr2vr.h $xr1, $a2
-; CHECK-NEXT:    xvrepli.h $xr2, 16
-; CHECK-NEXT:    xvsub.h $xr2, $xr2, $xr1
-; CHECK-NEXT:    xvsll.h $xr1, $xr0, $xr1
-; CHECK-NEXT:    xvsrl.h $xr0, $xr0, $xr2
-; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvneg.h $xr1, $xr1
+; CHECK-NEXT:    xvrotr.h $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i16>, ptr %src
@@ -93,11 +81,7 @@ define void @rotr_v16i16(ptr %dst, ptr %src, i16 signext %a0) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
 ; CHECK-NEXT:    xvreplgr2vr.h $xr1, $a2
-; CHECK-NEXT:    xvrepli.h $xr2, 16
-; CHECK-NEXT:    xvsub.h $xr2, $xr2, $xr1
-; CHECK-NEXT:    xvsrl.h $xr1, $xr0, $xr1
-; CHECK-NEXT:    xvsll.h $xr0, $xr0, $xr2
-; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvrotr.h $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i16>, ptr %src
@@ -115,9 +99,7 @@ define void @rotr_v16i16_imm(ptr %dst, ptr %src) nounwind {
 ; CHECK-LABEL: rotr_v16i16_imm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvsrli.h $xr1, $xr0, 2
-; CHECK-NEXT:    xvslli.h $xr0, $xr0, 14
-; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvrotri.h $xr0, $xr0, 2
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i16>, ptr %src
@@ -133,11 +115,8 @@ define void @rotl_v8i32(ptr %dst, ptr %src, i32 signext %a0) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
 ; CHECK-NEXT:    xvreplgr2vr.w $xr1, $a2
-; CHECK-NEXT:    xvrepli.w $xr2, 32
-; CHECK-NEXT:    xvsub.w $xr2, $xr2, $xr1
-; CHECK-NEXT:    xvsll.w $xr1, $xr0, $xr1
-; CHECK-NEXT:    xvsrl.w $xr0, $xr0, $xr2
-; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvneg.w $xr1, $xr1
+; CHECK-NEXT:    xvrotr.w $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i32>, ptr %src
@@ -156,11 +135,7 @@ define void @rotr_v8i32(ptr %dst, ptr %src, i32 signext %a0) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
 ; CHECK-NEXT:    xvreplgr2vr.w $xr1, $a2
-; CHECK-NEXT:    xvrepli.w $xr2, 32
-; CHECK-NEXT:    xvsub.w $xr2, $xr2, $xr1
-; CHECK-NEXT:    xvsrl.w $xr1, $xr0, $xr1
-; CHECK-NEXT:    xvsll.w $xr0, $xr0, $xr2
-; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvrotr.w $xr0, $xr0, $xr1
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i32>, ptr %src
@@ -178,9 +153,7 @@ define void @rotr_v8i32_imm(ptr %dst, ptr %src) nounwind {
 ; CHECK-LABEL: rotr_v8i32_imm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvsrli.w $xr1, $xr0, 2
-; CHECK-NEXT:    xvslli.w $xr0, $xr0, 30
-; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvrotri.w $xr0, $xr0, 2
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i32>, ptr %src
@@ -196,13 +169,10 @@ define void @rotl_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind {
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    xvld $xr0, $a1, 0
 ; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT:    vinsgr2vr.w $vr1, $a3, 1
-; LA32-NEXT:    xvreplve0.d $xr1, $xr1
-; LA32-NEXT:    xvrepli.d $xr2, 64
-; LA32-NEXT:    xvsub.d $xr2, $xr2, $xr1
-; LA32-NEXT:    xvsll.d $xr1, $xr0, $xr1
-; LA32-NEXT:    xvsrl.d $xr0, $xr0, $xr2
-; LA32-NEXT:    xvor.v $xr0, $xr1, $xr0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 2
+; LA32-NEXT:    xvpermi.q $xr1, $xr1, 2
+; LA32-NEXT:    xvneg.d $xr1, $xr1
+; LA32-NEXT:    xvrotr.d $xr0, $xr0, $xr1
 ; LA32-NEXT:    xvst $xr0, $a0, 0
 ; LA32-NEXT:    ret
 ;
@@ -210,11 +180,8 @@ define void @rotl_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind {
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    xvld $xr0, $a1, 0
 ; LA64-NEXT:    xvreplgr2vr.d $xr1, $a2
-; LA64-NEXT:    xvrepli.d $xr2, 64
-; LA64-NEXT:    xvsub.d $xr2, $xr2, $xr1
-; LA64-NEXT:    xvsll.d $xr1, $xr0, $xr1
-; LA64-NEXT:    xvsrl.d $xr0, $xr0, $xr2
-; LA64-NEXT:    xvor.v $xr0, $xr1, $xr0
+; LA64-NEXT:    xvneg.d $xr1, $xr1
+; LA64-NEXT:    xvrotr.d $xr0, $xr0, $xr1
 ; LA64-NEXT:    xvst $xr0, $a0, 0
 ; LA64-NEXT:    ret
   %v0 = load <4 x i64>, ptr %src
@@ -233,13 +200,9 @@ define void @rotr_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind {
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    xvld $xr0, $a1, 0
 ; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT:    vinsgr2vr.w $vr1, $a3, 1
-; LA32-NEXT:    xvreplve0.d $xr1, $xr1
-; LA32-NEXT:    xvrepli.d $xr2, 64
-; LA32-NEXT:    xvsub.d $xr2, $xr2, $xr1
-; LA32-NEXT:    xvsrl.d $xr1, $xr0, $xr1
-; LA32-NEXT:    xvsll.d $xr0, $xr0, $xr2
-; LA32-NEXT:    xvor.v $xr0, $xr1, $xr0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 2
+; LA32-NEXT:    xvpermi.q $xr1, $xr1, 2
+; LA32-NEXT:    xvrotr.d $xr0, $xr0, $xr1
 ; LA32-NEXT:    xvst $xr0, $a0, 0
 ; LA32-NEXT:    ret
 ;
@@ -247,11 +210,7 @@ define void @rotr_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind {
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    xvld $xr0, $a1, 0
 ; LA64-NEXT:    xvreplgr2vr.d $xr1, $a2
-; LA64-NEXT:    xvrepli.d $xr2, 64
-; LA64-NEXT:    xvsub.d $xr2, $xr2, $xr1
-; LA64-NEXT:    xvsrl.d $xr1, $xr0, $xr1
-; LA64-NEXT:    xvsll.d $xr0, $xr0, $xr2
-; LA64-NEXT:    xvor.v $xr0, $xr1, $xr0
+; LA64-NEXT:    xvrotr.d $xr0, $xr0, $xr1
 ; LA64-NEXT:    xvst $xr0, $a0, 0
 ; LA64-NEXT:    ret
   %v0 = load <4 x i64>, ptr %src
@@ -266,14 +225,20 @@ define void @rotr_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind {
 }
 
 define void @rotr_v4i64_imm(ptr %dst, ptr %src) nounwind {
-; CHECK-LABEL: rotr_v4i64_imm:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    xvsrli.d $xr1, $xr0, 2
-; CHECK-NEXT:    xvslli.d $xr0, $xr0, 62
-; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
-; CHECK-NEXT:    xvst $xr0, $a0, 0
-; CHECK-NEXT:    ret
+; LA32-LABEL: rotr_v4i64_imm:
+; LA32:       # %bb.0:
+; LA32-NEXT:    xvld $xr0, $a1, 0
+; LA32-NEXT:    xvrepli.w $xr1, -62
+; LA32-NEXT:    xvrotr.d $xr0, $xr0, $xr1
+; LA32-NEXT:    xvst $xr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: rotr_v4i64_imm:
+; LA64:       # %bb.0:
+; LA64-NEXT:    xvld $xr0, $a1, 0
+; LA64-NEXT:    xvrotri.d $xr0, $xr0, 2
+; LA64-NEXT:    xvst $xr0, $a0, 0
+; LA64-NEXT:    ret
   %v0 = load <4 x i64>, ptr %src
   %b = lshr <4 x i64> %v0, splat (i64 2)
   %c = shl <4 x i64> %v0, splat (i64 62)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll
index 79e74f35abafb..106a7b0e3f0a5 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll
@@ -7,11 +7,8 @@ define void @rotl_v16i8(ptr %dst, ptr %src, i8 signext %a0) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vld $vr0, $a1, 0
 ; CHECK-NEXT:    vreplgr2vr.b $vr1, $a2
-; CHECK-NEXT:    vrepli.b $vr2, 8
-; CHECK-NEXT:    vsub.b $vr2, $vr2, $vr1
-; CHECK-NEXT:    vsll.b $vr1, $vr0, $vr1
-; CHECK-NEXT:    vsrl.b $vr0, $vr0, $vr2
-; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vneg.b $vr1, $vr1
+; CHECK-NEXT:    vrotr.b $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i8>, ptr %src
@@ -30,11 +27,7 @@ define void @rotr_v16i8(ptr %dst, ptr %src, i8 signext %a0) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vld $vr0, $a1, 0
 ; CHECK-NEXT:    vreplgr2vr.b $vr1, $a2
-; CHECK-NEXT:    vrepli.b $vr2, 8
-; CHECK-NEXT:    vsub.b $vr2, $vr2, $vr1
-; CHECK-NEXT:    vsrl.b $vr1, $vr0, $vr1
-; CHECK-NEXT:    vsll.b $vr0, $vr0, $vr2
-; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vrotr.b $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i8>, ptr %src
@@ -52,9 +45,7 @@ define void @rotr_v16i8_imm(ptr %dst, ptr %src) nounwind {
 ; CHECK-LABEL: rotr_v16i8_imm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vsrli.b $vr1, $vr0, 2
-; CHECK-NEXT:    vslli.b $vr0, $vr0, 6
-; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vrotri.b $vr0, $vr0, 2
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i8>, ptr %src
@@ -70,11 +61,8 @@ define void @rotl_v8i16(ptr %dst, ptr %src, i16 signext %a0) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vld $vr0, $a1, 0
 ; CHECK-NEXT:    vreplgr2vr.h $vr1, $a2
-; CHECK-NEXT:    vrepli.h $vr2, 16
-; CHECK-NEXT:    vsub.h $vr2, $vr2, $vr1
-; CHECK-NEXT:    vsll.h $vr1, $vr0, $vr1
-; CHECK-NEXT:    vsrl.h $vr0, $vr0, $vr2
-; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vneg.h $vr1, $vr1
+; CHECK-NEXT:    vrotr.h $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i16>, ptr %src
@@ -93,11 +81,7 @@ define void @rotr_v8i16(ptr %dst, ptr %src, i16 signext %a0) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vld $vr0, $a1, 0
 ; CHECK-NEXT:    vreplgr2vr.h $vr1, $a2
-; CHECK-NEXT:    vrepli.h $vr2, 16
-; CHECK-NEXT:    vsub.h $vr2, $vr2, $vr1
-; CHECK-NEXT:    vsrl.h $vr1, $vr0, $vr1
-; CHECK-NEXT:    vsll.h $vr0, $vr0, $vr2
-; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vrotr.h $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i16>, ptr %src
@@ -115,9 +99,7 @@ define void @rotr_v8i16_imm(ptr %dst, ptr %src) nounwind {
 ; CHECK-LABEL: rotr_v8i16_imm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vsrli.h $vr1, $vr0, 2
-; CHECK-NEXT:    vslli.h $vr0, $vr0, 14
-; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vrotri.h $vr0, $vr0, 2
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i16>, ptr %src
@@ -133,11 +115,8 @@ define void @rotl_v4i32(ptr %dst, ptr %src, i32 signext %a0) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vld $vr0, $a1, 0
 ; CHECK-NEXT:    vreplgr2vr.w $vr1, $a2
-; CHECK-NEXT:    vrepli.w $vr2, 32
-; CHECK-NEXT:    vsub.w $vr2, $vr2, $vr1
-; CHECK-NEXT:    vsll.w $vr1, $vr0, $vr1
-; CHECK-NEXT:    vsrl.w $vr0, $vr0, $vr2
-; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vneg.w $vr1, $vr1
+; CHECK-NEXT:    vrotr.w $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i32>, ptr %src
@@ -156,11 +135,7 @@ define void @rotr_v4i32(ptr %dst, ptr %src, i32 signext %a0) nounwind {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vld $vr0, $a1, 0
 ; CHECK-NEXT:    vreplgr2vr.w $vr1, $a2
-; CHECK-NEXT:    vrepli.w $vr2, 32
-; CHECK-NEXT:    vsub.w $vr2, $vr2, $vr1
-; CHECK-NEXT:    vsrl.w $vr1, $vr0, $vr1
-; CHECK-NEXT:    vsll.w $vr0, $vr0, $vr2
-; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vrotr.w $vr0, $vr0, $vr1
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i32>, ptr %src
@@ -178,9 +153,7 @@ define void @rotr_v4i32_imm(ptr %dst, ptr %src) nounwind {
 ; CHECK-LABEL: rotr_v4i32_imm:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vsrli.w $vr1, $vr0, 2
-; CHECK-NEXT:    vslli.w $vr0, $vr0, 30
-; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vrotri.w $vr0, $vr0, 2
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i32>, ptr %src
@@ -196,13 +169,9 @@ define void @rotl_v2i64(ptr %dst, ptr %src, i64 %a0) nounwind {
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    vld $vr0, $a1, 0
 ; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT:    vinsgr2vr.w $vr1, $a3, 1
-; LA32-NEXT:    vreplvei.d $vr1, $vr1, 0
-; LA32-NEXT:    vrepli.d $vr2, 64
-; LA32-NEXT:    vsub.d $vr2, $vr2, $vr1
-; LA32-NEXT:    vsll.d $vr1, $vr0, $vr1
-; LA32-NEXT:    vsrl.d $vr0, $vr0, $vr2
-; LA32-NEXT:    vor.v $vr0, $vr1, $vr0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 2
+; LA32-NEXT:    vneg.d $vr1, $vr1
+; LA32-NEXT:    vrotr.d $vr0, $vr0, $vr1
 ; LA32-NEXT:    vst $vr0, $a0, 0
 ; LA32-NEXT:    ret
 ;
@@ -210,11 +179,8 @@ define void @rotl_v2i64(ptr %dst, ptr %src, i64 %a0) nounwind {
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    vld $vr0, $a1, 0
 ; LA64-NEXT:    vreplgr2vr.d $vr1, $a2
-; LA64-NEXT:    vrepli.d $vr2, 64
-; LA64-NEXT:    vsub.d $vr2, $vr2, $vr1
-; LA64-NEXT:    vsll.d $vr1, $vr0, $vr1
-; LA64-NEXT:    vsrl.d $vr0, $vr0, $vr2
-; LA64-NEXT:    vor.v $vr0, $vr1, $vr0
+; LA64-NEXT:    vneg.d $vr1, $vr1
+; LA64-NEXT:    vrotr.d $vr0, $vr0, $vr1
 ; LA64-NEXT:    vst $vr0, $a0, 0
 ; LA64-NEXT:    ret
   %v0 = load <2 x i64>, ptr %src
@@ -233,13 +199,8 @@ define void @rotr_v2i64(ptr %dst, ptr %src, i64 %a0) nounwind {
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    vld $vr0, $a1, 0
 ; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT:    vinsgr2vr.w $vr1, $a3, 1
-; LA32-NEXT:    vreplvei.d $vr1, $vr1, 0
-; LA32-NEXT:    vrepli.d $vr2, 64
-; LA32-NEXT:    vsub.d $vr2, $vr2, $vr1
-; LA32-NEXT:    vsrl.d $vr1, $vr0, $vr1
-; LA32-NEXT:    vsll.d $vr0, $vr0, $vr2
-; LA32-NEXT:    vor.v $vr0, $vr1, $vr0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 2
+; LA32-NEXT:    vrotr.d $vr0, $vr0, $vr1
 ; LA32-NEXT:    vst $vr0, $a0, 0
 ; LA32-NEXT:    ret
 ;
@@ -247,11 +208,7 @@ define void @rotr_v2i64(ptr %dst, ptr %src, i64 %a0) nounwind {
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    vld $vr0, $a1, 0
 ; LA64-NEXT:    vreplgr2vr.d $vr1, $a2
-; LA64-NEXT:    vrepli.d $vr2, 64
-; LA64-NEXT:    vsub.d $vr2, $vr2, $vr1
-; LA64-NEXT:    vsrl.d $vr1, $vr0, $vr1
-; LA64-NEXT:    vsll.d $vr0, $vr0, $vr2
-; LA64-NEXT:    vor.v $vr0, $vr1, $vr0
+; LA64-NEXT:    vrotr.d $vr0, $vr0, $vr1
 ; LA64-NEXT:    vst $vr0, $a0, 0
 ; LA64-NEXT:    ret
   %v0 = load <2 x i64>, ptr %src
@@ -266,14 +223,20 @@ define void @rotr_v2i64(ptr %dst, ptr %src, i64 %a0) nounwind {
 }
 
 define void @rotr_v2i64_imm(ptr %dst, ptr %src) nounwind {
-; CHECK-LABEL: rotr_v2i64_imm:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    vsrli.d $vr1, $vr0, 2
-; CHECK-NEXT:    vslli.d $vr0, $vr0, 62
-; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
-; CHECK-NEXT:    vst $vr0, $a0, 0
-; CHECK-NEXT:    ret
+; LA32-LABEL: rotr_v2i64_imm:
+; LA32:       # %bb.0:
+; LA32-NEXT:    vld $vr0, $a1, 0
+; LA32-NEXT:    vrepli.w $vr1, -62
+; LA32-NEXT:    vrotr.d $vr0, $vr0, $vr1
+; LA32-NEXT:    vst $vr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: rotr_v2i64_imm:
+; LA64:       # %bb.0:
+; LA64-NEXT:    vld $vr0, $a1, 0
+; LA64-NEXT:    vrotri.d $vr0, $vr0, 2
+; LA64-NEXT:    vst $vr0, $a0, 0
+; LA64-NEXT:    ret
   %v0 = load <2 x i64>, ptr %src
   %b = lshr <2 x i64> %v0, splat (i64 2)
   %c = shl <2 x i64> %v0, splat (i64 62)

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
