https://github.com/heiher updated https://github.com/llvm/llvm-project/pull/201488
>From 6d12916c9d1675b23f4615ae88f7b3e11c64761b Mon Sep 17 00:00:00 2001 From: WANG Rui <[email protected]> Date: Fri, 29 May 2026 16:35:12 +0800 Subject: [PATCH 1/2] [LoongArch] Add DAG combine for horizontal widening add/sub Add a DAG combine to recognize horizontal widening add/subtract patterns and lower them to the corresponding LSX/LASX instructions. The following pattern is matched for both signed and unsigned variants: ``` ADD/SUB(SEXT/ZEXT(BUILD_VECTOR(extract_elt(vj, 1), extract_elt(vj, 3), ...)), SEXT/ZEXT(BUILD_VECTOR(extract_elt(vk, 0), extract_elt(vk, 2), ...))) ``` This covers the following instructions: ``` LSX: VHADDW.H.B, VHADDW.W.H, VHADDW.D.W VHADDW.HU.BU, VHADDW.WU.HU, VHADDW.DU.WU VHSUBW.H.B, VHSUBW.W.H, VHSUBW.D.W VHSUBW.HU.BU, VHSUBW.WU.HU, VHSUBW.DU.WU LASX: XVHADDW.H.B, XVHADDW.W.H, XVHADDW.D.W XVHADDW.HU.BU, XVHADDW.WU.HU, XVHADDW.DU.WU XVHSUBW.H.B, XVHSUBW.W.H, XVHSUBW.D.W XVHSUBW.HU.BU, XVHSUBW.WU.HU, XVHSUBW.DU.WU ``` The Q_D variants (VHADDW.Q.D, VHSUBW.Q.D and their unsigned counterparts) are not handled here because the result type v1i128 is not yet legalized in the backend. 
--- .../LoongArch/LoongArchISelLowering.cpp | 119 ++++++++++++++++++ .../LoongArch/LoongArchLASXInstrInfo.td | 28 ++++- .../Target/LoongArch/LoongArchLSXInstrInfo.td | 36 +++++- 3 files changed, 174 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 561064b3e1090..1b2258df77fa1 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -509,6 +509,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, if (Subtarget.hasExtLSX()) { setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); setTargetDAGCombine(ISD::BITCAST); setTargetDAGCombine(ISD::VSELECT); @@ -1045,7 +1046,9 @@ SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op, LegalVecSize = 256; } + EleBits *= 2; for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) { + EleBits = std::min(EleBits, 64u); MVT IntTy = MVT::getIntegerVT(EleBits); MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits); Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val); @@ -5907,9 +5910,114 @@ static bool isConstantSplatVector(SDValue N, APInt &SplatValue, /*IsBigEndian=*/false); } +static SDValue matchDeinterleaveBuildVector(SDValue N, unsigned &StartIndex) { + auto *BV = dyn_cast<BuildVectorSDNode>(N); + if (!BV) + return SDValue(); + + SDValue Src; + int Start = -1; + + for (unsigned i = 0, NumElts = BV->getNumOperands(); i < NumElts; ++i) { + SDValue Op = BV->getOperand(i); + if (Op.isUndef()) + continue; + if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return SDValue(); + + auto *IdxC = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + if (!IdxC) + return SDValue(); + + unsigned EltIdx = IdxC->getZExtValue(); + if (Start < 0) + Start = (int)EltIdx - (int)(i * 2); + if (Start < 0 || Start > 1 || EltIdx != (unsigned)(Start + (int)(i * 2))) + return SDValue(); + + 
SDValue CurSrc = Op.getOperand(0); + if (!Src) + Src = CurSrc; + else if (Src != CurSrc) + return SDValue(); + } + + if (!Src || Start < 0) + return SDValue(); + + StartIndex = (unsigned)Start; + return Src; +} + +static SDValue +performHorizWideningCombine(SDNode *N, SelectionDAG &DAG, + const LoongArchSubtarget &Subtarget) { + if (!Subtarget.hasExtLSX()) + return SDValue(); + + unsigned Opc = N->getOpcode(); + assert((Opc == ISD::ADD || Opc == ISD::SUB) && "Unexpected opcode"); + + EVT VT = N->getValueType(0); + SDLoc DL(N); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + bool isSigned; + unsigned ExtOpc = LHS.getOpcode(); + if (ExtOpc == ISD::SIGN_EXTEND) + isSigned = true; + else if (ExtOpc == ISD::ZERO_EXTEND) + isSigned = false; + else + return SDValue(); + + if (ExtOpc != RHS.getOpcode()) + return SDValue(); + + if (!LHS.hasOneUse() || !RHS.hasOneUse()) + return SDValue(); + + unsigned OddIdx, EvenIdx; + SDValue LHSVec = matchDeinterleaveBuildVector(LHS.getOperand(0), OddIdx); + SDValue RHSVec = matchDeinterleaveBuildVector(RHS.getOperand(0), EvenIdx); + + if (!LHSVec || !RHSVec) + return SDValue(); + if (OddIdx != 1 || EvenIdx != 0) + return SDValue(); + if (LHSVec.getValueType() != RHSVec.getValueType()) + return SDValue(); + + EVT SrcVT = LHSVec.getValueType(); + EVT SrcEltVT = SrcVT.getVectorElementType(); + EVT DstEltVT = VT.getVectorElementType(); + + if (!SrcVT.isVector() || !VT.isVector()) + return SDValue(); + if (SrcVT.getSizeInBits() != VT.getSizeInBits()) + return SDValue(); + if (DstEltVT.getSizeInBits() != SrcEltVT.getSizeInBits() * 2) + return SDValue(); + if (!SrcEltVT.isInteger() || SrcEltVT.getSizeInBits() > 32) + return SDValue(); + + unsigned TargetOpc; + if (Opc == ISD::ADD) + TargetOpc = isSigned ? LoongArchISD::VHADDW : LoongArchISD::VHADDW_U; + else + TargetOpc = isSigned ? 
LoongArchISD::VHSUBW : LoongArchISD::VHSUBW_U; + + return DAG.getNode(TargetOpc, DL, VT, LHSVec, RHSVec); +} + static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget) { + if (SDValue V = performHorizWideningCombine(N, DAG, Subtarget)) + return V; + if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -6159,6 +6267,15 @@ static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const LoongArchSubtarget &Subtarget) { + if (SDValue V = performHorizWideningCombine(N, DAG, Subtarget)) + return V; + + return SDValue(); +} + // Helper to peek through bitops/trunc/setcc to determine size of source vector. // Allows BITCASTCombine to determine what size vector generated a <X x i1>. static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, @@ -8061,6 +8178,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, return performSETCCCombine(N, DAG, DCI, Subtarget); case ISD::SRL: return performSRLCombine(N, DAG, DCI, Subtarget); + case ISD::SUB: + return performSUBCombine(N, DAG, DCI, Subtarget); case ISD::BITCAST: return performBITCASTCombine(N, DAG, DCI, Subtarget); case ISD::ANY_EXTEND: diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 0fb2b1d332f60..24a03d592dc92 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1213,16 +1213,27 @@ multiclass PatXrXrXr<SDPatternOperator OpNode, string Inst> { } multiclass PatXrXrW<SDPatternOperator OpNode, string Inst> { - def : Pat<(OpNode(v32i8 LASX256:$vj), (v32i8 LASX256:$vk)), + def : Pat<(v16i16 (OpNode(v32i8 LASX256:$vj), (v32i8 LASX256:$vk))), (!cast<LAInst>(Inst#"_H_B") LASX256:$vj, LASX256:$vk)>; - def : Pat<(OpNode(v16i16 LASX256:$vj), (v16i16 
LASX256:$vk)), + def : Pat<(v8i32 (OpNode(v16i16 LASX256:$vj), (v16i16 LASX256:$vk))), (!cast<LAInst>(Inst#"_W_H") LASX256:$vj, LASX256:$vk)>; - def : Pat<(OpNode(v8i32 LASX256:$vj), (v8i32 LASX256:$vk)), + def : Pat<(v4i64 (OpNode(v8i32 LASX256:$vj), (v8i32 LASX256:$vk))), (!cast<LAInst>(Inst#"_D_W") LASX256:$vj, LASX256:$vk)>; - def : Pat<(OpNode(v4i64 LASX256:$vj), (v4i64 LASX256:$vk)), + def : Pat<(v4i64 (OpNode(v4i64 LASX256:$vj), (v4i64 LASX256:$vk))), (!cast<LAInst>(Inst#"_Q_D") LASX256:$vj, LASX256:$vk)>; } +multiclass PatXrXrWU<SDPatternOperator OpNode, string Inst> { + def : Pat<(v16i16 (OpNode(v32i8 LASX256:$vj), (v32i8 LASX256:$vk))), + (!cast<LAInst>(Inst#"_HU_BU") LASX256:$vj, LASX256:$vk)>; + def : Pat<(v8i32 (OpNode(v16i16 LASX256:$vj), (v16i16 LASX256:$vk))), + (!cast<LAInst>(Inst#"_WU_HU") LASX256:$vj, LASX256:$vk)>; + def : Pat<(v4i64 (OpNode(v8i32 LASX256:$vj), (v8i32 LASX256:$vk))), + (!cast<LAInst>(Inst#"_DU_WU") LASX256:$vj, LASX256:$vk)>; + def : Pat<(v4i64 (OpNode(v4i64 LASX256:$vj), (v4i64 LASX256:$vk))), + (!cast<LAInst>(Inst#"_QU_DU") LASX256:$vj, LASX256:$vk)>; +} + multiclass PatShiftXrXr<SDPatternOperator OpNode, string Inst> { def : Pat<(OpNode (v32i8 LASX256:$xj), (and vsplati8_imm_eq_7, (v32i8 LASX256:$xk))), @@ -1634,6 +1645,15 @@ def : Pat<(bswap (v4i64 LASX256:$xj)), // XVHADDW_{H_B/W_H/D_W/Q_D} defm : PatXrXrW<loongarch_vhaddw, "XVHADDW">; +// XVHADDW_{HU_BU/WU_HU/DU_WU/QU_DU} +defm : PatXrXrWU<loongarch_vhaddw_u, "XVHADDW">; + +// XVHSUBW_{H_B/W_H/D_W/Q_D} +defm : PatXrXrW<loongarch_vhsubw, "XVHSUBW">; + +// XVHSUBW_{HU_BU/WU_HU/DU_WU/QU_DU} +defm : PatXrXrWU<loongarch_vhsubw_u, "XVHSUBW">; + // XVFADD_{S/D} defm : PatXrXrF<fadd, "XVFADD">; diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 34d4e2435b39d..21861378cbb3d 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -27,6 +27,7 @@ 
def SDT_LoongArchV2RUimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, GRLenVT>]>; +def SDT_LoongArchVHW : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1, 2>]>; def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>; def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>; def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>; @@ -86,7 +87,12 @@ def loongarch_vbsll : SDNode<"LoongArchISD::VBSLL", SDT_LoongArchV1RUimm>; def loongarch_vbsrl : SDNode<"LoongArchISD::VBSRL", SDT_LoongArchV1RUimm>; // Vector Horizontal Addition with Widening -def loongarch_vhaddw : SDNode<"LoongArchISD::VHADDW", SDT_LoongArchV2R>; +def loongarch_vhaddw : SDNode<"LoongArchISD::VHADDW", SDT_LoongArchVHW>; +def loongarch_vhaddw_u : SDNode<"LoongArchISD::VHADDW_U", SDT_LoongArchVHW>; + +// Vector Horizontal Subtraction with Widening +def loongarch_vhsubw : SDNode<"LoongArchISD::VHSUBW", SDT_LoongArchVHW>; +def loongarch_vhsubw_u : SDNode<"LoongArchISD::VHSUBW_U", SDT_LoongArchVHW>; // Scalar load broadcast to vector def loongarch_vldrepl @@ -1439,16 +1445,27 @@ multiclass PatVrVrVr<SDPatternOperator OpNode, string Inst> { } multiclass PatVrVrW<SDPatternOperator OpNode, string Inst> { - def : Pat<(OpNode(v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), + def : Pat<(v8i16 (OpNode(v16i8 LSX128:$vj), (v16i8 LSX128:$vk))), (!cast<LAInst>(Inst#"_H_B") LSX128:$vj, LSX128:$vk)>; - def : Pat<(OpNode(v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), + def : Pat<(v4i32 (OpNode(v8i16 LSX128:$vj), (v8i16 LSX128:$vk))), (!cast<LAInst>(Inst#"_W_H") LSX128:$vj, LSX128:$vk)>; - def : Pat<(OpNode(v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), + def : Pat<(v2i64 (OpNode(v4i32 LSX128:$vj), (v4i32 LSX128:$vk))), (!cast<LAInst>(Inst#"_D_W") LSX128:$vj, LSX128:$vk)>; - def : Pat<(OpNode(v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), + def : Pat<(v2i64 (OpNode(v2i64 
LSX128:$vj), (v2i64 LSX128:$vk))), (!cast<LAInst>(Inst#"_Q_D") LSX128:$vj, LSX128:$vk)>; } +multiclass PatVrVrWU<SDPatternOperator OpNode, string Inst> { + def : Pat<(v8i16 (OpNode(v16i8 LSX128:$vj), (v16i8 LSX128:$vk))), + (!cast<LAInst>(Inst#"_HU_BU") LSX128:$vj, LSX128:$vk)>; + def : Pat<(v4i32 (OpNode(v8i16 LSX128:$vj), (v8i16 LSX128:$vk))), + (!cast<LAInst>(Inst#"_WU_HU") LSX128:$vj, LSX128:$vk)>; + def : Pat<(v2i64 (OpNode(v4i32 LSX128:$vj), (v4i32 LSX128:$vk))), + (!cast<LAInst>(Inst#"_DU_WU") LSX128:$vj, LSX128:$vk)>; + def : Pat<(v2i64 (OpNode(v2i64 LSX128:$vj), (v2i64 LSX128:$vk))), + (!cast<LAInst>(Inst#"_QU_DU") LSX128:$vj, LSX128:$vk)>; +} + multiclass PatShiftVrVr<SDPatternOperator OpNode, string Inst> { def : Pat<(OpNode (v16i8 LSX128:$vj), (and vsplati8_imm_eq_7, (v16i8 LSX128:$vk))), @@ -1878,6 +1895,15 @@ def : Pat<(bswap (v2i64 LSX128:$vj)), // VHADDW_{H_B/W_H/D_W/Q_D} defm : PatVrVrW<loongarch_vhaddw, "VHADDW">; +// VHADDW_{HU_BU/WU_HU/DU_WU/QU_DU} +defm : PatVrVrWU<loongarch_vhaddw_u, "VHADDW">; + +// VHSUBW_{H_B/W_H/D_W/Q_D} +defm : PatVrVrW<loongarch_vhsubw, "VHSUBW">; + +// VHSUBW_{HU_BU/WU_HU/DU_WU/QU_DU} +defm : PatVrVrWU<loongarch_vhsubw_u, "VHSUBW">; + // VFADD_{S/D} defm : PatVrVrF<fadd, "VFADD">; >From 9a7e672cfda79f70443da036431d9082d1be4765 Mon Sep 17 00:00:00 2001 From: WANG Rui <[email protected]> Date: Tue, 16 Jun 2026 15:41:37 +0800 Subject: [PATCH 2/2] Fix DAG combine for illegal vector types --- .../LoongArch/LoongArchISelLowering.cpp | 3 + llvm/test/CodeGen/LoongArch/lasx/vhaddw.ll | 260 +----------------- llvm/test/CodeGen/LoongArch/lasx/vhsubw.ll | 260 +----------------- llvm/test/CodeGen/LoongArch/lsx/vhaddw.ll | 45 +-- llvm/test/CodeGen/LoongArch/lsx/vhsubw.ll | 45 +-- 5 files changed, 31 insertions(+), 582 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 1b2258df77fa1..e5143a1646799 100644 --- 
a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -5993,7 +5993,10 @@ performHorizWideningCombine(SDNode *N, SelectionDAG &DAG, EVT SrcVT = LHSVec.getValueType(); EVT SrcEltVT = SrcVT.getVectorElementType(); EVT DstEltVT = VT.getVectorElementType(); + auto &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT)) + return SDValue(); if (!SrcVT.isVector() || !VT.isVector()) return SDValue(); if (SrcVT.getSizeInBits() != VT.getSizeInBits()) diff --git a/llvm/test/CodeGen/LoongArch/lasx/vhaddw.ll b/llvm/test/CodeGen/LoongArch/lasx/vhaddw.ll index 1f405d640b133..2304a5d6f83fd 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/vhaddw.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/vhaddw.ll @@ -173,77 +173,9 @@ entry: define void @xvhaddw_h_b(ptr %a, ptr %b, ptr %r) nounwind { ; CHECK-LABEL: xvhaddw_h_b: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr2, $a0, 0 +; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvld $xr1, $a1, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 1 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 3 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 5 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 7 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 3 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 9 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 4 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 11 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 5 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 13 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 6 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 15 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 7 -; CHECK-NEXT: xvpermi.d $xr2, $xr2, 14 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 1 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 8 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 3 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 9 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 5 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 10 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 7 
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 11 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 9 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 12 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 11 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 13 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 13 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 14 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 15 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 15 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 0 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 2 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 4 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 6 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 3 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 8 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 4 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 10 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 5 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 12 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 6 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 14 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 7 -; CHECK-NEXT: xvpermi.d $xr1, $xr1, 14 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 0 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 8 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 2 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 9 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 4 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 10 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 6 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 11 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 8 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 12 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 10 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 13 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 12 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 14 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 14 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 15 -; CHECK-NEXT: vext2xv.h.b $xr0, $xr0 -; CHECK-NEXT: vext2xv.h.b $xr1, $xr2 -; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 +; CHECK-NEXT: xvhaddw.h.b $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a2, 0 ; CHECK-NEXT: ret entry: @@ -267,43 +199,7 @@ define void @xvhaddw_w_h(ptr %a, ptr %b, 
ptr %r) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvld $xr1, $a1, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 1 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 2 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 3 -; CHECK-NEXT: xvpermi.d $xr0, $xr0, 14 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 4 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 5 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 6 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 7 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 0 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 2 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 1 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 4 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 2 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 6 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 3 -; CHECK-NEXT: xvpermi.d $xr1, $xr1, 14 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 0 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 4 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 2 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 5 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 4 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 6 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 6 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 7 -; CHECK-NEXT: vext2xv.w.h $xr1, $xr2 -; CHECK-NEXT: vext2xv.w.h $xr0, $xr0 -; CHECK-NEXT: xvadd.w $xr0, $xr1, $xr0 +; CHECK-NEXT: xvhaddw.w.h $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a2, 0 ; CHECK-NEXT: ret entry: @@ -325,25 +221,7 @@ define void @xvhaddw_d_w(ptr %a, ptr %b, ptr %r) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvld $xr1, $a1, 0 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3 
-; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 5 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr1, 0 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr1, 2 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 1 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr1, 4 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 2 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr1, 6 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3 -; CHECK-NEXT: vext2xv.d.w $xr1, $xr2 -; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 -; CHECK-NEXT: xvadd.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvhaddw.d.w $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a2, 0 ; CHECK-NEXT: ret entry: @@ -361,77 +239,9 @@ entry: define void @xvhaddw_hu_bu(ptr %a, ptr %b, ptr %r) nounwind { ; CHECK-LABEL: xvhaddw_hu_bu: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr2, $a0, 0 +; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvld $xr1, $a1, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 1 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 3 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 5 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 7 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 3 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 9 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 4 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 11 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 5 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 13 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 6 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 15 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 7 -; CHECK-NEXT: xvpermi.d $xr2, $xr2, 14 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 1 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 8 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 3 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 9 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 5 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 10 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 7 -; CHECK-NEXT: vinsgr2vr.b 
$vr0, $a0, 11 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 9 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 12 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 11 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 13 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 13 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 14 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 15 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 15 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 0 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 2 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 4 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 6 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 3 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 8 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 4 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 10 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 5 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 12 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 6 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 14 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 7 -; CHECK-NEXT: xvpermi.d $xr1, $xr1, 14 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 0 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 8 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 2 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 9 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 4 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 10 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 6 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 11 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 8 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 12 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 10 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 13 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 12 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 14 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 14 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 15 -; CHECK-NEXT: vext2xv.hu.bu $xr0, $xr0 -; CHECK-NEXT: vext2xv.hu.bu $xr1, $xr2 -; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 +; CHECK-NEXT: xvhaddw.hu.bu $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a2, 0 ; CHECK-NEXT: ret entry: @@ -455,43 +265,7 @@ define void @xvhaddw_wu_hu(ptr %a, ptr %b, ptr %r) nounwind { 
; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvld $xr1, $a1, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 1 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 2 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 3 -; CHECK-NEXT: xvpermi.d $xr0, $xr0, 14 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 4 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 5 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 6 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 7 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 0 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 2 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 1 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 4 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 2 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 6 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 3 -; CHECK-NEXT: xvpermi.d $xr1, $xr1, 14 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 0 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 4 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 2 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 5 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 4 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 6 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 6 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 7 -; CHECK-NEXT: vext2xv.wu.hu $xr1, $xr2 -; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0 -; CHECK-NEXT: xvadd.w $xr0, $xr1, $xr0 +; CHECK-NEXT: xvhaddw.wu.hu $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a2, 0 ; CHECK-NEXT: ret entry: @@ -513,25 +287,7 @@ define void @xvhaddw_du_wu(ptr %a, ptr %b, ptr %r) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvld $xr1, $a1, 0 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3 -; 
CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 5 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr1, 0 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr1, 2 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 1 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr1, 4 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 2 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr1, 6 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3 -; CHECK-NEXT: vext2xv.du.wu $xr1, $xr2 -; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 -; CHECK-NEXT: xvadd.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvhaddw.du.wu $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a2, 0 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/LoongArch/lasx/vhsubw.ll b/llvm/test/CodeGen/LoongArch/lasx/vhsubw.ll index 856a7bf52e159..20b959aa74d7a 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/vhsubw.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/vhsubw.ll @@ -173,77 +173,9 @@ entry: define void @xvhsubw_h_b(ptr %a, ptr %b, ptr %r) nounwind { ; CHECK-LABEL: xvhsubw_h_b: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr2, $a0, 0 +; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvld $xr1, $a1, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 1 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 3 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 5 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 7 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 3 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 9 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 4 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 11 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 5 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 13 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 6 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 15 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 7 -; CHECK-NEXT: xvpermi.d $xr2, $xr2, 14 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 1 -; CHECK-NEXT: vinsgr2vr.b $vr0, 
$a0, 8 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 3 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 9 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 5 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 10 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 7 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 11 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 9 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 12 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 11 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 13 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 13 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 14 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 15 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 15 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 0 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 2 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 4 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 6 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 3 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 8 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 4 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 10 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 5 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 12 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 6 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 14 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 7 -; CHECK-NEXT: xvpermi.d $xr1, $xr1, 14 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 0 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 8 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 2 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 9 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 4 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 10 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 6 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 11 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 8 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 12 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 10 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 13 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 12 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 14 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 14 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 15 -; CHECK-NEXT: vext2xv.h.b $xr0, $xr0 -; CHECK-NEXT: vext2xv.h.b 
$xr1, $xr2 -; CHECK-NEXT: xvsub.h $xr0, $xr0, $xr1 +; CHECK-NEXT: xvhsubw.h.b $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a2, 0 ; CHECK-NEXT: ret entry: @@ -267,43 +199,7 @@ define void @xvhsubw_w_h(ptr %a, ptr %b, ptr %r) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvld $xr1, $a1, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 1 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 2 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 3 -; CHECK-NEXT: xvpermi.d $xr0, $xr0, 14 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 4 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 5 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 6 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 7 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 0 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 2 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 1 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 4 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 2 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 6 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 3 -; CHECK-NEXT: xvpermi.d $xr1, $xr1, 14 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 0 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 4 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 2 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 5 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 4 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 6 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 6 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 7 -; CHECK-NEXT: vext2xv.w.h $xr1, $xr2 -; CHECK-NEXT: vext2xv.w.h $xr0, $xr0 -; CHECK-NEXT: xvsub.w $xr0, $xr1, $xr0 +; CHECK-NEXT: xvhsubw.w.h $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a2, 0 ; CHECK-NEXT: ret entry: @@ -325,25 +221,7 @@ define void @xvhsubw_d_w(ptr %a, ptr %b, ptr %r) nounwind { ; 
CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvld $xr1, $a1, 0 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 5 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr1, 0 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr1, 2 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 1 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr1, 4 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 2 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr1, 6 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3 -; CHECK-NEXT: vext2xv.d.w $xr1, $xr2 -; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 -; CHECK-NEXT: xvsub.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvhsubw.d.w $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a2, 0 ; CHECK-NEXT: ret entry: @@ -361,77 +239,9 @@ entry: define void @xvhsubw_hu_bu(ptr %a, ptr %b, ptr %r) nounwind { ; CHECK-LABEL: xvhsubw_hu_bu: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr2, $a0, 0 +; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvld $xr1, $a1, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 1 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 3 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 5 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 7 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 3 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 9 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 4 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 11 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 5 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 13 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 6 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 15 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 7 -; CHECK-NEXT: xvpermi.d $xr2, $xr2, 14 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 1 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 8 -; CHECK-NEXT: 
vpickve2gr.b $a0, $vr2, 3 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 9 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 5 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 10 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 7 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 11 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 9 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 12 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 11 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 13 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 13 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 14 -; CHECK-NEXT: vpickve2gr.b $a0, $vr2, 15 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 15 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 0 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 0 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 2 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 1 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 4 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 2 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 6 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 3 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 8 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 4 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 10 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 5 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 12 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 6 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 14 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 7 -; CHECK-NEXT: xvpermi.d $xr1, $xr1, 14 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 0 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 8 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 2 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 9 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 4 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 10 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 6 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 11 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 8 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 12 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 10 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 13 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 12 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 14 -; CHECK-NEXT: vpickve2gr.b $a0, $vr1, 14 -; CHECK-NEXT: vinsgr2vr.b $vr2, $a0, 15 -; CHECK-NEXT: vext2xv.hu.bu $xr0, $xr0 -; CHECK-NEXT: vext2xv.hu.bu $xr1, $xr2 -; 
CHECK-NEXT: xvsub.h $xr0, $xr0, $xr1 +; CHECK-NEXT: xvhsubw.hu.bu $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a2, 0 ; CHECK-NEXT: ret entry: @@ -455,43 +265,7 @@ define void @xvhsubw_wu_hu(ptr %a, ptr %b, ptr %r) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvld $xr1, $a1, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 1 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 2 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 3 -; CHECK-NEXT: xvpermi.d $xr0, $xr0, 14 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 4 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 5 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 6 -; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7 -; CHECK-NEXT: vinsgr2vr.h $vr2, $a0, 7 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 0 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 2 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 1 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 4 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 2 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 6 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 3 -; CHECK-NEXT: xvpermi.d $xr1, $xr1, 14 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 0 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 4 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 2 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 5 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 4 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 6 -; CHECK-NEXT: vpickve2gr.h $a0, $vr1, 6 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 7 -; CHECK-NEXT: vext2xv.wu.hu $xr1, $xr2 -; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0 -; CHECK-NEXT: xvsub.w $xr0, $xr1, $xr0 +; CHECK-NEXT: xvhsubw.wu.hu $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a2, 0 ; CHECK-NEXT: ret entry: @@ -513,25 +287,7 @@ define void @xvhsubw_du_wu(ptr %a, ptr %b, ptr %r) nounwind { ; 
CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a0, 0 ; CHECK-NEXT: xvld $xr1, $a1, 0 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 0 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 1 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 5 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 2 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7 -; CHECK-NEXT: vinsgr2vr.w $vr2, $a0, 3 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr1, 0 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr1, 2 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 1 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr1, 4 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 2 -; CHECK-NEXT: xvpickve2gr.w $a0, $xr1, 6 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3 -; CHECK-NEXT: vext2xv.du.wu $xr1, $xr2 -; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 -; CHECK-NEXT: xvsub.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvhsubw.du.wu $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a2, 0 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/LoongArch/lsx/vhaddw.ll b/llvm/test/CodeGen/LoongArch/lsx/vhaddw.ll index 5a89e26de741f..8a75f7c234e8b 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vhaddw.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vhaddw.ll @@ -60,13 +60,7 @@ define void @vhaddw_h_b(ptr %a, ptr %b, ptr %r) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vld $vr1, $a1, 0 -; CHECK-NEXT: vpickod.b $vr0, $vr0, $vr0 -; CHECK-NEXT: vpickev.b $vr1, $vr1, $vr1 -; CHECK-NEXT: vslti.b $vr2, $vr0, 0 -; CHECK-NEXT: vilvl.b $vr0, $vr2, $vr0 -; CHECK-NEXT: vslti.b $vr2, $vr1, 0 -; CHECK-NEXT: vilvl.b $vr1, $vr2, $vr1 -; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 +; CHECK-NEXT: vhaddw.h.b $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a2, 0 ; CHECK-NEXT: ret entry: @@ -88,13 +82,7 @@ define void @vhaddw_w_h(ptr %a, ptr %b, ptr %r) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vld $vr1, $a1, 0 -; CHECK-NEXT: vpickod.h $vr0, $vr0, $vr0 -; CHECK-NEXT: vpickev.h $vr1, $vr1, $vr1 
-; CHECK-NEXT: vslti.h $vr2, $vr0, 0 -; CHECK-NEXT: vilvl.h $vr0, $vr2, $vr0 -; CHECK-NEXT: vslti.h $vr2, $vr1, 0 -; CHECK-NEXT: vilvl.h $vr1, $vr2, $vr1 -; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 +; CHECK-NEXT: vhaddw.w.h $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a2, 0 ; CHECK-NEXT: ret entry: @@ -116,13 +104,7 @@ define void @vhaddw_d_w(ptr %a, ptr %b, ptr %r) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vld $vr1, $a1, 0 -; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 13 -; CHECK-NEXT: vshuf4i.w $vr1, $vr1, 8 -; CHECK-NEXT: vslti.w $vr2, $vr0, 0 -; CHECK-NEXT: vilvl.w $vr0, $vr2, $vr0 -; CHECK-NEXT: vslti.w $vr2, $vr1, 0 -; CHECK-NEXT: vilvl.w $vr1, $vr2, $vr1 -; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: vhaddw.d.w $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a2, 0 ; CHECK-NEXT: ret entry: @@ -142,12 +124,7 @@ define void @vhaddw_hu_bu(ptr %a, ptr %b, ptr %r) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vld $vr1, $a1, 0 -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0) -; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI4_0) -; CHECK-NEXT: vrepli.b $vr3, 0 -; CHECK-NEXT: vpackod.b $vr0, $vr3, $vr0 -; CHECK-NEXT: vshuf.b $vr1, $vr3, $vr1, $vr2 -; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 +; CHECK-NEXT: vhaddw.hu.bu $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a2, 0 ; CHECK-NEXT: ret entry: @@ -169,12 +146,7 @@ define void @vhaddw_wu_hu(ptr %a, ptr %b, ptr %r) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vld $vr1, $a1, 0 -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) -; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI5_0) -; CHECK-NEXT: vrepli.b $vr3, 0 -; CHECK-NEXT: vpackod.h $vr0, $vr3, $vr0 -; CHECK-NEXT: vshuf.h $vr2, $vr3, $vr1 -; CHECK-NEXT: vadd.w $vr0, $vr0, $vr2 +; CHECK-NEXT: vhaddw.wu.hu $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a2, 0 ; CHECK-NEXT: ret entry: @@ -196,12 +168,7 @@ define void @vhaddw_du_wu(ptr %a, ptr %b, ptr %r) nounwind { ; CHECK: # %bb.0: # %entry ; 
CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vld $vr1, $a1, 0 -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0) -; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI6_0) -; CHECK-NEXT: vrepli.b $vr3, 0 -; CHECK-NEXT: vpackod.w $vr0, $vr3, $vr0 -; CHECK-NEXT: vshuf.w $vr2, $vr3, $vr1 -; CHECK-NEXT: vadd.d $vr0, $vr0, $vr2 +; CHECK-NEXT: vhaddw.du.wu $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a2, 0 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/LoongArch/lsx/vhsubw.ll b/llvm/test/CodeGen/LoongArch/lsx/vhsubw.ll index 79d02daa0648f..6960c043163c7 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vhsubw.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vhsubw.ll @@ -60,13 +60,7 @@ define void @vhsubw_h_b(ptr %a, ptr %b, ptr %r) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vld $vr1, $a1, 0 -; CHECK-NEXT: vpickod.b $vr0, $vr0, $vr0 -; CHECK-NEXT: vpickev.b $vr1, $vr1, $vr1 -; CHECK-NEXT: vslti.b $vr2, $vr0, 0 -; CHECK-NEXT: vilvl.b $vr0, $vr2, $vr0 -; CHECK-NEXT: vslti.b $vr2, $vr1, 0 -; CHECK-NEXT: vilvl.b $vr1, $vr2, $vr1 -; CHECK-NEXT: vsub.h $vr0, $vr0, $vr1 +; CHECK-NEXT: vhsubw.h.b $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a2, 0 ; CHECK-NEXT: ret entry: @@ -88,13 +82,7 @@ define void @vhsubw_w_h(ptr %a, ptr %b, ptr %r) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vld $vr1, $a1, 0 -; CHECK-NEXT: vpickod.h $vr0, $vr0, $vr0 -; CHECK-NEXT: vpickev.h $vr1, $vr1, $vr1 -; CHECK-NEXT: vslti.h $vr2, $vr0, 0 -; CHECK-NEXT: vilvl.h $vr0, $vr2, $vr0 -; CHECK-NEXT: vslti.h $vr2, $vr1, 0 -; CHECK-NEXT: vilvl.h $vr1, $vr2, $vr1 -; CHECK-NEXT: vsub.w $vr0, $vr0, $vr1 +; CHECK-NEXT: vhsubw.w.h $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a2, 0 ; CHECK-NEXT: ret entry: @@ -116,13 +104,7 @@ define void @vhsubw_d_w(ptr %a, ptr %b, ptr %r) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vld $vr1, $a1, 0 -; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 13 -; CHECK-NEXT: vshuf4i.w $vr1, $vr1, 8 -; CHECK-NEXT: 
vslti.w $vr2, $vr0, 0 -; CHECK-NEXT: vilvl.w $vr0, $vr2, $vr0 -; CHECK-NEXT: vslti.w $vr2, $vr1, 0 -; CHECK-NEXT: vilvl.w $vr1, $vr2, $vr1 -; CHECK-NEXT: vsub.d $vr0, $vr0, $vr1 +; CHECK-NEXT: vhsubw.d.w $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a2, 0 ; CHECK-NEXT: ret entry: @@ -142,12 +124,7 @@ define void @vhsubw_hu_bu(ptr %a, ptr %b, ptr %r) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vld $vr1, $a1, 0 -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0) -; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI4_0) -; CHECK-NEXT: vrepli.b $vr3, 0 -; CHECK-NEXT: vpackod.b $vr0, $vr3, $vr0 -; CHECK-NEXT: vshuf.b $vr1, $vr3, $vr1, $vr2 -; CHECK-NEXT: vsub.h $vr0, $vr0, $vr1 +; CHECK-NEXT: vhsubw.hu.bu $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a2, 0 ; CHECK-NEXT: ret entry: @@ -169,12 +146,7 @@ define void @vhsubw_wu_hu(ptr %a, ptr %b, ptr %r) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vld $vr1, $a1, 0 -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) -; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI5_0) -; CHECK-NEXT: vrepli.b $vr3, 0 -; CHECK-NEXT: vpackod.h $vr0, $vr3, $vr0 -; CHECK-NEXT: vshuf.h $vr2, $vr3, $vr1 -; CHECK-NEXT: vsub.w $vr0, $vr0, $vr2 +; CHECK-NEXT: vhsubw.wu.hu $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a2, 0 ; CHECK-NEXT: ret entry: @@ -196,12 +168,7 @@ define void @vhsubw_du_wu(ptr %a, ptr %b, ptr %r) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a0, 0 ; CHECK-NEXT: vld $vr1, $a1, 0 -; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0) -; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI6_0) -; CHECK-NEXT: vrepli.b $vr3, 0 -; CHECK-NEXT: vpackod.w $vr0, $vr3, $vr0 -; CHECK-NEXT: vshuf.w $vr2, $vr3, $vr1 -; CHECK-NEXT: vsub.d $vr0, $vr0, $vr2 +; CHECK-NEXT: vhsubw.du.wu $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a2, 0 ; CHECK-NEXT: ret entry: _______________________________________________ llvm-branch-commits mailing list [email protected] 
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
