llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-loongarch Author: ZhaoQi (zhaoqi5) <details> <summary>Changes</summary> --- Patch is 176.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/161368.diff 6 Files Affected: - (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+139) - (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+10-1) - (modified) llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td (+41) - (modified) llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td (+48) - (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll (+182-3014) - (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mulwev_od.ll (+128-510) ``````````diff diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 7ddf996f53f4c..9952cfaefee57 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -462,6 +462,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, if (Subtarget.hasExtLSX()) { setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); setTargetDAGCombine(ISD::BITCAST); + setTargetDAGCombine(ISD::MUL); } // Set DAG combine for 'LASX' feature. @@ -6679,6 +6680,136 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const LoongArchSubtarget &Subtarget) { + if (!DCI.isBeforeLegalize()) + return SDValue(); + + SDLoc DL(N); + EVT ResTy = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // Note: v2i128 is an unsupported MVT vector type (see + // MachineValueType.h::getVectorVT()), use NumElements and SizeInBits to + // identify it. 
+ bool HasLSXOnly = Subtarget.hasExtLSX() && !Subtarget.hasExtLASX(); + bool Isv2i128 = ResTy.isVector() && ResTy.getVectorNumElements() == 2 && + ResTy.getScalarSizeInBits() == 128; + if (ResTy != MVT::v8i16 && ResTy != MVT::v4i32 && ResTy != MVT::v2i64 && + ResTy != MVT::i128 && ResTy != MVT::v16i16 && ResTy != MVT::v8i32 && + ResTy != MVT::v4i64 && !Isv2i128) + return SDValue(); + if (HasLSXOnly && (ResTy.is256BitVector() || Isv2i128)) + return SDValue(); + + // Combine: + // ti,tii,...,tx = extract_vector_elt t0, {0,2,4,.../1,3,5,...} + // tj,tjj,...,ty = extract_vector_elt t1, {0,2,4,.../1,3,5,...} + // tm = BUILD_VECTOR ti,tii,...,tx (Only when ResTy != MVT::i128) + // tn = BUILD_VECTOR tj,tjj,...,ty (Only when ResTy != MVT::i128) + // ta = {sign/zero}_extend tm + // tb = {sign/zero}_extend tn + // tr = mul ta, tb + // to: + // tr = VMULW{EV/OD}[U/US] t0, t1 + auto getExtType = [](unsigned Op0, unsigned Op1) -> unsigned { + if (Op0 == ISD::SIGN_EXTEND && Op1 == ISD::SIGN_EXTEND) + return 0; + if (Op0 == ISD::ZERO_EXTEND && Op1 == ISD::ZERO_EXTEND) + return 1; + if (Op0 == ISD::ZERO_EXTEND && Op1 == ISD::SIGN_EXTEND) + return 2; + if (Op0 == ISD::SIGN_EXTEND && Op1 == ISD::ZERO_EXTEND) + return 3; + return -1; + }; + + unsigned ExtType = getExtType(N0.getOpcode(), N1.getOpcode()); + if (ExtType < 0) + return SDValue(); + + SDValue Src0 = N0.getOperand(0); + SDValue Src1 = N1.getOperand(0); + bool IsScalar = (ResTy == MVT::i128); + if (IsScalar && (Src0.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + Src1.getOpcode() != ISD::EXTRACT_VECTOR_ELT)) + return SDValue(); + if (!IsScalar && (Src0.getOpcode() != ISD::BUILD_VECTOR || + Src1.getOpcode() != ISD::BUILD_VECTOR)) + return SDValue(); + + unsigned ResBits = ResTy.getScalarSizeInBits(); + unsigned Src0Bits = Src0.getValueType().getScalarSizeInBits(); + unsigned Src1Bits = Src1.getValueType().getScalarSizeInBits(); + if (Src0Bits != Src1Bits || ResBits != Src0Bits * 2) + return SDValue(); + + // Collect all 
EXTRACT_VECTOR_ELT. + SmallVector<std::pair<SDValue, SDValue>> Elems; + if (IsScalar) { + Elems.emplace_back(Src0, Src1); + } else { + for (unsigned i = 0; i < Src0.getNumOperands(); ++i) + Elems.emplace_back(Src0.getOperand(i), Src1.getOperand(i)); + } + + unsigned Index; + SDValue OrigN0, OrigN1; + bool First = true; + for (auto &[Op0, Op1] : Elems) { + // Each element must be EXTRACT_VECTOR_ELT. + if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return SDValue(); + + // Check each EXTRACT_VECTOR_ELT's source vector and index. + if (Op0.getOperand(1) != Op1.getOperand(1)) + return SDValue(); + + auto *IdxC = dyn_cast<ConstantSDNode>(Op0.getOperand(1)); + if (!IdxC) + return SDValue(); + + unsigned CurIdx = IdxC->getZExtValue(); + if (First) { + if (CurIdx != 0 && CurIdx != 1) + return SDValue(); + OrigN0 = Op0.getOperand(0); + OrigN1 = Op1.getOperand(0); + First = false; + } else { + if (CurIdx != Index + 2 || Op0.getOperand(0) != OrigN0 || + Op1.getOperand(0) != OrigN1) + return SDValue(); + } + Index = CurIdx; + } + + if (OrigN0.getValueType() != OrigN1.getValueType()) + return SDValue(); + if (OrigN0.getValueType().getVectorNumElements() != + (IsScalar ? 
1 : ResTy.getVectorNumElements()) * 2) + return SDValue(); + + SDValue Result; + EVT OrigTy = OrigN0.getValueType(); + bool IsEven = (Index % 2 == 0); + + static const unsigned OpcTable[3][2] = { + {LoongArchISD::VMULWOD, LoongArchISD::VMULWEV}, + {LoongArchISD::VMULWODU, LoongArchISD::VMULWEVU}, + {LoongArchISD::VMULWODUS, LoongArchISD::VMULWEVUS}}; + + if (ExtType == 3) + Result = DAG.getNode(OpcTable[2][IsEven], DL, OrigTy, OrigN1, OrigN0); + else + Result = DAG.getNode(OpcTable[ExtType][IsEven], DL, OrigTy, OrigN0, OrigN1); + + return DAG.getBitcast(ResTy, Result); +} + SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -6714,6 +6845,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget); case ISD::EXTRACT_VECTOR_ELT: return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget); + case ISD::MUL: + return performMULCombine(N, DAG, DCI, Subtarget); } return SDValue(); } @@ -7526,6 +7659,12 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(XVMSKEQZ) NODE_NAME_CASE(XVMSKNEZ) NODE_NAME_CASE(VHADDW) + NODE_NAME_CASE(VMULWEV) + NODE_NAME_CASE(VMULWOD) + NODE_NAME_CASE(VMULWEVU) + NODE_NAME_CASE(VMULWODU) + NODE_NAME_CASE(VMULWEVUS) + NODE_NAME_CASE(VMULWODUS) } #undef NODE_NAME_CASE return nullptr; diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 8a4d7748467c7..1e5632eb00f7b 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -189,7 +189,16 @@ enum NodeType : unsigned { XVMSKNEZ, // Vector Horizontal Addition with Widening‌ - VHADDW + VHADDW, + + // Perform element-wise vector multiplication at even/odd indices, + // and keep each result in its corresponding widened slot + VMULWEV, + VMULWOD, + VMULWEVU, + VMULWODU, + VMULWEVUS, + 
VMULWODUS // Intrinsic operations end ============================================= }; diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 5143d53bad719..7c28efd88ae09 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1328,6 +1328,39 @@ multiclass PairInsertExtractPatV4<ValueType vecty, ValueType elemty> { } } +multiclass XVmulwPat<SDPatternOperator OpNode, string Inst> { + def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), + (!cast<LAInst>(Inst#"_H_B") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), + (!cast<LAInst>(Inst#"_W_H") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), + (!cast<LAInst>(Inst#"_D_W") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), + (!cast<LAInst>(Inst#"_Q_D") LASX256:$xj, LASX256:$xk)>; +} + +multiclass XVmulwuPat<SDPatternOperator OpNode, string Inst> { + def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), + (!cast<LAInst>(Inst#"_H_BU") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), + (!cast<LAInst>(Inst#"_W_HU") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), + (!cast<LAInst>(Inst#"_D_WU") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), + (!cast<LAInst>(Inst#"_Q_DU") LASX256:$xj, LASX256:$xk)>; +} + +multiclass XVmulwusPat<SDPatternOperator OpNode, string Inst> { + def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), + (!cast<LAInst>(Inst#"_H_BU_B") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), + (!cast<LAInst>(Inst#"_W_HU_H") LASX256:$xj, LASX256:$xk)>; + def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), + (!cast<LAInst>(Inst#"_D_WU_W") LASX256:$xj, 
LASX256:$xk)>; + def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), + (!cast<LAInst>(Inst#"_Q_DU_D") LASX256:$xj, LASX256:$xk)>; +} + let Predicates = [HasExtLASX] in { // XVADD_{B/H/W/D} @@ -1365,6 +1398,14 @@ defm : PatXrXr<mul, "XVMUL">; defm : PatXrXr<mulhs, "XVMUH">; defm : PatXrXrU<mulhu, "XVMUH">; +// XVMULW{EV/OD}_{H_B/W_H/D_W/Q_D}[U], XVMULW{EV/OD}_{H_BU_B/W_HU_H/D_WU_W/Q_DU_D} +defm : XVmulwPat<loongarch_vmulwev, "XVMULWEV">; +defm : XVmulwPat<loongarch_vmulwod, "XVMULWOD">; +defm : XVmulwuPat<loongarch_vmulwevu, "XVMULWEV">; +defm : XVmulwuPat<loongarch_vmulwodu, "XVMULWOD">; +defm : XVmulwusPat<loongarch_vmulwevus, "XVMULWEV">; +defm : XVmulwusPat<loongarch_vmulwodus, "XVMULWOD">; + // XVMADD_{B/H/W/D} defm : PatXrXrXr<muladd, "XVMADD">; // XVMSUB_{B/H/W/D} diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 8d1dc99e316c9..e34f6d7e58610 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -82,6 +82,13 @@ def loongarch_vmskgez: SDNode<"LoongArchISD::VMSKGEZ", SDT_LoongArchVMSKCOND>; def loongarch_vmskeqz: SDNode<"LoongArchISD::VMSKEQZ", SDT_LoongArchVMSKCOND>; def loongarch_vmsknez: SDNode<"LoongArchISD::VMSKNEZ", SDT_LoongArchVMSKCOND>; +def loongarch_vmulwev: SDNode<"LoongArchISD::VMULWEV", SDT_LoongArchV2R>; +def loongarch_vmulwod: SDNode<"LoongArchISD::VMULWOD", SDT_LoongArchV2R>; +def loongarch_vmulwevu: SDNode<"LoongArchISD::VMULWEVU", SDT_LoongArchV2R>; +def loongarch_vmulwodu: SDNode<"LoongArchISD::VMULWODU", SDT_LoongArchV2R>; +def loongarch_vmulwevus: SDNode<"LoongArchISD::VMULWEVUS", SDT_LoongArchV2R>; +def loongarch_vmulwodus: SDNode<"LoongArchISD::VMULWODUS", SDT_LoongArchV2R>; + def immZExt1 : ImmLeaf<GRLenVT, [{return isUInt<1>(Imm);}]>; def immZExt2 : ImmLeaf<GRLenVT, [{return isUInt<2>(Imm);}]>; def immZExt3 : ImmLeaf<GRLenVT, [{return isUInt<3>(Imm);}]>; @@ -1518,6 +1525,39 @@ multiclass 
InsertExtractPatV2<ValueType vecty, ValueType elemty> { } } +multiclass VmulwPat<SDPatternOperator OpNode, string Inst> { + def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), + (!cast<LAInst>(Inst#"_H_B") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), + (!cast<LAInst>(Inst#"_W_H") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), + (!cast<LAInst>(Inst#"_D_W") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), + (!cast<LAInst>(Inst#"_Q_D") LSX128:$vj, LSX128:$vk)>; +} + +multiclass VmulwuPat<SDPatternOperator OpNode, string Inst> { + def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), + (!cast<LAInst>(Inst#"_H_BU") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), + (!cast<LAInst>(Inst#"_W_HU") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), + (!cast<LAInst>(Inst#"_D_WU") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), + (!cast<LAInst>(Inst#"_Q_DU") LSX128:$vj, LSX128:$vk)>; +} + +multiclass VmulwusPat<SDPatternOperator OpNode, string Inst> { + def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), + (!cast<LAInst>(Inst#"_H_BU_B") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), + (!cast<LAInst>(Inst#"_W_HU_H") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), + (!cast<LAInst>(Inst#"_D_WU_W") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), + (!cast<LAInst>(Inst#"_Q_DU_D") LSX128:$vj, LSX128:$vk)>; +} + let Predicates = [HasExtLSX] in { // VADD_{B/H/W/D} @@ -1555,6 +1595,14 @@ defm : PatVrVr<mul, "VMUL">; defm : PatVrVr<mulhs, "VMUH">; defm : PatVrVrU<mulhu, "VMUH">; +// VMULW{EV/OD}_{H_B/W_H/D_W/Q_D}[U], VMULW{EV/OD}_{H_BU_B/W_HU_H/D_WU_W/Q_DU_D} +defm : VmulwPat<loongarch_vmulwev, "VMULWEV">; +defm : 
VmulwPat<loongarch_vmulwod, "VMULWOD">; +defm : VmulwuPat<loongarch_vmulwevu, "VMULWEV">; +defm : VmulwuPat<loongarch_vmulwodu, "VMULWOD">; +defm : VmulwusPat<loongarch_vmulwevus, "VMULWEV">; +defm : VmulwusPat<loongarch_vmulwodus, "VMULWOD">; + // VMADD_{B/H/W/D} defm : PatVrVrVr<muladd, "VMADD">; // VMSUB_{B/H/W/D} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll index c8796b839913c..ed3a31d12ee83 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mulwev_od.ll @@ -1,113 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s define void @vmulwev_h_b(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK-LABEL: vmulwev_h_b: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr3, $a1, 0 -; CHECK-NEXT: xvld $xr0, $a2, 0 -; CHECK-NEXT: xvpermi.d $xr2, $xr3, 14 -; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 0 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0 -; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 2 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 4 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2 -; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 6 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3 -; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 8 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4 -; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 10 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5 -; 
CHECK-NEXT: vpickve2gr.b $a1, $vr3, 12 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6 -; CHECK-NEXT: vpickve2gr.b $a1, $vr3, 14 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7 -; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 0 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 0 -; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 2 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 4 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 2 -; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 6 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 3 -; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 8 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 4 -; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 10 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 5 -; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 12 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 6 -; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 14 -; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr3, $a1, 7 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 0 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 0 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 2 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 3 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 4 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 5 -; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 6 -; CHECK-NEXT: vpickve2gr.b $a1, 
$vr0, 14 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr4, $a1, 7 -; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 0 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 -; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 2 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1 -; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 4 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2 -; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 6 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 3 -; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 8 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 4 -; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 10 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 5 -; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 12 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 6 -; CHECK-NEXT: vpickve2gr.b $a1, $vr2, 14 -; CHECK-NEXT: ext.w.b $a1, $a1 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7 -; CHECK-NEXT: xvpermi.q $xr1, $xr3, 2 -; CHECK-NEXT: xvpermi.q $xr4, $xr0, 2 -; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr4 +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvmulwev.h.b $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -127,59 +27,7 @@ define void @vmulwev_w_h(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14 -; CHECK-NEXT: vpickve2gr.h $a1, $vr2, 6 -; CHECK-NEXT: vpickve2gr.h $a2, $vr2, 4 -; CHECK-NEXT: vpickve2gr.h $a3, $vr2, 2 -; CHECK-NEXT: vpickve2gr.h $a4, $vr2, 0 -; CHECK-NEXT: vpickve2gr.h $a5, $vr0, 6 -; CHECK-NEXT: vpickve2gr.h $a6, $vr0, 4 -; CHECK-NEXT: vpickve2gr.h $a7, $vr0, 2 -; CHECK-NEXT: vpickve2gr.h $t0, $vr0, 0 -; CHECK-NEXT: xvpermi.d $xr0, $xr1, 14 -; CHECK-NEXT: vpickve2gr.h $t1, $vr0, 6 -; CHECK-NEXT: vpickve2gr.h $t2, $vr0, 4 -; CHECK-NEXT: vpickve2gr.h $t3, $vr0, 2 -; CHECK-NEXT: 
vpickve2gr.h $t4, $vr0, 0 -; CHECK-NEXT: vpickve2gr.h $t5, $vr1, 6 -; CHECK-NEXT: vpickve2gr.h $t6, $vr1, 4 -; CHECK-NEXT: vpickve2gr.h $t7, $vr1, 2 -; CHECK-NEXT: vpickve2gr.h $t8, $vr1, 0 -; CHECK-NEXT: ext.w.h $t0, $t0 -; CHECK-NEXT: vinsgr2vr.w $vr0, $t0, 0 -; CHECK-NEXT: ext.w.h $a7, $a7 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a7, 1 -; CHECK-NEXT: ext.w.h $a6, $a6 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a6, 2 -; CHECK-NEXT: ext.w.h $a5, $a5 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a5, 3 -; CHECK-NEXT: ext.w.h $a4, $a4 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a4, 0 -; CHECK-NEXT: ext.w.h $a3, $a3 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a3, 1 -; CHECK-NEXT: ext.w.h $a2, $a2 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a2, 2 -; CHECK-NEXT: ext.w.h $a1, $a1 -; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 3 -; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 -... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/161368 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
