================ @@ -1832,6 +1832,48 @@ static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG); } +/// Lower VECTOR_SHUFFLE into XVPERM (if possible). +static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask, + MVT VT, SDValue V1, SDValue V2, + SelectionDAG &DAG) { + // LoongArch LASX only has XVPERM_W. + if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32)) + return SDValue(); + + unsigned NumElts = VT.getVectorNumElements(); + unsigned HalfSize = NumElts / 2; + bool FrontLo = true, FrontHi = true; + bool BackLo = true, BackHi = true; + + auto inRange = [](int val, int low, int high) { + return (val == -1) || (val >= low && val < high); + }; + + for (unsigned i = 0; i < HalfSize; ++i) { + int Fronti = Mask[i]; + int Backi = Mask[i + HalfSize]; + + FrontLo &= inRange(Fronti, 0, HalfSize); + FrontHi &= inRange(Fronti, HalfSize, NumElts); + BackLo &= inRange(Backi, 0, HalfSize); + BackHi &= inRange(Backi, HalfSize, NumElts); + } + + // If both the lower and upper 128-bit parts access only one half of the + // vector (either lower or upper), avoid using xvperm.w. The latency of + // xvperm.w(3) is higher than using xvshuf(1) and xvori(1). ---------------- zhaoqi5 wrote:
OK. Thanks for your review. https://github.com/llvm/llvm-project/pull/151634 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits