================ @@ -3779,65 +3780,169 @@ static void reorderOrder(SmallVectorImpl<unsigned> &Order, ArrayRef<int> Mask, std::optional<BoUpSLP::OrdersType> BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) { assert(TE.State == TreeEntry::NeedToGather && "Expected gather node only."); - unsigned NumScalars = TE.Scalars.size(); + // Try to find subvector extract/insert patterns and reorder only such + // patterns. + SmallVector<Value *> GatheredScalars(TE.Scalars.begin(), TE.Scalars.end()); + Type *ScalarTy = GatheredScalars.front()->getType(); + int NumScalars = GatheredScalars.size(); + if (!isValidElementType(ScalarTy)) + return std::nullopt; + auto *VecTy = FixedVectorType::get(ScalarTy, NumScalars); + int NumParts = TTI->getNumberOfParts(VecTy); + if (NumParts == 0 || NumParts >= NumScalars) + NumParts = 1; + SmallVector<int> ExtractMask; + SmallVector<int> Mask; + SmallVector<SmallVector<const TreeEntry *>> Entries; + SmallVector<std::optional<TargetTransformInfo::ShuffleKind>> ExtractShuffles = + tryToGatherExtractElements(GatheredScalars, ExtractMask, NumParts); + SmallVector<std::optional<TargetTransformInfo::ShuffleKind>> GatherShuffles = + isGatherShuffledEntry(&TE, GatheredScalars, Mask, Entries, NumParts, + /*ForOrder=*/true); + // No shuffled operands - ignore. + if (GatherShuffles.empty() && ExtractShuffles.empty()) + return std::nullopt; OrdersType CurrentOrder(NumScalars, NumScalars); - SmallVector<int> Positions; - SmallBitVector UsedPositions(NumScalars); - const TreeEntry *STE = nullptr; - // Try to find all gathered scalars that are gets vectorized in other - // vectorize node. Here we can have only one single tree vector node to - // correctly identify order of the gathered scalars. - for (unsigned I = 0; I < NumScalars; ++I) { - Value *V = TE.Scalars[I]; - if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V)) - continue; - if (const auto *LocalSTE = getTreeEntry(V)) { - if (!STE) - STE = LocalSTE; - else if (STE != LocalSTE) - // Take the order only from the single vector node. - return std::nullopt; - unsigned Lane = - std::distance(STE->Scalars.begin(), find(STE->Scalars, V)); - if (Lane >= NumScalars) - return std::nullopt; - if (CurrentOrder[Lane] != NumScalars) { - if (Lane != I) + if (GatherShuffles.size() == 1 && + *GatherShuffles.front() == TTI::SK_PermuteSingleSrc && + Entries.front().front()->isSame(TE.Scalars)) { + // Exclude nodes for strided geps from analysis, better to reorder them. + if (!TE.UserTreeIndices.empty() && + TE.UserTreeIndices.front().UserTE->State == + TreeEntry::PossibleStridedVectorize && + Entries.front().front()->State == TreeEntry::NeedToGather) + return std::nullopt; + // Perfect match in the graph, will reuse the previously vectorized + // node. Cost is 0. + std::iota(CurrentOrder.begin(), CurrentOrder.end(), 0); + return CurrentOrder; + } + auto IsBroadcastMask = [](ArrayRef<int> Mask) { + int SingleElt = PoisonMaskElem; + return all_of(Mask, [&](int I) { + if (SingleElt == PoisonMaskElem && I != PoisonMaskElem) + SingleElt = I; + return I == PoisonMaskElem || I == SingleElt; + }); + }; + // Exclusive broadcast mask - ignore. + if ((ExtractShuffles.empty() && IsBroadcastMask(Mask) && + (Entries.size() != 1 || + Entries.front().front()->ReorderIndices.empty())) || + (GatherShuffles.empty() && IsBroadcastMask(ExtractMask))) + return std::nullopt; + SmallBitVector ShuffledSubMasks(NumParts); + auto TransformMaskToOrder = [&](MutableArrayRef<unsigned> CurrentOrder, + ArrayRef<int> Mask, int PartSz, int NumParts, + function_ref<unsigned(unsigned)> GetVF) { + for (int I : seq<int>(0, NumParts)) { ---------------- RKSimon wrote:
why use seq instead of just a basic for loop? https://github.com/llvm/llvm-project/pull/77529 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits