https://github.com/alexey-bataev updated
https://github.com/llvm/llvm-project/pull/77529
>From 7440ee8ba235fd871af0999f66d5d6130456400b Mon Sep 17 00:00:00 2001
From: Alexey Bataev
Date: Tue, 9 Jan 2024 21:43:31 +
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
=?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.5
---
.../Transforms/Vectorize/SLPVectorizer.cpp| 476 ++
.../AArch64/extractelements-to-shuffle.ll | 16 +-
.../AArch64/reorder-fmuladd-crash.ll | 7 +-
.../SLPVectorizer/AArch64/tsc-s116.ll | 22 +-
.../Transforms/SLPVectorizer/X86/pr35497.ll | 16 +-
.../SLPVectorizer/X86/reduction-transpose.ll | 16 +-
.../X86/reorder-clustered-node.ll | 11 +-
.../X86/reorder-reused-masked-gather.ll | 7 +-
.../SLPVectorizer/X86/reorder-vf-to-resize.ll | 2 +-
.../X86/scatter-vectorize-reorder.ll | 17 +-
.../X86/shrink_after_reorder2.ll | 11 +-
11 files changed, 445 insertions(+), 156 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8e22b54f002d1cb..4765cef290b9df9 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -858,7 +858,7 @@ static void addMask(SmallVectorImpl ,
ArrayRef SubMask,
/// values 3 and 7 respectively:
/// before: 6 9 5 4 9 2 1 0
/// after: 6 3 5 4 7 2 1 0
-static void fixupOrderingIndices(SmallVectorImpl ) {
+static void fixupOrderingIndices(MutableArrayRef Order) {
const unsigned Sz = Order.size();
SmallBitVector UnusedIndices(Sz, /*t=*/true);
SmallBitVector MaskedIndices(Sz);
@@ -2418,7 +2418,8 @@ class BoUpSLP {
std::optional
isGatherShuffledSingleRegisterEntry(
const TreeEntry *TE, ArrayRef VL, MutableArrayRef Mask,
- SmallVectorImpl , unsigned Part);
+ SmallVectorImpl , unsigned Part,
+ bool ForOrder);
/// Checks if the gathered \p VL can be represented as multi-register
/// shuffle(s) of previous tree entries.
@@ -2432,7 +2433,7 @@ class BoUpSLP {
isGatherShuffledEntry(
const TreeEntry *TE, ArrayRef VL, SmallVectorImpl ,
SmallVectorImpl> ,
- unsigned NumParts);
+ unsigned NumParts, bool ForOrder = false);
/// \returns the scalarization cost for this list of values. Assuming that
/// this subtree gets vectorized, we may need to extract the values from the
@@ -3756,65 +3757,169 @@ static void reorderOrder(SmallVectorImpl
, ArrayRef Mask) {
std::optional
BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry ) {
assert(TE.State == TreeEntry::NeedToGather && "Expected gather node only.");
- unsigned NumScalars = TE.Scalars.size();
+ // Try to find subvector extract/insert patterns and reorder only such
+ // patterns.
+ SmallVector GatheredScalars(TE.Scalars.begin(), TE.Scalars.end());
+ Type *ScalarTy = GatheredScalars.front()->getType();
+ int NumScalars = GatheredScalars.size();
+ if (!isValidElementType(ScalarTy))
+return std::nullopt;
+ auto *VecTy = FixedVectorType::get(ScalarTy, NumScalars);
+ int NumParts = TTI->getNumberOfParts(VecTy);
+ if (NumParts == 0 || NumParts >= NumScalars)
+NumParts = 1;
+ SmallVector ExtractMask;
+ SmallVector Mask;
+ SmallVector> Entries;
+ SmallVector> ExtractShuffles
=
+ tryToGatherExtractElements(GatheredScalars, ExtractMask, NumParts);
+ SmallVector> GatherShuffles =
+ isGatherShuffledEntry(, GatheredScalars, Mask, Entries, NumParts,
+/*ForOrder=*/true);
+ // No shuffled operands - ignore.
+ if (GatherShuffles.empty() && ExtractShuffles.empty())
+return std::nullopt;
OrdersType CurrentOrder(NumScalars, NumScalars);
- SmallVector Positions;
- SmallBitVector UsedPositions(NumScalars);
- const TreeEntry *STE = nullptr;
- // Try to find all gathered scalars that are gets vectorized in other
- // vectorize node. Here we can have only one single tree vector node to
- // correctly identify order of the gathered scalars.
- for (unsigned I = 0; I < NumScalars; ++I) {
-Value *V = TE.Scalars[I];
-if (!isa(V))
- continue;
-if (const auto *LocalSTE = getTreeEntry(V)) {
- if (!STE)
-STE = LocalSTE;
- else if (STE != LocalSTE)
-// Take the order only from the single vector node.
-return std::nullopt;
- unsigned Lane =
- std::distance(STE->Scalars.begin(), find(STE->Scalars, V));
- if (Lane >= NumScalars)
-return std::nullopt;
- if (CurrentOrder[Lane] != NumScalars) {
-if (Lane != I)
+ if (GatherShuffles.size() == 1 &&
+ *GatherShuffles.front() == TTI::SK_PermuteSingleSrc &&
+ Entries.front().front()->isSame(TE.Scalars)) {
+// Exclude nodes for strided geps from analysis, better to reorder them.
+if