[clang-tools-extra] [llvm] [clang] [SLP]Improve findReusedOrderedScalars and graph rotation. (PR #77529)

2024-01-30 Thread Simon Pilgrim via cfe-commits


@@ -2418,7 +2418,8 @@ class BoUpSLP {
   std::optional
   isGatherShuffledSingleRegisterEntry(
   const TreeEntry *TE, ArrayRef VL, MutableArrayRef Mask,
-  SmallVectorImpl , unsigned Part);
+  SmallVectorImpl , unsigned Part,
+  bool ForOrder);

RKSimon wrote:

Add ForOrder to the doxygen description

https://github.com/llvm/llvm-project/pull/77529
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang-tools-extra] [llvm] [clang] [SLP]Improve findReusedOrderedScalars and graph rotation. (PR #77529)

2024-01-29 Thread Alexey Bataev via cfe-commits

https://github.com/alexey-bataev updated 
https://github.com/llvm/llvm-project/pull/77529

>From 7440ee8ba235fd871af0999f66d5d6130456400b Mon Sep 17 00:00:00 2001
From: Alexey Bataev 
Date: Tue, 9 Jan 2024 21:43:31 +
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
 =?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.5
---
 .../Transforms/Vectorize/SLPVectorizer.cpp| 476 ++
 .../AArch64/extractelements-to-shuffle.ll |  16 +-
 .../AArch64/reorder-fmuladd-crash.ll  |   7 +-
 .../SLPVectorizer/AArch64/tsc-s116.ll |  22 +-
 .../Transforms/SLPVectorizer/X86/pr35497.ll   |  16 +-
 .../SLPVectorizer/X86/reduction-transpose.ll  |  16 +-
 .../X86/reorder-clustered-node.ll |  11 +-
 .../X86/reorder-reused-masked-gather.ll   |   7 +-
 .../SLPVectorizer/X86/reorder-vf-to-resize.ll |   2 +-
 .../X86/scatter-vectorize-reorder.ll  |  17 +-
 .../X86/shrink_after_reorder2.ll  |  11 +-
 11 files changed, 445 insertions(+), 156 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8e22b54f002d1cb..4765cef290b9df9 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -858,7 +858,7 @@ static void addMask(SmallVectorImpl , 
ArrayRef SubMask,
 /// values 3 and 7 respectively:
 /// before:  6 9 5 4 9 2 1 0
 /// after:   6 3 5 4 7 2 1 0
-static void fixupOrderingIndices(SmallVectorImpl ) {
+static void fixupOrderingIndices(MutableArrayRef Order) {
   const unsigned Sz = Order.size();
   SmallBitVector UnusedIndices(Sz, /*t=*/true);
   SmallBitVector MaskedIndices(Sz);
@@ -2418,7 +2418,8 @@ class BoUpSLP {
   std::optional
   isGatherShuffledSingleRegisterEntry(
   const TreeEntry *TE, ArrayRef VL, MutableArrayRef Mask,
-  SmallVectorImpl , unsigned Part);
+  SmallVectorImpl , unsigned Part,
+  bool ForOrder);
 
   /// Checks if the gathered \p VL can be represented as multi-register
   /// shuffle(s) of previous tree entries.
@@ -2432,7 +2433,7 @@ class BoUpSLP {
   isGatherShuffledEntry(
   const TreeEntry *TE, ArrayRef VL, SmallVectorImpl ,
   SmallVectorImpl> ,
-  unsigned NumParts);
+  unsigned NumParts, bool ForOrder = false);
 
   /// \returns the scalarization cost for this list of values. Assuming that
   /// this subtree gets vectorized, we may need to extract the values from the
@@ -3756,65 +3757,169 @@ static void reorderOrder(SmallVectorImpl 
, ArrayRef Mask) {
 std::optional
 BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry ) {
   assert(TE.State == TreeEntry::NeedToGather && "Expected gather node only.");
-  unsigned NumScalars = TE.Scalars.size();
+  // Try to find subvector extract/insert patterns and reorder only such
+  // patterns.
+  SmallVector GatheredScalars(TE.Scalars.begin(), TE.Scalars.end());
+  Type *ScalarTy = GatheredScalars.front()->getType();
+  int NumScalars = GatheredScalars.size();
+  if (!isValidElementType(ScalarTy))
+return std::nullopt;
+  auto *VecTy = FixedVectorType::get(ScalarTy, NumScalars);
+  int NumParts = TTI->getNumberOfParts(VecTy);
+  if (NumParts == 0 || NumParts >= NumScalars)
+NumParts = 1;
+  SmallVector ExtractMask;
+  SmallVector Mask;
+  SmallVector> Entries;
+  SmallVector> ExtractShuffles 
=
+  tryToGatherExtractElements(GatheredScalars, ExtractMask, NumParts);
+  SmallVector> GatherShuffles =
+  isGatherShuffledEntry(, GatheredScalars, Mask, Entries, NumParts,
+/*ForOrder=*/true);
+  // No shuffled operands - ignore.
+  if (GatherShuffles.empty() && ExtractShuffles.empty())
+return std::nullopt;
   OrdersType CurrentOrder(NumScalars, NumScalars);
-  SmallVector Positions;
-  SmallBitVector UsedPositions(NumScalars);
-  const TreeEntry *STE = nullptr;
-  // Try to find all gathered scalars that are gets vectorized in other
-  // vectorize node. Here we can have only one single tree vector node to
-  // correctly identify order of the gathered scalars.
-  for (unsigned I = 0; I < NumScalars; ++I) {
-Value *V = TE.Scalars[I];
-if (!isa(V))
-  continue;
-if (const auto *LocalSTE = getTreeEntry(V)) {
-  if (!STE)
-STE = LocalSTE;
-  else if (STE != LocalSTE)
-// Take the order only from the single vector node.
-return std::nullopt;
-  unsigned Lane =
-  std::distance(STE->Scalars.begin(), find(STE->Scalars, V));
-  if (Lane >= NumScalars)
-return std::nullopt;
-  if (CurrentOrder[Lane] != NumScalars) {
-if (Lane != I)
+  if (GatherShuffles.size() == 1 &&
+  *GatherShuffles.front() == TTI::SK_PermuteSingleSrc &&
+  Entries.front().front()->isSame(TE.Scalars)) {
+// Exclude nodes for strided geps from analysis, better to reorder them.
+if