https://gcc.gnu.org/g:32cf28ccc9e77ce0e21db38fa1bdfe1b71bbd031

commit r15-5069-g32cf28ccc9e77ce0e21db38fa1bdfe1b71bbd031
Author: Richard Biener <rguent...@suse.de>
Date:   Fri Nov 8 13:25:13 2024 +0100

    Do not cost the permute nodes that are part of SLP load-lanes
    
    There are some SVE testsuite failures when forcing SLP because
    costing prevents VLA vectors from being used: we add permute cost
    for the VEC_PERM nodes that are part of an SLP load-lanes node.  The
    permutes only exist for representational reasons and pessimize SLP
    vs. non-SLP, so the following makes sure to cost them as zero.
    
            * tree-vect-slp.cc (vectorizable_slp_permutation_1): Return
            zero for the permute nodes that are part of load-lanes.

Diff:
---
 gcc/tree-vect-slp.cc | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 97c362d24f8d..ffe9e718575b 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -10568,13 +10568,15 @@ vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi,
   /* Load-lanes permute.  This permute only acts as a forwarder to
      select the correct vector def of the load-lanes load which
      has the permuted vectors in its vector defs like
-     { v0, w0, r0, v1, w1, r1 ... } for a ld3.  */
+     { v0, w0, r0, v1, w1, r1 ... } for a ld3.  All costs are
+     accounted for in the costing for the actual load so we
+     return zero here.  */
   if (node->ldst_lanes)
     {
       gcc_assert (children.length () == 1);
       if (!gsi)
        /* This is a trivial op always supported.  */
-       return 1;
+       return 0;
       slp_tree child = children[0];
       unsigned vec_idx = (SLP_TREE_LANE_PERMUTATION (node)[0].second
                          / SLP_TREE_LANES (node));
@@ -10584,7 +10586,7 @@ vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi,
          tree def = SLP_TREE_VEC_DEFS (child)[i * vec_num  + vec_idx];
          node->push_vec_def (def);
        }
-      return 1;
+      return 0;
     }
 
   /* Set REPEATING_P to true if the permutations are cylical wrt UNPACK_FACTOR

Reply via email to