https://gcc.gnu.org/g:32cf28ccc9e77ce0e21db38fa1bdfe1b71bbd031
commit r15-5069-g32cf28ccc9e77ce0e21db38fa1bdfe1b71bbd031 Author: Richard Biener <rguent...@suse.de> Date: Fri Nov 8 13:25:13 2024 +0100 Do not cost the permute nodes that are part of SLP load-lanes There are some SVE testsuite failures when forcing SLP because costing prevents VLA vectors from being used as we add permute cost for the VEC_PERM nodes that are part of an SLP load-lanes node. The permutes only exist for representational reasons and pessimize SLP vs non-SLP so the following makes sure to cost them as zero. * tree-vect-slp.cc (vectorizable_slp_permutation_1): Return zero for the permute nodes that are part of load-lanes. Diff: --- gcc/tree-vect-slp.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 97c362d24f8d..ffe9e718575b 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -10568,13 +10568,15 @@ vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi, /* Load-lanes permute. This permute only acts as a forwarder to select the correct vector def of the load-lanes load which has the permuted vectors in its vector defs like - { v0, w0, r0, v1, w1, r1 ... } for a ld3. */ + { v0, w0, r0, v1, w1, r1 ... } for a ld3. All costs are + accounted for in the costing for the actual load so we + return zero here. */ if (node->ldst_lanes) { gcc_assert (children.length () == 1); if (!gsi) /* This is a trivial op always supported. */ - return 1; + return 0; slp_tree child = children[0]; unsigned vec_idx = (SLP_TREE_LANE_PERMUTATION (node)[0].second / SLP_TREE_LANES (node)); @@ -10584,7 +10586,7 @@ vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi, tree def = SLP_TREE_VEC_DEFS (child)[i * vec_num + vec_idx]; node->push_vec_def (def); } - return 1; + return 0; } /* Set REPEATING_P to true if the permutations are cylical wrt UNPACK_FACTOR