On Mon, 1 Dec 2025 at 18:17, Richard Biener <[email protected]> wrote:
>
>
>
> > Am 01.12.2025 um 17:21 schrieb Christophe Lyon <[email protected]>:
> >
> > On Mon, 13 Oct 2025 at 14:09, Richard Biener <[email protected]> wrote:
> >>
> >> The following moves us (almost) away from REDUC_GROUP_* to recognize
> >> reduction chains towards making this a SLP discovery artifact.
> >> Reduction chains are now explicitly marked in the reduction info
> >> and discovery is done during SLP discovery rather than during
> >> analysis of scalar cycles. This gets rid of interactions with
> >> patterns and it also allows us to transparently fall back to non-chained
> >> reductions even when there is a conversion involved. This also
> >> spurred some major TLC in vectorizable_reduction.
> >>
> >> What's still missing is to get rid of the last REDUC_GROUP_FIRST_ELEMENT
> >> usage in SLP discovery - by not claiming we can handle the reduction
> >> chain itself there. I'm leaving this for a followup (this was big
> >> enough).
> >>
> >> At least on x86-64 I now see XPASSes for gcc.dg/vect/vect-reduc-dot-s8b.c
> >> and gcc.dg/vect/vect-reduc-pattern-2c.c. I have not done careful
> >> analysis yet, will wait for the CI with that.
> >
> > Linaro CI detected them on arm and aarch64 too, do you want me to file
> > a PR to keep track of that?
>
> Please.
>
Just filed PR 122961
Thanks,
Christophe
> Richard
>
> >
> > Thanks,
> >
> > Christophe
> >
> >>
> >> Bootstrap and regtest running on x86_64-unknown-linux-gnu, comments
> >> welcome.
> >>
> >> Thanks,
> >> Richard.
> >>
> >> * tree-vectorizer.h (vect_reduc_info_s::is_reduc_chain): New.
> >> (_loop_vec_info::reduction_chains): Remove.
> >> (LOOP_VINFO_REDUCTION_CHAINS): Likewise.
> >> * tree-vect-patterns.cc (vect_reassociating_reduction_p):
> >> Do not special-case reduction group stmts.
> >> * tree-vect-loop.cc (vect_is_simple_reduction): Remove
> >> reduction chain handling.
> >> (vect_analyze_scalar_cycles_1): Remove slp parameter and adjust.
> >> (vect_analyze_scalar_cycles): Likewise.
> >> (vect_fixup_reduc_chain): Remove.
> >> (vect_fixup_scalar_cycles_with_patterns): Likewise.
> >> (vect_analyze_loop_2): Adjust.
> >> (vect_create_epilog_for_reduction): Check the reduction info
> >> for whether this is a reduction chain.
> >> (vect_transform_cycle_phi): Likewise.
> >> (vectorizable_reduction): Likewise. Simplify code for all-SLP.
> >> * tree-vect-slp.cc (vect_analyze_slp_reduc_chain): Simplify.
> >> (vect_analyze_slp_reduction): New function, perform reduction
> >> chain discovery here.
> >> (vect_analyze_slp): Remove reduction chain handling.
> >> Use vect_analyze_slp_reduction for possible reduction chain
> >> processing.
> >>
> >> * gcc.dg/vect/pr120687-1.c: Adjust.
> >> * gcc.dg/vect/pr120687-2.c: Likewise.
> >> * gcc.dg/vect/pr120687-3.c: Likewise.
> >> ---
> >> gcc/testsuite/gcc.dg/vect/pr120687-1.c | 2 +-
> >> gcc/testsuite/gcc.dg/vect/pr120687-2.c | 2 +-
> >> gcc/testsuite/gcc.dg/vect/pr120687-3.c | 2 +-
> >> gcc/tree-vect-loop.cc | 245 +++-----------
> >> gcc/tree-vect-patterns.cc | 12 +-
> >> gcc/tree-vect-slp.cc | 432 +++++++++++++------------
> >> gcc/tree-vectorizer.h | 8 +-
> >> 7 files changed, 283 insertions(+), 420 deletions(-)
> >>
> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr120687-1.c
> >> b/gcc/testsuite/gcc.dg/vect/pr120687-1.c
> >> index ce9cf6301ce..ac684c0e826 100644
> >> --- a/gcc/testsuite/gcc.dg/vect/pr120687-1.c
> >> +++ b/gcc/testsuite/gcc.dg/vect/pr120687-1.c
> >> @@ -11,6 +11,6 @@ frd (unsigned *p, unsigned *lastone)
> >> return sum;
> >> }
> >>
> >> -/* { dg-final { scan-tree-dump "reduction: detected reduction chain"
> >> "vect" } } */
> >> +/* { dg-final { scan-tree-dump "Starting SLP discovery of reduction
> >> chain" "vect" } } */
> >> /* { dg-final { scan-tree-dump-not "SLP discovery of reduction chain
> >> failed" "vect" } } */
> >> /* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" } } */
> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr120687-2.c
> >> b/gcc/testsuite/gcc.dg/vect/pr120687-2.c
> >> index dfc6dc726e9..25f03555ba1 100644
> >> --- a/gcc/testsuite/gcc.dg/vect/pr120687-2.c
> >> +++ b/gcc/testsuite/gcc.dg/vect/pr120687-2.c
> >> @@ -12,6 +12,6 @@ frd (float *p, float *lastone)
> >> return sum;
> >> }
> >>
> >> -/* { dg-final { scan-tree-dump "reduction: detected reduction chain"
> >> "vect" } } */
> >> +/* { dg-final { scan-tree-dump "Starting SLP discovery of reduction
> >> chain" "vect" } } */
> >> /* { dg-final { scan-tree-dump-not "SLP discovery of reduction chain
> >> failed" "vect" } } */
> >> /* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" } } */
> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr120687-3.c
> >> b/gcc/testsuite/gcc.dg/vect/pr120687-3.c
> >> index f20a66a6223..31a6c9419ec 100644
> >> --- a/gcc/testsuite/gcc.dg/vect/pr120687-3.c
> >> +++ b/gcc/testsuite/gcc.dg/vect/pr120687-3.c
> >> @@ -11,6 +11,6 @@ frd (float *p, float *lastone)
> >> return sum;
> >> }
> >>
> >> -/* { dg-final { scan-tree-dump "reduction: detected reduction chain"
> >> "vect" } } */
> >> +/* { dg-final { scan-tree-dump "Starting SLP discovery of reduction
> >> chain" "vect" } } */
> >> /* { dg-final { scan-tree-dump-not "SLP discovery of reduction chain
> >> failed" "vect" } } */
> >> /* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" } } */
> >> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> >> index 003dc734c01..ac30352b630 100644
> >> --- a/gcc/tree-vect-loop.cc
> >> +++ b/gcc/tree-vect-loop.cc
> >> @@ -161,7 +161,7 @@ along with GCC; see the file COPYING3. If not see
> >> static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int
> >> *,
> >> unsigned *);
> >> static stmt_vec_info vect_is_simple_reduction (loop_vec_info,
> >> stmt_vec_info,
> >> - gphi **, bool *, bool);
> >> + gphi **);
> >>
> >>
> >> /* Function vect_is_simple_iv_evolution.
> >> @@ -341,8 +341,7 @@ vect_phi_first_order_recurrence_p (loop_vec_info
> >> loop_vinfo, class loop *loop,
> >> slp analyses or not. */
> >>
> >> static void
> >> -vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop,
> >> - bool slp)
> >> +vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop)
> >> {
> >> basic_block bb = loop->header;
> >> auto_vec<stmt_vec_info, 64> worklist;
> >> @@ -425,19 +424,15 @@ vect_analyze_scalar_cycles_1 (loop_vec_info
> >> loop_vinfo, class loop *loop,
> >> && STMT_VINFO_DEF_TYPE (stmt_vinfo) ==
> >> vect_unknown_def_type);
> >>
> >> gphi *double_reduc;
> >> - bool reduc_chain;
> >> stmt_vec_info reduc_stmt_info
> >> - = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc,
> >> - &reduc_chain, slp);
> >> + = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc);
> >> if (reduc_stmt_info && double_reduc)
> >> {
> >> - bool inner_chain;
> >> stmt_vec_info inner_phi_info
> >> = loop_vinfo->lookup_stmt (double_reduc);
> >> /* ??? Pass down flag we're the inner loop of a double reduc. */
> >> stmt_vec_info inner_reduc_info
> >> - = vect_is_simple_reduction (loop_vinfo, inner_phi_info,
> >> - NULL, &inner_chain, slp);
> >> + = vect_is_simple_reduction (loop_vinfo, inner_phi_info, NULL);
> >> if (inner_reduc_info)
> >> {
> >> STMT_VINFO_REDUC_DEF (stmt_vinfo) = reduc_stmt_info;
> >> @@ -478,12 +473,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info
> >> loop_vinfo, class loop *loop,
> >>
> >> STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
> >> STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_reduction_def;
> >> - /* Store the reduction cycles for possible vectorization in
> >> - loop-aware SLP if it was not detected as reduction
> >> - chain. */
> >> - if (! reduc_chain)
> >> - LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push
> >> - (reduc_stmt_info);
> >> + LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push
> >> (reduc_stmt_info);
> >> }
> >> }
> >> else if (vect_phi_first_order_recurrence_p (loop_vinfo, loop, phi))
> >> @@ -518,11 +508,11 @@ vect_analyze_scalar_cycles_1 (loop_vec_info
> >> loop_vinfo, class loop *loop,
> >> a[i] = i; */
> >>
> >> static void
> >> -vect_analyze_scalar_cycles (loop_vec_info loop_vinfo, bool slp)
> >> +vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
> >> {
> >> class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> >>
> >> - vect_analyze_scalar_cycles_1 (loop_vinfo, loop, slp);
> >> + vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
> >>
> >> /* When vectorizing an outer-loop, the inner-loop is executed
> >> sequentially.
> >> Reductions in such inner-loop therefore have different properties than
> >> @@ -534,87 +524,7 @@ vect_analyze_scalar_cycles (loop_vec_info loop_vinfo,
> >> bool slp)
> >> current checks are too strict. */
> >>
> >> if (loop->inner)
> >> - vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner, slp);
> >> -}
> >> -
> >> -/* Transfer group and reduction information from STMT_INFO to its
> >> - pattern stmt. */
> >> -
> >> -static void
> >> -vect_fixup_reduc_chain (stmt_vec_info stmt_info)
> >> -{
> >> - stmt_vec_info firstp = STMT_VINFO_RELATED_STMT (stmt_info);
> >> - stmt_vec_info stmtp;
> >> - gcc_assert (!REDUC_GROUP_FIRST_ELEMENT (firstp)
> >> - && REDUC_GROUP_FIRST_ELEMENT (stmt_info));
> >> - REDUC_GROUP_SIZE (firstp) = REDUC_GROUP_SIZE (stmt_info);
> >> - do
> >> - {
> >> - stmtp = STMT_VINFO_RELATED_STMT (stmt_info);
> >> - gcc_checking_assert (STMT_VINFO_DEF_TYPE (stmtp)
> >> - == STMT_VINFO_DEF_TYPE (stmt_info));
> >> - REDUC_GROUP_FIRST_ELEMENT (stmtp) = firstp;
> >> - stmt_info = REDUC_GROUP_NEXT_ELEMENT (stmt_info);
> >> - if (stmt_info)
> >> - REDUC_GROUP_NEXT_ELEMENT (stmtp)
> >> - = STMT_VINFO_RELATED_STMT (stmt_info);
> >> - }
> >> - while (stmt_info);
> >> -}
> >> -
> >> -/* Fixup scalar cycles that now have their stmts detected as patterns. */
> >> -
> >> -static void
> >> -vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
> >> -{
> >> - stmt_vec_info first;
> >> - unsigned i;
> >> -
> >> - FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
> >> - {
> >> - stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first);
> >> - while (next)
> >> - {
> >> - if ((STMT_VINFO_IN_PATTERN_P (next)
> >> - != STMT_VINFO_IN_PATTERN_P (first))
> >> - || STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (next)) ==
> >> -1)
> >> - break;
> >> - next = REDUC_GROUP_NEXT_ELEMENT (next);
> >> - }
> >> - /* If all reduction chain members are well-formed patterns adjust
> >> - the group to group the pattern stmts instead. */
> >> - if (! next
> >> - && STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (first)) != -1)
> >> - {
> >> - if (STMT_VINFO_IN_PATTERN_P (first))
> >> - {
> >> - vect_fixup_reduc_chain (first);
> >> - LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
> >> - = STMT_VINFO_RELATED_STMT (first);
> >> - }
> >> - }
> >> - /* If not all stmt in the chain are patterns or if we failed
> >> - to update STMT_VINFO_REDUC_IDX dissolve the chain and handle
> >> - it as regular reduction instead. */
> >> - else
> >> - {
> >> - stmt_vec_info vinfo = first;
> >> - stmt_vec_info last = NULL;
> >> - while (vinfo)
> >> - {
> >> - next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
> >> - REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
> >> - REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
> >> - last = vinfo;
> >> - vinfo = next;
> >> - }
> >> - STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize (first))
> >> - = vect_internal_def;
> >> - loop_vinfo->reductions.safe_push (vect_stmt_to_vectorize (last));
> >> - LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).unordered_remove (i);
> >> - --i;
> >> - }
> >> - }
> >> + vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
> >> }
> >>
> >> /* Function vect_get_loop_niters.
> >> @@ -2267,12 +2177,10 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo,
> >> bool &fatal,
> >>
> >> /* Classify all cross-iteration scalar data-flow cycles.
> >> Cross-iteration cycles caused by virtual phis are analyzed
> >> separately. */
> >> - vect_analyze_scalar_cycles (loop_vinfo, !force_single_lane);
> >> + vect_analyze_scalar_cycles (loop_vinfo);
> >>
> >> vect_pattern_recog (loop_vinfo);
> >>
> >> - vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
> >> -
> >> /* Analyze the access patterns of the data-refs in the loop (consecutive,
> >> complex, etc.). FORNOW: Only handle consecutive access pattern. */
> >>
> >> @@ -2681,10 +2589,6 @@ again:
> >> if (applying_suggested_uf)
> >> return ok;
> >>
> >> - /* If there are reduction chains re-trying will fail anyway. */
> >> - if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ())
> >> - return ok;
> >> -
> >> /* Likewise if the grouped loads or stores in the SLP cannot be handled
> >> via interleaving or lane instructions. */
> >> slp_instance instance;
> >> @@ -3762,7 +3666,7 @@ check_reduction_path (dump_user_location_t loc,
> >> loop_p loop, gphi *phi,
> >>
> >> static stmt_vec_info
> >> vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
> >> - gphi **double_reduc, bool *reduc_chain_p, bool
> >> slp)
> >> + gphi **double_reduc)
> >> {
> >> gphi *phi = as_a <gphi *> (phi_info->stmt);
> >> gimple *phi_use_stmt = NULL;
> >> @@ -3774,7 +3678,6 @@ vect_is_simple_reduction (loop_vec_info loop_info,
> >> stmt_vec_info phi_info,
> >> bool inner_loop_of_double_reduc = double_reduc == NULL;
> >> if (double_reduc)
> >> *double_reduc = NULL;
> >> - *reduc_chain_p = false;
> >> STMT_VINFO_REDUC_TYPE (phi_info) = TREE_CODE_REDUCTION;
> >>
> >> tree phi_name = PHI_RESULT (phi);
> >> @@ -3924,12 +3827,8 @@ vect_is_simple_reduction (loop_vec_info loop_info,
> >> stmt_vec_info phi_info,
> >> if (code == COND_EXPR && !nested_in_vect_loop)
> >> STMT_VINFO_REDUC_TYPE (phi_info) = COND_REDUCTION;
> >>
> >> - /* Fill in STMT_VINFO_REDUC_IDX and gather stmts for an SLP
> >> - reduction chain for which the additional restriction is that
> >> - all operations in the chain are the same. */
> >> - auto_vec<stmt_vec_info, 8> reduc_chain;
> >> + /* Fill in STMT_VINFO_REDUC_IDX. */
> >> unsigned i;
> >> - bool is_slp_reduc = !nested_in_vect_loop && code != COND_EXPR;
> >> for (i = path.length () - 1; i >= 1; --i)
> >> {
> >> gimple *stmt = USE_STMT (path[i].second);
> >> @@ -3946,39 +3845,8 @@ vect_is_simple_reduction (loop_vec_info loop_info,
> >> stmt_vec_info phi_info,
> >> STMT_VINFO_REDUC_IDX (stmt_info)
> >> = path[i].second->use - gimple_call_arg_ptr (call, 0);
> >> }
> >> - bool leading_conversion = (CONVERT_EXPR_CODE_P (op.code)
> >> - && (i == 1 || i == path.length () -
> >> 1));
> >> - if ((op.code != code && !leading_conversion)
> >> - /* We can only handle the final value in epilogue
> >> - generation for reduction chains. */
> >> - || (i != 1 && !has_single_use (gimple_get_lhs (stmt))))
> >> - is_slp_reduc = false;
> >> - /* For reduction chains we support a trailing/leading
> >> - conversions. We do not store those in the actual chain. */
> >> - if (leading_conversion)
> >> - continue;
> >> - reduc_chain.safe_push (stmt_info);
> >> }
> >> - if (slp && is_slp_reduc && reduc_chain.length () > 1)
> >> - {
> >> - for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
> >> - {
> >> - REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
> >> - REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
> >> - }
> >> - REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
> >> - REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;
> >> -
> >> - /* Save the chain for further analysis in SLP detection. */
> >> - LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push
> >> (reduc_chain[0]);
> >> - REDUC_GROUP_SIZE (reduc_chain[0]) = reduc_chain.length ();
> >> -
> >> - *reduc_chain_p = true;
> >> - if (dump_enabled_p ())
> >> - dump_printf_loc (MSG_NOTE, vect_location,
> >> - "reduction: detected reduction chain\n");
> >> - }
> >> - else if (dump_enabled_p ())
> >> + if (dump_enabled_p ())
> >> dump_printf_loc (MSG_NOTE, vect_location,
> >> "reduction: detected reduction\n");
> >>
> >> @@ -5411,8 +5279,7 @@ vect_create_epilog_for_reduction (loop_vec_info
> >> loop_vinfo,
> >> # b1 = phi <b2, b0>
> >> a2 = operation (a1)
> >> b2 = operation (b1) */
> >> - const bool slp_reduc
> >> - = SLP_INSTANCE_KIND (slp_node_instance) != slp_inst_kind_reduc_chain;
> >> + const bool slp_reduc = !reduc_info->is_reduc_chain;
> >> tree induction_index = NULL_TREE;
> >>
> >> unsigned int group_size = SLP_TREE_LANES (slp_node);
> >> @@ -6962,8 +6829,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
> >> bool single_defuse_cycle = false;
> >> tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
> >> tree cond_reduc_val = NULL_TREE;
> >> - const bool reduc_chain
> >> - = SLP_INSTANCE_KIND (slp_node_instance) == slp_inst_kind_reduc_chain;
> >>
> >> /* Make sure it was already recognized as a reduction computation. */
> >> if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
> >> @@ -7025,6 +6890,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
> >> double_reduc = true;
> >> }
> >>
> >> + const bool reduc_chain = reduc_info->is_reduc_chain;
> >> slp_node_instance->reduc_phis = slp_node;
> >> /* ??? We're leaving slp_node to point to the PHIs, we only
> >> need it to get at the number of vector stmts which wasn't
> >> @@ -7036,33 +6902,28 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
> >>
> >> /* Verify following REDUC_IDX from the latch def leads us back to the PHI
> >> and compute the reduction chain length. Discover the real
> >> - reduction operation stmt on the way (stmt_info and
> >> slp_for_stmt_info). */
> >> - tree reduc_def
> >> - = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi, loop_latch_edge (loop));
> >> + reduction operation stmt on the way (slp_for_stmt_info). */
> >> unsigned reduc_chain_length = 0;
> >> - bool only_slp_reduc_chain = true;
> >> stmt_info = NULL;
> >> slp_tree slp_for_stmt_info = NULL;
> >> slp_tree vdef_slp = slp_node_instance->root;
> >> - /* For double-reductions we start SLP analysis at the inner loop LC PHI
> >> - which is the def of the outer loop live stmt. */
> >> - if (double_reduc)
> >> - vdef_slp = SLP_TREE_CHILDREN (vdef_slp)[0];
> >> - while (reduc_def != PHI_RESULT (reduc_def_phi))
> >> + while (vdef_slp != slp_node)
> >> {
> >> - stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
> >> - stmt_vec_info vdef = vect_stmt_to_vectorize (def);
> >> - int reduc_idx = STMT_VINFO_REDUC_IDX (vdef);
> >> - if (STMT_VINFO_REDUC_IDX (vdef) == -1
> >> - || SLP_TREE_REDUC_IDX (vdef_slp) == -1)
> >> + int reduc_idx = SLP_TREE_REDUC_IDX (vdef_slp);
> >> + if (reduc_idx == -1)
> >> {
> >> if (dump_enabled_p ())
> >> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> >> "reduction chain broken by patterns.\n");
> >> return false;
> >> }
> >> - if (!REDUC_GROUP_FIRST_ELEMENT (vdef))
> >> - only_slp_reduc_chain = false;
> >> + stmt_vec_info vdef = SLP_TREE_REPRESENTATIVE (vdef_slp);
> >> + if (is_a <gphi *> (vdef->stmt))
> >> + {
> >> + vdef_slp = SLP_TREE_CHILDREN (vdef_slp)[reduc_idx];
> >> + /* Do not count PHIs towards the chain length. */
> >> + continue;
> >> + }
> >> gimple_match_op op;
> >> if (!gimple_extract_op (vdef->stmt, &op))
> >> {
> >> @@ -7086,11 +6947,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
> >> else
> >> {
> >> /* First non-conversion stmt. */
> >> - if (!stmt_info)
> >> - {
> >> - stmt_info = vdef;
> >> - slp_for_stmt_info = vdef_slp;
> >> - }
> >> + if (!slp_for_stmt_info)
> >> + slp_for_stmt_info = vdef_slp;
> >>
> >> if (lane_reducing_op_p (op.code))
> >> {
> >> @@ -7122,29 +6980,15 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
> >> }
> >> else if (!vectype_in)
> >> vectype_in = SLP_TREE_VECTYPE (slp_node);
> >> - if (!REDUC_GROUP_FIRST_ELEMENT (vdef))
> >> - {
> >> - gcc_assert (reduc_idx == SLP_TREE_REDUC_IDX (vdef_slp));
> >> - vdef_slp = SLP_TREE_CHILDREN (vdef_slp)[reduc_idx];
> >> - }
> >> + vdef_slp = SLP_TREE_CHILDREN (vdef_slp)[reduc_idx];
> >> }
> >> -
> >> - reduc_def = op.ops[reduc_idx];
> >> reduc_chain_length++;
> >> }
> >> + stmt_info = SLP_TREE_REPRESENTATIVE (slp_for_stmt_info);
> >> +
> >> /* PHIs should not participate in patterns. */
> >> gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
> >>
> >> - /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last
> >> - element. */
> >> - if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
> >> - {
> >> - gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info));
> >> - stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
> >> - }
> >> - if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
> >> - gcc_assert (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info);
> >> -
> >> /* 1. Is vectorizable reduction? */
> >> /* Not supportable if the reduction variable is used in the loop, unless
> >> it's a reduction chain. */
> >> @@ -7459,8 +7303,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
> >> {
> >> /* When vectorizing a reduction chain w/o SLP the reduction PHI
> >> is not directy used in stmt. */
> >> - if (!only_slp_reduc_chain
> >> - && reduc_chain_length != 1)
> >> + if (reduc_chain_length != 1)
> >> {
> >> if (dump_enabled_p ())
> >> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> >> @@ -7795,22 +7638,18 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
> >>
> >> /* All but single defuse-cycle optimized and fold-left reductions go
> >> through their own vectorizable_* routines. */
> >> + stmt_vec_info tem
> >> + = SLP_TREE_REPRESENTATIVE (SLP_INSTANCE_TREE (slp_node_instance));
> >> if (!single_defuse_cycle && reduction_type != FOLD_LEFT_REDUCTION)
> >> + STMT_VINFO_DEF_TYPE (tem) = vect_internal_def;
> >> + else
> >> {
> >> - stmt_vec_info tem
> >> - = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info));
> >> - if (REDUC_GROUP_FIRST_ELEMENT (tem))
> >> - {
> >> - gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (tem));
> >> - tem = REDUC_GROUP_FIRST_ELEMENT (tem);
> >> - }
> >> - STMT_VINFO_DEF_TYPE (vect_orig_stmt (tem)) = vect_internal_def;
> >> - STMT_VINFO_DEF_TYPE (tem) = vect_internal_def;
> >> + STMT_VINFO_DEF_TYPE (tem) = vect_reduction_def;
> >> + if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
> >> + vect_reduction_update_partial_vector_usage (loop_vinfo, reduc_info,
> >> + slp_node, op.code,
> >> op.type,
> >> + vectype_in);
> >> }
> >> - else if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
> >> - vect_reduction_update_partial_vector_usage (loop_vinfo, reduc_info,
> >> - slp_node, op.code, op.type,
> >> - vectype_in);
> >> return true;
> >> }
> >>
> >> @@ -8244,8 +8083,6 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
> >> int i;
> >> bool nested_cycle = false;
> >> int vec_num;
> >> - const bool reduc_chain
> >> - = SLP_INSTANCE_KIND (slp_node_instance) == slp_inst_kind_reduc_chain;
> >>
> >> if (nested_in_vect_loop_p (loop, stmt_info))
> >> {
> >> @@ -8314,7 +8151,7 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
> >> vec<stmt_vec_info> &stmts = SLP_TREE_SCALAR_STMTS (slp_node);
> >>
> >> unsigned int num_phis = stmts.length ();
> >> - if (reduc_chain)
> >> + if (reduc_info->is_reduc_chain)
> >> num_phis = 1;
> >> initial_values.reserve (num_phis);
> >> for (unsigned int i = 0; i < num_phis; ++i)
> >> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> >> index 74a9a1929ba..6a377e384a0 100644
> >> --- a/gcc/tree-vect-patterns.cc
> >> +++ b/gcc/tree-vect-patterns.cc
> >> @@ -1022,13 +1022,11 @@ vect_reassociating_reduction_p (vec_info *vinfo,
> >> if (loop && nested_in_vect_loop_p (loop, stmt_info))
> >> return false;
> >>
> >> - if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
> >> - {
> >> - if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs
> >> (assign)),
> >> - code))
> >> - return false;
> >> - }
> >> - else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
> >> + if (!vect_is_reduction (stmt_info))
> >> + return false;
> >> +
> >> + if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
> >> + code))
> >> return false;
> >>
> >> *op0_out = gimple_assign_rhs1 (assign);
> >> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> >> index f553e8fba19..fe3bcff94a7 100644
> >> --- a/gcc/tree-vect-slp.cc
> >> +++ b/gcc/tree-vect-slp.cc
> >> @@ -4187,41 +4187,24 @@ vect_build_slp_instance (vec_info *vinfo,
> >> Return FALSE if SLP build fails. */
> >>
> >> static bool
> >> -vect_analyze_slp_reduc_chain (vec_info *vinfo,
> >> +vect_analyze_slp_reduc_chain (loop_vec_info vinfo,
> >> scalar_stmts_to_slp_tree_map_t *bst_map,
> >> - stmt_vec_info stmt_info,
> >> + vec<stmt_vec_info> &scalar_stmts,
> >> + stmt_vec_info reduc_phi_info,
> >> unsigned max_tree_size, unsigned *limit)
> >> {
> >> - vec<stmt_vec_info> scalar_stmts;
> >> -
> >> - /* Collect the reduction stmts and store them in scalar_stmts. */
> >> - scalar_stmts.create (REDUC_GROUP_SIZE (stmt_info));
> >> - stmt_vec_info next_info = stmt_info;
> >> - while (next_info)
> >> - {
> >> - scalar_stmts.quick_push (vect_stmt_to_vectorize (next_info));
> >> - next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
> >> - }
> >> - /* Mark the first element of the reduction chain as reduction to
> >> properly
> >> - transform the node. In the reduction analysis phase only the last
> >> - element of the chain is marked as reduction. */
> >> - STMT_VINFO_DEF_TYPE (stmt_info)
> >> - = STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
> >> - STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
> >> - = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
> >> + /* If there's no budget left bail out early. */
> >> + if (*limit == 0)
> >> + return false;
> >>
> >> /* Build the tree for the SLP instance. */
> >> vec<stmt_vec_info> root_stmt_infos = vNULL;
> >> vec<tree> remain = vNULL;
> >>
> >> - /* If there's no budget left bail out early. */
> >> - if (*limit == 0)
> >> - return false;
> >> -
> >> if (dump_enabled_p ())
> >> {
> >> dump_printf_loc (MSG_NOTE, vect_location,
> >> - "Starting SLP discovery for\n");
> >> + "Starting SLP discovery of reduction chain for\n");
> >> for (unsigned i = 0; i < scalar_stmts.length (); ++i)
> >> dump_printf_loc (MSG_NOTE, vect_location,
> >> " %G", scalar_stmts[i]->stmt);
> >> @@ -4233,136 +4216,234 @@ vect_analyze_slp_reduc_chain (vec_info *vinfo,
> >> poly_uint64 max_nunits = 1;
> >> unsigned tree_size = 0;
> >>
> >> + /* ??? We need this only for SLP discovery. */
> >> + for (unsigned i = 0; i < scalar_stmts.length (); ++i)
> >> + REDUC_GROUP_FIRST_ELEMENT (scalar_stmts[i]) = scalar_stmts[0];
> >> +
> >> slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
> >> &max_nunits, matches, limit,
> >> &tree_size, bst_map);
> >> +
> >> + for (unsigned i = 0; i < scalar_stmts.length (); ++i)
> >> + REDUC_GROUP_FIRST_ELEMENT (scalar_stmts[i]) = NULL;
> >> +
> >> if (node != NULL)
> >> {
> >> - /* Calculate the unrolling factor based on the smallest type. */
> >> - poly_uint64 unrolling_factor
> >> - = calculate_unrolling_factor (max_nunits, group_size);
> >> + /* Create a new SLP instance. */
> >> + slp_instance new_instance = XNEW (class _slp_instance);
> >> + SLP_INSTANCE_TREE (new_instance) = node;
> >> + SLP_INSTANCE_LOADS (new_instance) = vNULL;
> >> + SLP_INSTANCE_ROOT_STMTS (new_instance) = root_stmt_infos;
> >> + SLP_INSTANCE_REMAIN_DEFS (new_instance) = remain;
> >> + SLP_INSTANCE_KIND (new_instance) = slp_inst_kind_reduc_chain;
> >> + new_instance->reduc_phis = NULL;
> >> + new_instance->cost_vec = vNULL;
> >> + new_instance->subgraph_entries = vNULL;
> >>
> >> - if (maybe_ne (unrolling_factor, 1U)
> >> - && is_a <bb_vec_info> (vinfo))
> >> + vect_reduc_info reduc_info = info_for_reduction (vinfo, node);
> >> + reduc_info->is_reduc_chain = true;
> >> +
> >> + if (dump_enabled_p ())
> >> + dump_printf_loc (MSG_NOTE, vect_location,
> >> + "SLP size %u vs. limit %u.\n",
> >> + tree_size, max_tree_size);
> >> +
> >> + /* Fixup SLP reduction chains. If this is a reduction chain with
> >> + a conversion in front amend the SLP tree with a node for that. */
> >> + gimple *scalar_def = STMT_VINFO_REDUC_DEF (reduc_phi_info)->stmt;
> >> + if (is_gimple_assign (scalar_def)
> >> + && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (scalar_def)))
> >> + {
> >> + stmt_vec_info conv_info = vect_stmt_to_vectorize
> >> + (STMT_VINFO_REDUC_DEF
> >> (reduc_phi_info));
> >> + scalar_stmts = vNULL;
> >> + scalar_stmts.create (group_size);
> >> + for (unsigned i = 0; i < group_size; ++i)
> >> + scalar_stmts.quick_push (conv_info);
> >> + slp_tree conv = vect_create_new_slp_node (scalar_stmts, 1);
> >> + SLP_TREE_VECTYPE (conv)
> >> + = get_vectype_for_scalar_type (vinfo,
> >> + TREE_TYPE
> >> + (gimple_assign_lhs
> >> (scalar_def)),
> >> + group_size);
> >> + SLP_TREE_REDUC_IDX (conv) = 0;
> >> + conv->cycle_info.id = node->cycle_info.id;
> >> + SLP_TREE_CHILDREN (conv).quick_push (node);
> >> + SLP_INSTANCE_TREE (new_instance) = conv;
> >> + }
> >> + /* Fill the backedge child of the PHI SLP node. The
> >> + general matching code cannot find it because the
> >> + scalar code does not reflect how we vectorize the
> >> + reduction. */
> >> + use_operand_p use_p;
> >> + imm_use_iterator imm_iter;
> >> + class loop *loop = LOOP_VINFO_LOOP (vinfo);
> >> + FOR_EACH_IMM_USE_FAST (use_p, imm_iter,
> >> + gimple_get_lhs (scalar_def))
> >> + /* There are exactly two non-debug uses, the reduction
> >> + PHI and the loop-closed PHI node. */
> >> + if (!is_gimple_debug (USE_STMT (use_p))
> >> + && gimple_bb (USE_STMT (use_p)) == loop->header)
> >> + {
> >> + auto_vec<stmt_vec_info, 64> phis (group_size);
> >> + stmt_vec_info phi_info = vinfo->lookup_stmt (USE_STMT (use_p));
> >> + for (unsigned i = 0; i < group_size; ++i)
> >> + phis.quick_push (phi_info);
> >> + slp_tree *phi_node = bst_map->get (phis);
> >> + unsigned dest_idx = loop_latch_edge (loop)->dest_idx;
> >> + SLP_TREE_CHILDREN (*phi_node)[dest_idx]
> >> + = SLP_INSTANCE_TREE (new_instance);
> >> + SLP_INSTANCE_TREE (new_instance)->refcnt++;
> >> + }
> >> +
> >> + vinfo->slp_instances.safe_push (new_instance);
> >> +
> >> + /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
> >> + the number of scalar stmts in the root in a few places.
> >> + Verify that assumption holds. */
> >> + gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance))
> >> + .length () == group_size);
> >> +
> >> + if (dump_enabled_p ())
> >> {
> >> - unsigned HOST_WIDE_INT const_max_nunits;
> >> - if (!max_nunits.is_constant (&const_max_nunits)
> >> - || const_max_nunits > group_size)
> >> + dump_printf_loc (MSG_NOTE, vect_location,
> >> + "Final SLP tree for instance %p:\n",
> >> + (void *) new_instance);
> >> + vect_print_slp_graph (MSG_NOTE, vect_location,
> >> + SLP_INSTANCE_TREE (new_instance));
> >> + }
> >> +
> >> + return true;
> >> + }
> >> + /* Failed to SLP. */
> >> + if (dump_enabled_p ())
> >> + dump_printf_loc (MSG_NOTE, vect_location,
> >> + "SLP discovery of reduction chain failed\n");
> >> + return false;
> >> +}
> >> +
> >> +/* Analyze an SLP instance starting from SCALAR_STMTS which are a group
> >> + of KIND. Return true if successful. */
> >> +
> >> +static bool
> >> +vect_analyze_slp_reduction (loop_vec_info vinfo,
> >> + stmt_vec_info scalar_stmt,
> >> + unsigned max_tree_size, unsigned *limit,
> >> + scalar_stmts_to_slp_tree_map_t *bst_map,
> >> + bool force_single_lane)
> >> +{
> >> + slp_instance_kind kind = slp_inst_kind_reduc_group;
> >> +
> >> + /* If there's no budget left bail out early. */
> >> + if (*limit == 0)
> >> + return false;
> >> +
> >> + vec<stmt_vec_info> scalar_stmts = vNULL;
> >> + /* Try to gather a reduction chain. */
> >> + if (! force_single_lane
> >> + && STMT_VINFO_DEF_TYPE (scalar_stmt) == vect_reduction_def)
> >> + {
> >> + bool fail = false;
> >> + /* ??? We could leave operation code checking to SLP discovery. */
> >> + code_helper code
> >> + = STMT_VINFO_REDUC_CODE (STMT_VINFO_REDUC_DEF
> >> + (vect_orig_stmt (scalar_stmt)));
> >> + bool first = true;
> >> + stmt_vec_info next_stmt = scalar_stmt;
> >> + do
> >> + {
> >> + stmt_vec_info stmt = next_stmt;
> >> + gimple_match_op op;
> >> + if (!gimple_extract_op (STMT_VINFO_STMT (stmt), &op))
> >> + gcc_unreachable ();
> >> + tree reduc_def = gimple_arg (STMT_VINFO_STMT (stmt),
> >> + STMT_VINFO_REDUC_IDX (stmt));
> >> + next_stmt = vect_stmt_to_vectorize (vinfo->lookup_def (reduc_def));
> >> + gcc_assert (is_a <gphi *> (STMT_VINFO_STMT (next_stmt))
> >> + || STMT_VINFO_REDUC_IDX (next_stmt) != -1);
> >> + if (!gimple_extract_op (STMT_VINFO_STMT (vect_orig_stmt (stmt)), &op))
> >> + gcc_unreachable ();
> >> + if (CONVERT_EXPR_CODE_P (op.code)
> >> + && (first
> >> + || is_a <gphi *> (STMT_VINFO_STMT (next_stmt))))
> >> + ;
> >> + else if (code != op.code)
> >> {
> >> - if (dump_enabled_p ())
> >> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> >> - "Build SLP failed: store group "
> >> - "size not a multiple of the vector size "
> >> - "in basic block SLP\n");
> >> - vect_free_slp_tree (node);
> >> - return false;
> >> + fail = true;
> >> + break;
> >> }
> >> - /* Fatal mismatch. */
> >> - if (dump_enabled_p ())
> >> - dump_printf_loc (MSG_NOTE, vect_location,
> >> - "SLP discovery succeeded but node needs "
> >> - "splitting\n");
> >> - memset (matches, true, group_size);
> >> - matches[group_size / const_max_nunits * const_max_nunits] =
> >> false;
> >> - vect_free_slp_tree (node);
> >> + else
> >> + scalar_stmts.safe_push (stmt);
> >> + first = false;
> >> }
> >> - else
> >> + while (!is_a <gphi *> (STMT_VINFO_STMT (next_stmt)));
> >> + if (!fail && scalar_stmts.length () > 1)
> >> {
> >> - /* Create a new SLP instance. */
> >> - slp_instance new_instance = XNEW (class _slp_instance);
> >> - SLP_INSTANCE_TREE (new_instance) = node;
> >> - SLP_INSTANCE_LOADS (new_instance) = vNULL;
> >> - SLP_INSTANCE_ROOT_STMTS (new_instance) = root_stmt_infos;
> >> - SLP_INSTANCE_REMAIN_DEFS (new_instance) = remain;
> >> - SLP_INSTANCE_KIND (new_instance) = slp_inst_kind_reduc_chain;
> >> - new_instance->reduc_phis = NULL;
> >> - new_instance->cost_vec = vNULL;
> >> - new_instance->subgraph_entries = vNULL;
> >> + scalar_stmts.reverse ();
> >> + if (vect_analyze_slp_reduc_chain (vinfo, bst_map, scalar_stmts,
> >> + next_stmt, max_tree_size, limit))
> >> + return true;
> >> + scalar_stmts.release ();
> >> + }
> >> + }
> >>
> >> - if (dump_enabled_p ())
> >> - dump_printf_loc (MSG_NOTE, vect_location,
> >> - "SLP size %u vs. limit %u.\n",
> >> - tree_size, max_tree_size);
> >> + scalar_stmts.create (1);
> >> + scalar_stmts.quick_push (scalar_stmt);
> >>
> >> - /* Fixup SLP reduction chains. If this is a reduction chain with
> >> - a conversion in front amend the SLP tree with a node for
> >> that. */
> >> - gimple *scalar_def
> >> - = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
> >> - if (STMT_VINFO_DEF_TYPE (scalar_stmts[0]) != vect_reduction_def)
> >> - {
> >> - /* Get at the conversion stmt - we know it's the single use
> >> - of the last stmt of the reduction chain. */
> >> - use_operand_p use_p;
> >> - bool r = single_imm_use (gimple_assign_lhs (scalar_def),
> >> - &use_p, &scalar_def);
> >> - gcc_assert (r);
> >> - stmt_vec_info next_info = vinfo->lookup_stmt (scalar_def);
> >> - next_info = vect_stmt_to_vectorize (next_info);
> >> - scalar_stmts = vNULL;
> >> - scalar_stmts.create (group_size);
> >> - for (unsigned i = 0; i < group_size; ++i)
> >> - scalar_stmts.quick_push (next_info);
> >> - slp_tree conv = vect_create_new_slp_node (scalar_stmts, 1);
> >> - SLP_TREE_VECTYPE (conv)
> >> - = get_vectype_for_scalar_type (vinfo,
> >> - TREE_TYPE
> >> - (gimple_assign_lhs
> >> (scalar_def)),
> >> - group_size);
> >> - SLP_TREE_REDUC_IDX (conv) = 0;
> >> - conv->cycle_info.id = node->cycle_info.id;
> >> - SLP_TREE_CHILDREN (conv).quick_push (node);
> >> - SLP_INSTANCE_TREE (new_instance) = conv;
> >> - /* We also have to fake this conversion stmt as SLP reduction
> >> - group so we don't have to mess with too much code
> >> - elsewhere. */
> >> - REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
> >> - REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
> >> - }
> >> - /* Fill the backedge child of the PHI SLP node. The
> >> - general matching code cannot find it because the
> >> - scalar code does not reflect how we vectorize the
> >> - reduction. */
> >> - use_operand_p use_p;
> >> - imm_use_iterator imm_iter;
> >> - class loop *loop = LOOP_VINFO_LOOP (as_a <loop_vec_info>
> >> (vinfo));
> >> - FOR_EACH_IMM_USE_FAST (use_p, imm_iter,
> >> - gimple_get_lhs (scalar_def))
> >> - /* There are exactly two non-debug uses, the reduction
> >> - PHI and the loop-closed PHI node. */
> >> - if (!is_gimple_debug (USE_STMT (use_p))
> >> - && gimple_bb (USE_STMT (use_p)) == loop->header)
> >> - {
> >> - auto_vec<stmt_vec_info, 64> phis (group_size);
> >> - stmt_vec_info phi_info
> >> - = vinfo->lookup_stmt (USE_STMT (use_p));
> >> - for (unsigned i = 0; i < group_size; ++i)
> >> - phis.quick_push (phi_info);
> >> - slp_tree *phi_node = bst_map->get (phis);
> >> - unsigned dest_idx = loop_latch_edge (loop)->dest_idx;
> >> - SLP_TREE_CHILDREN (*phi_node)[dest_idx]
> >> - = SLP_INSTANCE_TREE (new_instance);
> >> - SLP_INSTANCE_TREE (new_instance)->refcnt++;
> >> - }
> >> + if (dump_enabled_p ())
> >> + {
> >> + dump_printf_loc (MSG_NOTE, vect_location,
> >> + "Starting SLP discovery for\n");
> >> + for (unsigned i = 0; i < scalar_stmts.length (); ++i)
> >> + dump_printf_loc (MSG_NOTE, vect_location,
> >> + " %G", scalar_stmts[i]->stmt);
> >> + }
> >>
> >> - vinfo->slp_instances.safe_push (new_instance);
> >> + /* Build the tree for the SLP instance. */
> >> + unsigned int group_size = scalar_stmts.length ();
> >> + bool *matches = XALLOCAVEC (bool, group_size);
> >> + poly_uint64 max_nunits = 1;
> >> + unsigned tree_size = 0;
> >>
> >> - /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
> >> - the number of scalar stmts in the root in a few places.
> >> - Verify that assumption holds. */
> >> - gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE
> >> (new_instance))
> >> - .length () == group_size);
> >> + slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
> >> + &max_nunits, matches, limit,
> >> + &tree_size, bst_map);
> >> + if (node != NULL)
> >> + {
> >> + /* Create a new SLP instance. */
> >> + slp_instance new_instance = XNEW (class _slp_instance);
> >> + SLP_INSTANCE_TREE (new_instance) = node;
> >> + SLP_INSTANCE_LOADS (new_instance) = vNULL;
> >> + SLP_INSTANCE_ROOT_STMTS (new_instance) = vNULL;
> >> + SLP_INSTANCE_REMAIN_DEFS (new_instance) = vNULL;
> >> + SLP_INSTANCE_KIND (new_instance) = kind;
> >> + new_instance->reduc_phis = NULL;
> >> + new_instance->cost_vec = vNULL;
> >> + new_instance->subgraph_entries = vNULL;
> >>
> >> - if (dump_enabled_p ())
> >> - {
> >> - dump_printf_loc (MSG_NOTE, vect_location,
> >> - "Final SLP tree for instance %p:\n",
> >> - (void *) new_instance);
> >> - vect_print_slp_graph (MSG_NOTE, vect_location,
> >> - SLP_INSTANCE_TREE (new_instance));
> >> - }
> >> + if (dump_enabled_p ())
> >> + dump_printf_loc (MSG_NOTE, vect_location,
> >> + "SLP size %u vs. limit %u.\n",
> >> + tree_size, max_tree_size);
> >>
> >> - return true;
> >> + vinfo->slp_instances.safe_push (new_instance);
> >> +
> >> + /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
> >> + the number of scalar stmts in the root in a few places.
> >> + Verify that assumption holds. */
> >> + gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance))
> >> + .length () == group_size);
> >> +
> >> + if (dump_enabled_p ())
> >> + {
> >> + dump_printf_loc (MSG_NOTE, vect_location,
> >> + "Final SLP tree for instance %p:\n",
> >> + (void *) new_instance);
> >> + vect_print_slp_graph (MSG_NOTE, vect_location,
> >> + SLP_INSTANCE_TREE (new_instance));
> >> }
> >> +
> >> + return true;
> >> }
> >> /* Failed to SLP. */
> >>
> >> @@ -5256,40 +5337,6 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
> >>
> >> if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
> >> {
> >> - /* Find SLP sequences starting from reduction chains. */
> >> - FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
> >> - if (! STMT_VINFO_RELEVANT_P (first_element)
> >> - && ! STMT_VINFO_LIVE_P (first_element))
> >> - ;
> >> - else if (force_single_lane
> >> - || ! vect_analyze_slp_reduc_chain (vinfo, bst_map,
> >> - first_element,
> >> - max_tree_size, &limit))
> >> - {
> >> - if (dump_enabled_p ())
> >> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> >> - "SLP discovery of reduction chain failed\n");
> >> - /* Dissolve reduction chain group. */
> >> - stmt_vec_info vinfo = first_element;
> >> - stmt_vec_info last = NULL;
> >> - while (vinfo)
> >> - {
> >> - stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
> >> - REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
> >> - REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
> >> - last = vinfo;
> >> - vinfo = next;
> >> - }
> >> - STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
> >> - /* ??? When there's a conversion around the reduction
> >> - chain 'last' isn't the entry of the reduction. */
> >> - if (STMT_VINFO_DEF_TYPE (last) != vect_reduction_def)
> >> - return opt_result::failure_at (vect_location,
> >> - "SLP build failed.\n");
> >> - /* It can be still vectorized as part of an SLP reduction. */
> >> - loop_vinfo->reductions.safe_push (last);
> >> - }
> >> -
> >> /* Find SLP sequences starting from groups of reductions. */
> >> if (loop_vinfo->reductions.length () > 0)
> >> {
> >> @@ -5315,23 +5362,13 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
> >> if (!force_single_lane
> >> && !lane_reducing_stmt_p (STMT_VINFO_STMT (next_info)))
> >> scalar_stmts.quick_push (next_info);
> >> - else
> >> - {
> >> - /* Do SLP discovery for single-lane reductions. */
> >> - vec<stmt_vec_info> stmts;
> >> - vec<stmt_vec_info> roots = vNULL;
> >> - vec<tree> remain = vNULL;
> >> - stmts.create (1);
> >> - stmts.quick_push (next_info);
> >> - if (! vect_build_slp_instance (vinfo,
> >> -
> >> slp_inst_kind_reduc_group,
> >> - stmts, roots, remain,
> >> - max_tree_size, &limit,
> >> - bst_map,
> >> - force_single_lane))
> >> - return opt_result::failure_at (vect_location,
> >> - "SLP build
> >> failed.\n");
> >> - }
> >> + /* Do SLP discovery for single-lane reductions. */
> >> + else if (! vect_analyze_slp_reduction (loop_vinfo, next_info,
> >> + max_tree_size, &limit,
> >> + bst_map,
> >> + force_single_lane))
> >> + return opt_result::failure_at (vect_location,
> >> + "SLP build failed.\n");
> >> }
> >> }
> >> /* Save for re-processing on failure. */
> >> @@ -5349,20 +5386,13 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
> >> scalar_stmts.release ();
> >> /* Do SLP discovery for single-lane reductions. */
> >> for (auto stmt_info : saved_stmts)
> >> - {
> >> - vec<stmt_vec_info> stmts;
> >> - vec<stmt_vec_info> roots = vNULL;
> >> - vec<tree> remain = vNULL;
> >> - stmts.create (1);
> >> - stmts.quick_push (vect_stmt_to_vectorize (stmt_info));
> >> - if (! vect_build_slp_instance (vinfo,
> >> - slp_inst_kind_reduc_group,
> >> - stmts, roots, remain,
> >> - max_tree_size, &limit,
> >> - bst_map, force_single_lane))
> >> - return opt_result::failure_at (vect_location,
> >> - "SLP build failed.\n");
> >> - }
> >> + if (! vect_analyze_slp_reduction (loop_vinfo,
> >> + vect_stmt_to_vectorize
> >> + (stmt_info),
> >> + max_tree_size, &limit,
> >> + bst_map, force_single_lane))
> >> + return opt_result::failure_at (vect_location,
> >> + "SLP build failed.\n");
> >> }
> >> saved_stmts.release ();
> >> }
> >> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> >> index 18e672b26a7..91d5ee08ac5 100644
> >> --- a/gcc/tree-vectorizer.h
> >> +++ b/gcc/tree-vectorizer.h
> >> @@ -843,6 +843,9 @@ public:
> >> following land-reducing operation would be assigned to. */
> >> unsigned int reduc_result_pos;
> >>
> >> + /* Whether this represents a reduction chain. */
> >> + bool is_reduc_chain;
> >> +
> >> /* Whether we force a single cycle PHI during reduction vectorization. */
> >> bool force_single_cycle;
> >>
> >> @@ -1065,10 +1068,6 @@ public:
> >> /* Reduction cycles detected in the loop. Used in loop-aware SLP. */
> >> auto_vec<stmt_vec_info> reductions;
> >>
> >> - /* All reduction chains in the loop, represented by the first
> >> - stmt in the chain. */
> >> - auto_vec<stmt_vec_info> reduction_chains;
> >> -
> >> /* Defs that could not be analyzed such as OMP SIMD calls without
> >> a LHS. */
> >> auto_vec<stmt_vec_info> alternate_defs;
> >> @@ -1289,7 +1288,6 @@ public:
> >> #define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances
> >> #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
> >> #define LOOP_VINFO_REDUCTIONS(L) (L)->reductions
> >> -#define LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains
> >> #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
> >> #define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter
> >> #define LOOP_VINFO_EARLY_BREAKS(L) (L)->early_breaks
> >> --
> >> 2.51.0