On Wed, 28 Jun 2023, Tamar Christina wrote: > Hi All, > > For early break vectorization we have to update niters analysis to record and > analyze all exits of the loop, and so all conds. > > The niters of the loop is still determined by the main/natural exit of the > loop > as this is the O(n) bounds. For now we don't do much with the secondary > conds, > but their assumptions can be used to generate versioning checks later. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master?
I probably confused vec_init_exit_info in the previous patch - that said, I'm missing a clear function that determines the natural exit of the original (if-converted) scalar loop. As vec_init_exit_info seems to (re-)compute that I'll comment on it here. + /* The main IV is to be determined by the block that's the first reachable + block from the latch. We cannot rely on the order the loop analysis + returns and we don't have any SCEV analysis on the loop. */ + auto_vec <edge> workset; + workset.safe_push (loop_latch_edge (loop)); + hash_set <edge> visited; + + while (!workset.is_empty ()) + { + edge e = workset.pop (); + if (visited.contains (e)) + continue; + + bool found_p = false; + for (edge ex : e->src->succs) + { + if (exits.contains (ex)) + { + found_p = true; + e = ex; + break; + } + } + + if (found_p) + { + loop->vec_loop_iv = e; + for (edge ex : exits) + if (e != ex) + loop->vec_loop_alt_exits.safe_push (ex); + return; + } + else + { + for (edge ex : e->src->preds) + workset.safe_insert (0, ex); + } + visited.add (e); + } So this greedily follows edges from the latch and takes the first exit. Why's that better than simply choosing the first? I'd have done auto_vec<edge> exits = get_loop_exit_edges (loop); for (e : exits) { if (vect_get_loop_niters (...)) { if no assumptions use that edge, if assumptions continue searching, maybe ther's an edge w/o assumptions } } use (first) exit with assumptions we probably want to know 'may_be_zero' as well and prefer an edge without that. So eventually call number_of_iterations_exit_assumptions directly and look for the best niter_desc and pass that to vect_get_loop_niters (or re-do the work). As said for "copying" the exit to the loop copies use the block mapping. > Thanks, > Tamar > > gcc/ChangeLog: > > * tree-vect-loop.cc (vect_get_loop_niters): Analyze all exits and return > all gconds. > (vect_analyze_loop_form): Update code checking for conds. > (vect_create_loop_vinfo): Handle having multiple conds. > (vect_analyze_loop): Release extra loop conds structures. > * tree-vectorizer.h (LOOP_VINFO_LOOP_CONDS, > LOOP_VINFO_LOOP_IV_COND): New. > (struct vect_loop_form_info): Add conds, loop_iv_cond. > > --- inline copy of patch -- > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc > index > 55e69a7ca0b24e0872477141db6f74dbf90b7981..9065811b3b9c2a550baf44768603172b9e26b94b > 100644 > --- a/gcc/tree-vect-loop.cc > +++ b/gcc/tree-vect-loop.cc > @@ -849,80 +849,106 @@ vect_fixup_scalar_cycles_with_patterns (loop_vec_info > loop_vinfo) > in NUMBER_OF_ITERATIONSM1. Place the condition under which the > niter information holds in ASSUMPTIONS. > > - Return the loop exit condition. */ > + Return the loop exit conditions. */ > > > -static gcond * > +static vec<gcond *> > vect_get_loop_niters (class loop *loop, tree *assumptions, > tree *number_of_iterations, tree *number_of_iterationsm1) > { > - edge exit = single_exit (loop); > + auto_vec<edge> exits = get_loop_exit_edges (loop); > + vec<gcond *> conds; > + conds.create (exits.length ()); > class tree_niter_desc niter_desc; > tree niter_assumptions, niter, may_be_zero; > - gcond *cond = get_loop_exit_condition (loop); > > *assumptions = boolean_true_node; > *number_of_iterationsm1 = chrec_dont_know; > *number_of_iterations = chrec_dont_know; > + > DUMP_VECT_SCOPE ("get_loop_niters"); > > - if (!exit) > - return cond; > + if (exits.is_empty ()) > + return conds; > > - may_be_zero = NULL_TREE; > - if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL) > - || chrec_contains_undetermined (niter_desc.niter)) > - return cond; > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_NOTE, vect_location, "Loop has %d exits.\n", > + exits.length ()); > > - niter_assumptions = niter_desc.assumptions; > - may_be_zero = niter_desc.may_be_zero; > - niter = niter_desc.niter; > + edge exit; > + unsigned int i; > + FOR_EACH_VEC_ELT (exits, i, exit) > + { > + gcond *cond = get_edge_condition (exit); > + if (cond) > + conds.safe_push (cond); > > - if (may_be_zero && integer_zerop (may_be_zero)) > - may_be_zero = NULL_TREE; > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_NOTE, vect_location, "Analyzing exit %d...\n", i); > > - if (may_be_zero) > - { > - if (COMPARISON_CLASS_P (may_be_zero)) > + may_be_zero = NULL_TREE; > + if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, > NULL) > + || chrec_contains_undetermined (niter_desc.niter)) > + continue; > + > + niter_assumptions = niter_desc.assumptions; > + may_be_zero = niter_desc.may_be_zero; > + niter = niter_desc.niter; > + > + if (may_be_zero && integer_zerop (may_be_zero)) > + may_be_zero = NULL_TREE; > + > + if (may_be_zero) > { > - /* Try to combine may_be_zero with assumptions, this can simplify > - computation of niter expression. */ > - if (niter_assumptions && !integer_nonzerop (niter_assumptions)) > - niter_assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, > - niter_assumptions, > - fold_build1 (TRUTH_NOT_EXPR, > - boolean_type_node, > - may_be_zero)); > + if (COMPARISON_CLASS_P (may_be_zero)) > + { > + /* Try to combine may_be_zero with assumptions, this can simplify > + computation of niter expression. */ > + if (niter_assumptions && !integer_nonzerop (niter_assumptions)) > + niter_assumptions = fold_build2 (TRUTH_AND_EXPR, > boolean_type_node, > + niter_assumptions, > + fold_build1 (TRUTH_NOT_EXPR, > + boolean_type_node, > + may_be_zero)); > + else > + niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero, > + build_int_cst (TREE_TYPE (niter), 0), > + rewrite_to_non_trapping_overflow (niter)); > + > + may_be_zero = NULL_TREE; > + } > + else if (integer_nonzerop (may_be_zero) && exit == loop->vec_loop_iv) > + { > + *number_of_iterationsm1 = build_int_cst (TREE_TYPE (niter), 0); > + *number_of_iterations = build_int_cst (TREE_TYPE (niter), 1); > + continue; > + } > else > - niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero, > - build_int_cst (TREE_TYPE (niter), 0), > - rewrite_to_non_trapping_overflow (niter)); > + continue; > + } > > - may_be_zero = NULL_TREE; > - } > - else if (integer_nonzerop (may_be_zero)) > + /* Loop assumptions are based off the normal exit. */ > + if (exit == loop->vec_loop_iv) > { > - *number_of_iterationsm1 = build_int_cst (TREE_TYPE (niter), 0); > - *number_of_iterations = build_int_cst (TREE_TYPE (niter), 1); > - return cond; > + *assumptions = niter_assumptions; > + *number_of_iterationsm1 = niter; > + > + /* We want the number of loop header executions which is the number > + of latch executions plus one. > + ??? For UINT_MAX latch executions this number overflows to zero > + for loops like do { n++; } while (n != 0); */ > + if (niter && !chrec_contains_undetermined (niter)) > + niter = fold_build2 (PLUS_EXPR, TREE_TYPE (niter), > + unshare_expr (niter), > + build_int_cst (TREE_TYPE (niter), 1)); > + *number_of_iterations = niter; > } > - else > - return cond; > } > > - *assumptions = niter_assumptions; > - *number_of_iterationsm1 = niter; > - > - /* We want the number of loop header executions which is the number > - of latch executions plus one. > - ??? For UINT_MAX latch executions this number overflows to zero > - for loops like do { n++; } while (n != 0); */ > - if (niter && !chrec_contains_undetermined (niter)) > - niter = fold_build2 (PLUS_EXPR, TREE_TYPE (niter), unshare_expr (niter), > - build_int_cst (TREE_TYPE (niter), 1)); > - *number_of_iterations = niter; > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_NOTE, vect_location, "All loop exits successfully > analyzed.\n"); > > - return cond; > + return conds; > } > > /* Function bb_in_loop_p > @@ -1768,15 +1794,26 @@ vect_analyze_loop_form (class loop *loop, > vect_loop_form_info *info) > "not vectorized:" > " abnormal loop exit edge.\n"); > > - info->loop_cond > + info->conds > = vect_get_loop_niters (loop, &info->assumptions, > &info->number_of_iterations, > &info->number_of_iterationsm1); > - if (!info->loop_cond) > + > + if (info->conds.is_empty ()) > return opt_result::failure_at > (vect_location, > "not vectorized: complicated exit condition.\n"); > > + /* Determine what the primary and alternate exit conds are. */ > + info->alt_loop_conds.create (info->conds.length () - 1); > + for (gcond *cond : info->conds) > + { > + if (loop->vec_loop_iv->src != gimple_bb (cond)) > + info->alt_loop_conds.quick_push (cond); > + else > + info->loop_cond = cond; > + } Do you really need those explicitely? ->conds and ->alt_loop_conds looks redundant at least. > + > if (integer_zerop (info->assumptions) > || !info->number_of_iterations > || chrec_contains_undetermined (info->number_of_iterations)) > @@ -1821,8 +1858,14 @@ vect_create_loop_vinfo (class loop *loop, > vec_info_shared *shared, > if (!integer_onep (info->assumptions) && !main_loop_info) > LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo) = info->assumptions; > > - stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (info->loop_cond); > - STMT_VINFO_TYPE (loop_cond_info) = loop_exit_ctrl_vec_info_type; > + for (gcond *cond : info->alt_loop_conds) > + { > + stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (cond); > + STMT_VINFO_TYPE (loop_cond_info) = loop_exit_ctrl_vec_info_type; > + } > + LOOP_VINFO_LOOP_CONDS (loop_vinfo).safe_splice (info->alt_loop_conds); > + LOOP_VINFO_LOOP_IV_COND (loop_vinfo) = info->loop_cond; > + > if (info->inner_loop_cond) > { > stmt_vec_info inner_loop_cond_info > @@ -3520,6 +3563,9 @@ vect_analyze_loop (class loop *loop, vec_info_shared > *shared) > "***** Choosing vector mode %s\n", > GET_MODE_NAME (first_loop_vinfo->vector_mode)); > > + loop_form_info.conds.release (); > + loop_form_info.alt_loop_conds.release (); > + > /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is > enabled, SIMDUID is not set, it is the innermost loop and we have > either already found the loop's SIMDLEN or there was no SIMDLEN to > @@ -3631,6 +3677,9 @@ vect_analyze_loop (class loop *loop, vec_info_shared > *shared) > (first_loop_vinfo->epilogue_vinfos[0]->vector_mode)); > } > > + loop_form_info.conds.release (); > + loop_form_info.alt_loop_conds.release (); > + > return first_loop_vinfo; > } > > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index > bd5eceb5da7a45ef036cd14609ebe091799320bf..1cc003c12e2447eca878f56cb019236f56e96f85 > 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -876,6 +876,12 @@ public: > we need to peel off iterations at the end to form an epilogue loop. */ > bool peeling_for_niter; > > + /* List of loop additional IV conditionals found in the loop. */ drop "IV" > + auto_vec<gcond *> conds; > + > + /* Main loop IV cond. */ > + gcond* loop_iv_cond; > + I guess I have to look at the followup patches to see how often we have to access loop_iv_cond/conds. > /* True if there are no loop carried data dependencies in the loop. > If loop->safelen <= 1, then this is always true, either the loop > didn't have any loop carried data dependencies, or the loop is being > @@ -966,6 +972,8 @@ public: > #define LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains > #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps > #define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter > +#define LOOP_VINFO_LOOP_CONDS(L) (L)->conds > +#define LOOP_VINFO_LOOP_IV_COND(L) (L)->loop_iv_cond > #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies > #define LOOP_VINFO_SCALAR_LOOP(L) (L)->scalar_loop > #define LOOP_VINFO_SCALAR_LOOP_SCALING(L) (L)->scalar_loop_scaling > @@ -2353,7 +2361,9 @@ struct vect_loop_form_info > tree number_of_iterations; > tree number_of_iterationsm1; > tree assumptions; > + vec<gcond *> conds; > gcond *loop_cond; > + vec<gcond *> alt_loop_conds; > gcond *inner_loop_cond; > }; > extern opt_result vect_analyze_loop_form (class loop *, vect_loop_form_info > *); > > > > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman; HRB 36809 (AG Nuernberg)