On Fri, 24 Nov 2023, Tamar Christina wrote:
> Hi,
>
> Having simplified peeling this patch becomes smaller as well:
>
> This changes the PHI node updates to support early breaks.
> It has to support both the case where the loop's exit matches the normal loop
> exit and one where the early exit is "inverted", i.e. it's an early exit edge.
>
> In the latter case we must always restart the loop for VF iterations. For an
> early exit the reason is obvious, but there are cases where the "normal" exit
> is located before the early one. This exit then does a check on ivtmp,
> resulting in us leaving the loop since it thinks we're done.
>
> In these cases we may still have side-effects to perform so we also go to the
> scalar loop.
>
> For the "normal" exit niters has already been adjusted for peeling, for the
> early exits we must find out how many iterations we actually did. So we have
> to recalculate the new position for each exit.
>
> For the "inverse" case we essentially peel a vector iteration *after* the
> vector
> loop has finished, i.e. conceptually it's the same as vect epilogue peeling
> but
> without generating code for the peeled iteration. That'll be handled by the
> scalar loop.
>
> To do this we just adjust niters_vector_mult_vf and remove one VF and for
> masked
> cases we do the same with final_iv.
>
> The normal IV update code will then generate the correct values for us.
> Eventually VRP will simplify the constant bounds and we get the proper scalar
> unrolling. This means we don't have to make any changes at all to
> vect_update_ivs_after_vectorizer other than dropping some asserts.
>
> Ok for master?
Nice. OK.
Thanks,
Richard.
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> * tree-vect-loop-manip.cc (vect_set_loop_condition_partial_vectors,
> vect_set_loop_condition_partial_vectors_avx512,
> vect_gen_vector_loop_niters_mult_vf): Support peeling a vector
> iteration.
> (vect_update_ivs_after_vectorizer): Drop asserts.
> (vect_do_peeling): Skip forwarder edge.
> (vect_is_loop_exit_latch_pred): New.
> * tree-vectorizer.h (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED): New.
>
> --- inline copy of patch ---
>
> diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
> index
> d61d7c3a189b279fc3bcbb58c3c0e32521db3cf8..476be8a0bb6da2d06c4ca7052cb07bacecca60b1
> 100644
> --- a/gcc/tree-vect-loop-manip.cc
> +++ b/gcc/tree-vect-loop-manip.cc
> @@ -951,7 +951,18 @@ vect_set_loop_condition_partial_vectors (class loop
> *loop, edge exit_edge,
>
> if (final_iv)
> {
> - gassign *assign = gimple_build_assign (final_iv, orig_niters);
> + gassign *assign;
> + /* If vectorizing an inverted early break loop we have to restart the
> + scalar loop at niters - vf. This matches what we do in
> + vect_gen_vector_loop_niters_mult_vf for non-masked loops. */
> + if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
> + {
> + tree ftype = TREE_TYPE (orig_niters);
> + tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
> + assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf);
> + }
> + else
> + assign = gimple_build_assign (final_iv, orig_niters);
> gsi_insert_on_edge_immediate (exit_edge, assign);
> }
>
> @@ -1188,8 +1199,19 @@ vect_set_loop_condition_partial_vectors_avx512 (class
> loop *loop,
>
> if (final_iv)
> {
> - gassign *assign = gimple_build_assign (final_iv, orig_niters);
> - gsi_insert_on_edge_immediate (single_exit (loop), assign);
> + gassign *assign;
> + /* If vectorizing an inverted early break loop we have to restart the
> + scalar loop at niters - vf. This matches what we do in
> + vect_gen_vector_loop_niters_mult_vf for non-masked loops. */
> + if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
> + {
> + tree ftype = TREE_TYPE (orig_niters);
> + tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
> + assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf);
> + }
> + else
> + assign = gimple_build_assign (final_iv, orig_niters);
> + gsi_insert_on_edge_immediate (exit_edge, assign);
> }
>
> return cond_stmt;
> @@ -2157,11 +2179,8 @@ vect_update_ivs_after_vectorizer (loop_vec_info
> loop_vinfo,
> gphi_iterator gsi, gsi1;
> class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> basic_block update_bb = update_e->dest;
> -
> basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
> -
> - /* Make sure there exists a single-predecessor exit bb: */
> - gcc_assert (single_pred_p (exit_bb));
> + gimple_stmt_iterator last_gsi = gsi_last_bb (exit_bb);
>
> for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis
> (update_bb);
> !gsi_end_p (gsi) && !gsi_end_p (gsi1);
> @@ -2171,7 +2190,6 @@ vect_update_ivs_after_vectorizer (loop_vec_info
> loop_vinfo,
> tree step_expr, off;
> tree type;
> tree var, ni, ni_name;
> - gimple_stmt_iterator last_gsi;
>
> gphi *phi = gsi.phi ();
> gphi *phi1 = gsi1.phi ();
> @@ -2207,7 +2225,8 @@ vect_update_ivs_after_vectorizer (loop_vec_info
> loop_vinfo,
> {
> tree stype = TREE_TYPE (step_expr);
> off = fold_build2 (MULT_EXPR, stype,
> - fold_convert (stype, niters), step_expr);
> + fold_convert (stype, niters), step_expr);
> +
> if (POINTER_TYPE_P (type))
> ni = fold_build_pointer_plus (init_expr, off);
> else
> @@ -2226,9 +2245,9 @@ vect_update_ivs_after_vectorizer (loop_vec_info
> loop_vinfo,
>
> var = create_tmp_var (type, "tmp");
>
> - last_gsi = gsi_last_bb (exit_bb);
> gimple_seq new_stmts = NULL;
> ni_name = force_gimple_operand (ni, &new_stmts, false, var);
> +
> /* Exit_bb shouldn't be empty. */
> if (!gsi_end_p (last_gsi))
> {
> @@ -2726,11 +2745,19 @@ vect_gen_vector_loop_niters_mult_vf (loop_vec_info
> loop_vinfo,
> int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
> tree type = TREE_TYPE (niters_vector);
> tree log_vf = build_int_cst (type, exact_log2 (vf));
> + tree tree_vf = build_int_cst (type, vf);
> basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
>
> gcc_assert (niters_vector_mult_vf_ptr != NULL);
> tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
> niters_vector, log_vf);
> +
> + /* If we've peeled a vector iteration then subtract one full vector
> + iteration. */
> + if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
> + niters_vector_mult_vf = fold_build2 (MINUS_EXPR, type,
> + niters_vector_mult_vf, tree_vf);
> +
> if (!is_gimple_val (niters_vector_mult_vf))
> {
> tree var = create_tmp_var (type, "niters_vector_mult_vf");
> @@ -3328,6 +3355,10 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree
> niters, tree nitersm1,
> niters_vector_mult_vf steps. */
> gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
> update_e = skip_vector ? e : loop_preheader_edge (epilog);
> + if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
> + update_e = single_succ_edge (e->dest);
> +
> + /* Update the main exit. */
> vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
> update_e);
>
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index
> 39aa4d1250efe308acccf484d370f8adfd1ba843..de60da31e2a3030a7fbc302d3f676af9683fd019
> 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -1016,6 +1016,8 @@ public:
> #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
> #define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter
> #define LOOP_VINFO_EARLY_BREAKS(L) (L)->early_breaks
> +#define LOOP_VINFO_EARLY_BREAKS_VECT_PEELED(L) \
> + (single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src)
> #define LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS(L) (L)->early_break_conflict
> #define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb
> #define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses
> @@ -2224,6 +2226,7 @@ extern dump_user_location_t find_loop_location (class
> loop *);
> extern bool vect_can_advance_ivs_p (loop_vec_info);
> extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code);
> extern edge vec_init_loop_exit_info (class loop *);
> +extern void vect_iv_increment_position (edge, gimple_stmt_iterator *, bool
> *);
>
> /* In tree-vect-stmts.cc. */
> extern tree get_related_vectype_for_scalar_type (machine_mode, tree,
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)