On Wed, 15 Nov 2023, Tamar Christina wrote:

> Patch updated to latest trunk:
> 
> Hi All,
> 
> This changes the PHI node updates to support early breaks.
> It has to support both the case where the loop's exit matches the normal loop
> exit and one where the early exit is "inverted", i.e. it's an early exit edge.
> 
> In the latter case we must always restart the loop for VF iterations.  For an
> early exit the reason is obvious, but there are cases where the "normal" exit
> is located before the early one.  This exit then does a check on ivtmp 
> resulting
> in us leaving the loop since it thinks we're done.
> 
> In these case we may still have side-effects to perform so we also go to the
> scalar loop.
> 
> For the "normal" exit niters has already been adjusted for peeling, for the
> early exits we must find out how many iterations we actually did.  So we have
> to recalculate the new position for each exit.
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>       * tree-vect-loop-manip.cc (vect_set_loop_condition_normal): Hide unused.
>       (vect_update_ivs_after_vectorizer): Support early break.
>       (vect_do_peeling): Use it.
> 
> --- inline copy of patch ---
> 
> diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
> index 
> d3fa8699271c4d7f404d648a38a95beabeabc99a..e1d210ab4617c894dab3d2654cf1c842baac58f5
>  100644
> --- a/gcc/tree-vect-loop-manip.cc
> +++ b/gcc/tree-vect-loop-manip.cc
> @@ -1200,7 +1200,7 @@ vect_set_loop_condition_partial_vectors_avx512 (class 
> loop *loop,
>     loop handles exactly VF scalars per iteration.  */
>  
>  static gcond *
> -vect_set_loop_condition_normal (loop_vec_info loop_vinfo, edge exit_edge,
> +vect_set_loop_condition_normal (loop_vec_info /* loop_vinfo */, edge 
> exit_edge,
>                               class loop *loop, tree niters, tree step,
>                               tree final_iv, bool niters_maybe_zero,
>                               gimple_stmt_iterator loop_cond_gsi)
> @@ -1412,7 +1412,7 @@ vect_set_loop_condition (class loop *loop, edge loop_e, 
> loop_vec_info loop_vinfo
>     When this happens we need to flip the understanding of main and other
>     exits by peeling and IV updates.  */
>  
> -bool inline
> +bool
>  vect_is_loop_exit_latch_pred (edge loop_exit, class loop *loop)
>  {
>    return single_pred (loop->latch) == loop_exit->src;
> @@ -2142,6 +2142,7 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
>       Input:
>       - LOOP - a loop that is going to be vectorized. The last few iterations
>                of LOOP were peeled.
> +     - VF   - The chosen vectorization factor for LOOP.
>       - NITERS - the number of iterations that LOOP executes (before it is
>                  vectorized). i.e, the number of times the ivs should be 
> bumped.
>       - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path

the comment on this is now a bit misleading, can you try to update it
and/or move the comment bits to the docs on EARLY_EXIT?

> @@ -2152,6 +2153,9 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
>                    The phi args associated with the edge UPDATE_E in the bb
>                    UPDATE_E->dest are updated accordingly.
>  
> +     - restart_loop - Indicates whether the scalar loop needs to restart the

params are ALL_CAPS

> +                   iteration count where the vector loop began.
> +
>       Assumption 1: Like the rest of the vectorizer, this function assumes
>       a single loop exit that has a single predecessor.
>  
> @@ -2169,18 +2173,22 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
>   */
>  
>  static void
> -vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
> -                               tree niters, edge update_e)
> +vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, poly_uint64 vf,

LOOP_VINFO_VECT_FACTOR?

> +                               tree niters, edge update_e, bool restart_loop)

I think 'bool early_exit' is better here?  I wonder if we have an "early"
exit after the main exit we are probably sure there are no side-effects
to re-execute and could avoid this restarting?

>  {
>    gphi_iterator gsi, gsi1;
>    class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
>    basic_block update_bb = update_e->dest;
> -
> -  basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
> -
> -  /* Make sure there exists a single-predecessor exit bb:  */
> -  gcc_assert (single_pred_p (exit_bb));
> -  gcc_assert (single_succ_edge (exit_bb) == update_e);
> +  bool inversed_iv
> +     = !vect_is_loop_exit_latch_pred (LOOP_VINFO_IV_EXIT (loop_vinfo),
> +                                      LOOP_VINFO_LOOP (loop_vinfo));
> +  bool needs_interm_block = LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
> +                         && flow_bb_inside_loop_p (loop, update_e->src);
> +  edge loop_e = LOOP_VINFO_IV_EXIT (loop_vinfo);
> +  gcond *cond = get_loop_exit_condition (loop_e);
> +  basic_block exit_bb = loop_e->dest;
> +  basic_block iv_block = NULL;
> +  gimple_stmt_iterator last_gsi = gsi_last_bb (exit_bb);
>  
>    for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis 
> (update_bb);
>         !gsi_end_p (gsi) && !gsi_end_p (gsi1);
> @@ -2190,7 +2198,6 @@ vect_update_ivs_after_vectorizer (loop_vec_info 
> loop_vinfo,
>        tree step_expr, off;
>        tree type;
>        tree var, ni, ni_name;
> -      gimple_stmt_iterator last_gsi;
>  
>        gphi *phi = gsi.phi ();
>        gphi *phi1 = gsi1.phi ();
> @@ -2222,11 +2229,52 @@ vect_update_ivs_after_vectorizer (loop_vec_info 
> loop_vinfo,
>        enum vect_induction_op_type induction_type
>       = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);
>  
> -      if (induction_type == vect_step_op_add)
> +      tree iv_var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
> +      /* create_iv always places it on the LHS.  Alternatively we can set a
> +      property during create_iv to identify it.  */
> +      bool ivtemp = gimple_cond_lhs (cond) == iv_var;
> +      if (restart_loop && ivtemp)
>       {
> +       type = TREE_TYPE (gimple_phi_result (phi));
> +       ni = build_int_cst (type, vf);
> +       if (inversed_iv)
> +         ni = fold_build2 (MINUS_EXPR, type, ni,
> +                           fold_convert (type, step_expr));
> +     }
> +      else if (induction_type == vect_step_op_add)
> +     {
> +
>         tree stype = TREE_TYPE (step_expr);
> -       off = fold_build2 (MULT_EXPR, stype,
> -                          fold_convert (stype, niters), step_expr);
> +
> +       /* Early exits always use last iter value not niters. */
> +       if (restart_loop)
> +         {
> +           /* Live statements in the non-main exit shouldn't be adjusted.  We
> +              normally didn't have this problem with a single exit as live
> +              values would be in the exit block.  However when dealing with
> +              multiple exits all exits are redirected to the merge block
> +              and we restart the iteration.  */

Hmm, I fail to see how this works - we're either using the value to 
continue the induction or not, independent of STMT_VINFO_LIVE_P.

> +           if (STMT_VINFO_LIVE_P (phi_info))
> +             continue;
> +
> +           /* For early break the final loop IV is:
> +              init + (final - init) * vf which takes into account peeling
> +              values and non-single steps.  The main exit can use niters
> +              since if you exit from the main exit you've done all vector
> +              iterations.  For an early exit we don't know when we exit so we
> +              must re-calculate this on the exit.  */
> +           tree start_expr = gimple_phi_result (phi);
> +           off = fold_build2 (MINUS_EXPR, stype,
> +                              fold_convert (stype, start_expr),
> +                              fold_convert (stype, init_expr));
> +           /* Now adjust for VF to get the final iteration value.  */
> +           off = fold_build2 (MULT_EXPR, stype, off,
> +                              build_int_cst (stype, vf));
> +         }
> +       else
> +         off = fold_build2 (MULT_EXPR, stype,
> +                            fold_convert (stype, niters), step_expr);
> +
>         if (POINTER_TYPE_P (type))
>           ni = fold_build_pointer_plus (init_expr, off);
>         else
> @@ -2238,6 +2286,8 @@ vect_update_ivs_after_vectorizer (loop_vec_info 
> loop_vinfo,
>        /* Don't bother call vect_peel_nonlinear_iv_init.  */
>        else if (induction_type == vect_step_op_neg)
>       ni = init_expr;
> +      else if (restart_loop)
> +     continue;

This looks all a bit complicated - why wouldn't we simply always
use the PHI result when 'restart_loop'?  Isn't that the correct old start
value in all cases?

>        else
>       ni = vect_peel_nonlinear_iv_init (&stmts, init_expr,
>                                         niters, step_expr,
> @@ -2245,9 +2295,20 @@ vect_update_ivs_after_vectorizer (loop_vec_info 
> loop_vinfo,
>  
>        var = create_tmp_var (type, "tmp");
>  
> -      last_gsi = gsi_last_bb (exit_bb);
>        gimple_seq new_stmts = NULL;
>        ni_name = force_gimple_operand (ni, &new_stmts, false, var);
> +
> +      /* For non-main exit create an intermediat edge to get any updated iv
> +      calculations.  */
> +      if (needs_interm_block
> +       && !iv_block
> +       && (!gimple_seq_empty_p (stmts) || !gimple_seq_empty_p (new_stmts)))
> +     {
> +       iv_block = split_edge (update_e);
> +       update_e = single_succ_edge (update_e->dest);
> +       last_gsi = gsi_last_bb (iv_block);
> +     }
> +
>        /* Exit_bb shouldn't be empty.  */
>        if (!gsi_end_p (last_gsi))
>       {
> @@ -3342,8 +3403,26 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree 
> niters, tree nitersm1,
>        niters_vector_mult_vf steps.  */
>        gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
>        update_e = skip_vector ? e : loop_preheader_edge (epilog);
> -      vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
> -                                     update_e);
> +      if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
> +     update_e = single_succ_edge (e->dest);
> +      bool inversed_iv
> +     = !vect_is_loop_exit_latch_pred (LOOP_VINFO_IV_EXIT (loop_vinfo),
> +                                      LOOP_VINFO_LOOP (loop_vinfo));

You are computing this here and in vect_update_ivs_after_vectorizer?

> +
> +      /* Update the main exit first.  */
> +      vect_update_ivs_after_vectorizer (loop_vinfo, vf, 
> niters_vector_mult_vf,
> +                                     update_e, inversed_iv);
> +
> +      /* And then update the early exits.  */
> +      for (auto exit : get_loop_exit_edges (loop))
> +     {
> +       if (exit == LOOP_VINFO_IV_EXIT (loop_vinfo))
> +         continue;
> +
> +       vect_update_ivs_after_vectorizer (loop_vinfo, vf,
> +                                         niters_vector_mult_vf,
> +                                         exit, true);

... why does the same not work here?  Wouldn't the proper condition
be !dominated_by_p (CDI_DOMINATORS, exit->src, LOOP_VINFO_IV_EXIT 
(loop_vinfo)->src) or similar?  That is, whether the exit is at or
after the main IV exit?  (consider having two)

> +     }
>  
>        if (skip_epilog)
>       {
> 

Reply via email to