Hi,

Having simplified peeling, this patch becomes smaller as well:

This changes the PHI node updates to support early breaks.
It has to support both the case where the loop's exit matches the normal loop
exit and one where the early exit is "inverted", i.e. it's an early exit edge.

In the latter case we must always restart the loop for VF iterations.  For an
early exit the reason is obvious, but there are cases where the "normal" exit
is located before the early one.  This exit then does a check on ivtmp resulting
in us leaving the loop since it thinks we're done.

In these cases we may still have side-effects to perform so we also go to the
scalar loop.

For the "normal" exit niters has already been adjusted for peeling, for the
early exits we must find out how many iterations we actually did.  So we have
to recalculate the new position for each exit.

For the "inverse" case we essentially peel a vector iteration *after* the vector
loop has finished, i.e. conceptually it's the same as vect epilogue peeling but
without generating code for the peeled iteration.  That'll be handled by the
scalar loop.

To do this we just adjust niters_vector_mult_vf by subtracting one VF, and for
masked cases we do the same with final_iv.

The normal IV update code will then generate the correct values for us.
Eventually VRP will simplify the constant bounds and we get the proper scalar
unrolling.  This means we don't have to make any changes at all to
vect_update_ivs_after_vectorizer other than dropping some asserts.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * tree-vect-loop-manip.cc (vect_set_loop_condition_partial_vectors,
        vect_set_loop_condition_partial_vectors_avx512,
        vect_gen_vector_loop_niters_mult_vf): Support peeling a vector
        iteration.
        (vect_update_ivs_after_vectorizer): Drop asserts.
        (vect_do_peeling): Skip forwarder edge.
        (vect_is_loop_exit_latch_pred): New.
        * tree-vectorizer.h (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED): New.

--- inline copy of patch ---

diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 
d61d7c3a189b279fc3bcbb58c3c0e32521db3cf8..476be8a0bb6da2d06c4ca7052cb07bacecca60b1
 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -951,7 +951,18 @@ vect_set_loop_condition_partial_vectors (class loop *loop, 
edge exit_edge,
 
   if (final_iv)
     {
-      gassign *assign = gimple_build_assign (final_iv, orig_niters);
+      gassign *assign;
+      /* If vectorizing an inverted early break loop we have to restart the
+        scalar loop at niters - vf.  This matches what we do in
+        vect_gen_vector_loop_niters_mult_vf for non-masked loops.  */
+      if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
+       {
+         tree ftype = TREE_TYPE (orig_niters);
+         tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+         assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf);
+       }
+       else
+       assign = gimple_build_assign (final_iv, orig_niters);
       gsi_insert_on_edge_immediate (exit_edge, assign);
     }
 
@@ -1188,8 +1199,19 @@ vect_set_loop_condition_partial_vectors_avx512 (class 
loop *loop,
 
   if (final_iv)
     {
-      gassign *assign = gimple_build_assign (final_iv, orig_niters);
-      gsi_insert_on_edge_immediate (single_exit (loop), assign);
+      gassign *assign;
+      /* If vectorizing an inverted early break loop we have to restart the
+        scalar loop at niters - vf.  This matches what we do in
+        vect_gen_vector_loop_niters_mult_vf for non-masked loops.  */
+      if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
+       {
+         tree ftype = TREE_TYPE (orig_niters);
+         tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+         assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf);
+       }
+       else
+       assign = gimple_build_assign (final_iv, orig_niters);
+      gsi_insert_on_edge_immediate (exit_edge, assign);
     }
 
   return cond_stmt;
@@ -2157,11 +2179,8 @@ vect_update_ivs_after_vectorizer (loop_vec_info 
loop_vinfo,
   gphi_iterator gsi, gsi1;
   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   basic_block update_bb = update_e->dest;
-
   basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
-
-  /* Make sure there exists a single-predecessor exit bb:  */
-  gcc_assert (single_pred_p (exit_bb));
+  gimple_stmt_iterator last_gsi = gsi_last_bb (exit_bb);
 
   for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
        !gsi_end_p (gsi) && !gsi_end_p (gsi1);
@@ -2171,7 +2190,6 @@ vect_update_ivs_after_vectorizer (loop_vec_info 
loop_vinfo,
       tree step_expr, off;
       tree type;
       tree var, ni, ni_name;
-      gimple_stmt_iterator last_gsi;
 
       gphi *phi = gsi.phi ();
       gphi *phi1 = gsi1.phi ();
@@ -2207,7 +2225,8 @@ vect_update_ivs_after_vectorizer (loop_vec_info 
loop_vinfo,
        {
          tree stype = TREE_TYPE (step_expr);
          off = fold_build2 (MULT_EXPR, stype,
-                            fold_convert (stype, niters), step_expr);
+                              fold_convert (stype, niters), step_expr);
+
          if (POINTER_TYPE_P (type))
            ni = fold_build_pointer_plus (init_expr, off);
          else
@@ -2226,9 +2245,9 @@ vect_update_ivs_after_vectorizer (loop_vec_info 
loop_vinfo,
 
       var = create_tmp_var (type, "tmp");
 
-      last_gsi = gsi_last_bb (exit_bb);
       gimple_seq new_stmts = NULL;
       ni_name = force_gimple_operand (ni, &new_stmts, false, var);
+
       /* Exit_bb shouldn't be empty.  */
       if (!gsi_end_p (last_gsi))
        {
@@ -2726,11 +2745,19 @@ vect_gen_vector_loop_niters_mult_vf (loop_vec_info 
loop_vinfo,
   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
   tree type = TREE_TYPE (niters_vector);
   tree log_vf = build_int_cst (type, exact_log2 (vf));
+  tree tree_vf = build_int_cst (type, vf);
   basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
 
   gcc_assert (niters_vector_mult_vf_ptr != NULL);
   tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
                                            niters_vector, log_vf);
+
+  /* If we've peeled a vector iteration then subtract one full vector
+     iteration.  */
+  if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
+    niters_vector_mult_vf = fold_build2 (MINUS_EXPR, type,
+                                        niters_vector_mult_vf, tree_vf);
+
   if (!is_gimple_val (niters_vector_mult_vf))
     {
       tree var = create_tmp_var (type, "niters_vector_mult_vf");
@@ -3328,6 +3355,10 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
         niters_vector_mult_vf steps.  */
       gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
       update_e = skip_vector ? e : loop_preheader_edge (epilog);
+      if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+       update_e = single_succ_edge (e->dest);
+
+      /* Update the main exit.  */
       vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
                                        update_e);
 
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 
39aa4d1250efe308acccf484d370f8adfd1ba843..de60da31e2a3030a7fbc302d3f676af9683fd019
 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1016,6 +1016,8 @@ public:
 #define LOOP_VINFO_PEELING_FOR_GAPS(L)     (L)->peeling_for_gaps
 #define LOOP_VINFO_PEELING_FOR_NITER(L)    (L)->peeling_for_niter
 #define LOOP_VINFO_EARLY_BREAKS(L)         (L)->early_breaks
+#define LOOP_VINFO_EARLY_BREAKS_VECT_PEELED(L)  \
+  (single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src)
 #define LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS(L) (L)->early_break_conflict
 #define LOOP_VINFO_EARLY_BRK_DEST_BB(L)    (L)->early_break_dest_bb
 #define LOOP_VINFO_EARLY_BRK_VUSES(L)      (L)->early_break_vuses
@@ -2224,6 +2226,7 @@ extern dump_user_location_t find_loop_location (class 
loop *);
 extern bool vect_can_advance_ivs_p (loop_vec_info);
 extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code);
 extern edge vec_init_loop_exit_info (class loop *);
+extern void vect_iv_increment_position (edge, gimple_stmt_iterator *, bool *);
 
 /* In tree-vect-stmts.cc.  */
 extern tree get_related_vectype_for_scalar_type (machine_mode, tree,

Attachment: rb17967.patch
Description: rb17967.patch

Reply via email to