Given that loops, early-break or otherwise, are presently required to
have a known iteration count, there is currently no need for
single-exit loops to reset induction variables and accumulators prior
to entering the epilogue loop.

For multiple-exit uncounted loops, there are provisions in the code
for resetting IVs and accumulators on exiting the loop via early
exits.  This is extended to the main exit (though only in
multiple-exit loops) if `peeled_iters' is set to `true'.  The
definition of `peeled_iters' is equivalent to that of
LOOP_VINFO_EARLY_BREAKS_VECT_PEELED, but it is evaluated independently
because the function does not have access to loop_vinfo.

Therefore, the first fix is to ensure that, just as for
LOOP_VINFO_EARLY_BREAKS_VECT_PEELED, `peeled_iters' also evaluates to
true for uncounted loops.

The second fix is that, since the relevant logic is currently guarded
by `multiple_exits_p', we also enable it for uncounted loops via the
new function argument `uncounted_p'.

gcc/ChangeLog:

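        * tree-vect-loop-manip.cc (slpeel_tree_duplicate_loop_to_edge_cfg):
        Add uncounted_p parameter.  Reset IVs and accumulators for all
        exits for uncounted loops.
        (vect_do_peeling): Pass uncounted_p to
        slpeel_tree_duplicate_loop_to_edge_cfg.
        * tree-vectorizer.h (slpeel_tree_duplicate_loop_to_edge_cfg):
        Add new bool parameter defaulting to false.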
---
 gcc/tree-vect-loop-manip.cc | 20 +++++++++++++-------
 gcc/tree-vectorizer.h       |  3 ++-
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 640735388fe..6d623e980f2 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -1479,7 +1479,8 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, 
edge loop_exit,
                                        class loop *scalar_loop,
                                        edge scalar_exit, edge e, edge *new_e,
                                        bool flow_loops,
-                                       vec<basic_block> *updated_doms)
+                                       vec<basic_block> *updated_doms,
+                                       bool uncounted_p)
 {
   class loop *new_loop;
   basic_block *new_bbs, *bbs, *pbbs;
@@ -1650,7 +1651,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, 
edge loop_exit,
         the continuation values into the epilogue header.
         Do not bother with exit PHIs for the early exits but
         their live virtual operand.  We'll fix up things below.  */
-      if (multiple_exits_p)
+      if (multiple_exits_p || uncounted_p)
        {
          edge loop_e = single_succ_edge (new_preheader);
          new_preheader = split_edge (loop_e);
@@ -1705,7 +1706,8 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, 
edge loop_exit,
       if (flow_loops)
        {
          edge loop_entry = single_succ_edge (new_preheader);
-         bool peeled_iters = single_pred (loop->latch) != loop_exit->src;
+         bool peeled_iters = (uncounted_p
+                              || single_pred (loop->latch) != loop_exit->src);
 
          /* Record the new SSA names in the cache so that we can skip
             materializing them again when we fill in the rest of the LC SSA
@@ -1735,7 +1737,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, 
edge loop_exit,
 
          /* Create the merge PHI nodes in new_preheader and populate the
             arguments for the exits.  */
-         if (multiple_exits_p)
+         if (multiple_exits_p || uncounted_p)
            {
              for (auto gsi_from = gsi_start_phis (loop->header),
                   gsi_to = gsi_start_phis (new_loop->header);
@@ -1787,7 +1789,10 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop 
*loop, edge loop_exit,
                  /* And adjust the epilog entry value.  */
                  adjust_phi_and_debug_stmts (to_phi, loop_entry, new_res);
                }
+           }
 
+         if (multiple_exits_p)
+           {
              /* After creating the merge PHIs handle the early exits those
                 should use the values at the start of the loop.  */
              for (auto gsi_from = gsi_start_phis (loop->header),
@@ -1824,7 +1829,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, 
edge loop_exit,
          /* For the single exit case only create the missing LC PHI nodes
             for the continuation of the loop IVs that are not also already
             reductions and thus had LC PHI nodes on the exit already.  */
-         else
+         if (!multiple_exits_p && !uncounted_p)
            {
              for (auto gsi_from = gsi_start_phis (loop->header),
                   gsi_to = gsi_start_phis (new_loop->header);
@@ -1867,7 +1872,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, 
edge loop_exit,
       /* Finally after wiring the new epilogue we need to update its main exit
         to the original function exit we recorded.  Other exits are already
         correct.  */
-      if (multiple_exits_p)
+      if (multiple_exits_p || uncounted_p)
        {
          class loop *update_loop = new_loop;
          doms = get_all_dominated_blocks (CDI_DOMINATORS, loop->header);
@@ -3464,7 +3469,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
       auto_vec<basic_block> doms;
       epilog
        = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e,
-                                                 &new_epilog_e, true, &doms);
+                                                 &new_epilog_e, true, &doms,
+                                                 uncounted_p);
 
       LOOP_VINFO_EPILOGUE_IV_EXIT (loop_vinfo) = new_epilog_e;
       gcc_assert (epilog);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 0ae880b15bb..bb761fdc9bb 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2461,7 +2461,8 @@ extern bool slpeel_can_duplicate_loop_p (const class loop 
*, const_edge,
 class loop *slpeel_tree_duplicate_loop_to_edge_cfg (class loop *, edge,
                                                    class loop *, edge,
                                                    edge, edge *, bool = true,
-                                                   vec<basic_block> * = NULL);
+                                                   vec<basic_block> * = NULL,
+                                                   bool=false);
 class loop *vect_loop_versioning (loop_vec_info, gimple *);
 extern class loop *vect_do_peeling (loop_vec_info, tree, tree,
                                    tree *, tree *, tree *, int, bool, bool,
-- 
2.43.0

Reply via email to