https://gcc.gnu.org/g:991c35ee974548156c01cb69f093694973468f51

commit r17-836-g991c35ee974548156c01cb69f093694973468f51
Author: Tamar Christina <[email protected]>
Date:   Wed May 27 10:52:27 2026 +0100

    vect: refactor loop peeling to support explicit flag to redirect early 
exits [PR120352]
    
    This patch series is the first in a few to optimize early break 
vectorization.
    
    The first one addresses that certain loops don't require an epilog at all.
    
    An example is
    
    int a[N] = {0,0,0,1};
    int b[N] = {0,0,0,1};
    
    __attribute__((noipa, noinline))
    int foo ()
    {
      for (int i = 0; i < N; i++)
        {
          if (a[i] > b[i])
            return 1;
        }
      return 0;
    }
    
    where we have no value or side-effect to compute.  Naturally there's no 
need to
    redo any work to just return 1 or 0.
    
    Teaching the vectorizer this however re-enabled epilogue nomask for early 
break
    and so we still need to be able to peel for the epilogues.  This peeling 
however
    should not redirect all the alternative exits to the epilog.  To understand 
when
    this has to happen peeling now gets an extra parameter to indicate how to 
handle
    the multiple exits.
    
    This had an unfortunate interaction with uncounted loops, because uncounted
    loops re-used the layout (with the intermediate merge block) but just being 
a
    fall through block.  When it did this it didn't put all PHI nodes in the 
final
    merge block and as such relied on fixups later.
    
    This made the actual changed needed for not needing epilogs more fragile 
than
    needed so I first refactored peeling to be more consistent between early 
break
    and uncounted loops and insure that all BB now explicitly mention and use 
all
    PHI nodes from the exits.
    
    The code should hopefully be a bit more robust now wrt to needed 
optimizations.
    
    gcc/ChangeLog:
    
            PR tree-optimization/120352
            * tree-vect-loop-manip.cc (slpeel_tree_duplicate_loop_to_edge_cfg): 
Add
            redirect_exits.
            (vect_do_peeling): Use it.
            * tree-vectorizer.h (slpeel_tree_duplicate_loop_to_edge_cfg): Update
            prototype.

Diff:
---
 gcc/tree-vect-loop-manip.cc | 151 ++++++++++++++++++++++++++++++--------------
 gcc/tree-vectorizer.h       |   3 +-
 2 files changed, 106 insertions(+), 48 deletions(-)

diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index cd1ea746ae45..3aae0dea25b0 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -1484,7 +1484,8 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, 
edge loop_exit,
                                        edge scalar_exit, edge e, edge *new_e,
                                        bool flow_loops,
                                        vec<basic_block> *updated_doms,
-                                       bool uncounted_p, bool create_main_e)
+                                       bool uncounted_p, bool create_main_e,
+                                       bool redirect_exits)
 {
   class loop *new_loop;
   basic_block *new_bbs, *bbs, *pbbs;
@@ -1601,7 +1602,11 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop 
*loop, edge loop_exit,
       }
 
   auto loop_exits = get_loop_exit_edges (loop);
-  bool multiple_exits_p = loop_exits.length () > 1;
+  bool has_multiple_exits_p = loop_exits.length () > 1;
+
+  /* If REDIRECT_EXITS is false we leave the alternative exits untouched and
+     treat the duplication as if the loop only had the main exit.  */
+  bool redirect_multiple_exits_p = redirect_exits && has_multiple_exits_p;
   auto_vec<basic_block> doms;
 
   if (at_exit) /* Add the loop copy at exit.  */
@@ -1641,10 +1646,10 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop 
*loop, edge loop_exit,
                       loop_exit, UNKNOWN_LOCATION);
        }
 
-      bool multiple_exits_p = loop_exits.length () > 1;
       basic_block main_loop_exit_block = new_preheader;
-      basic_block alt_loop_exit_block = NULL;
-      /* Create the CFG for multiple exits.
+      basic_block alt_loop_exit_block = new_preheader;
+      /* When we redirect the other exits create the CFG
+        below to funnel everything through the merge block:
                   | loop_exit               | alt1   | altN
                   v                         v   ...  v
            main_loop_exit_block:       alt_loop_exit_block:
@@ -1655,39 +1660,46 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop 
*loop, edge loop_exit,
         the continuation values into the epilogue header.
         Do not bother with exit PHIs for the early exits but
         their live virtual operand.  We'll fix up things below.  */
-      if (multiple_exits_p || uncounted_p)
+      if (redirect_multiple_exits_p || uncounted_p)
        {
          edge loop_e = single_succ_edge (new_preheader);
          new_preheader = split_edge (loop_e);
 
-         gphi *vphi = NULL;
-         alt_loop_exit_block = new_preheader;
-         for (auto exit : loop_exits)
-           if (exit != loop_exit)
-             {
-               tree vphi_def = NULL_TREE;
-               if (gphi *evphi = get_virtual_phi (exit->dest))
-                 vphi_def = gimple_phi_arg_def_from_edge (evphi, exit);
-               edge res = redirect_edge_and_branch (exit, alt_loop_exit_block);
-               gcc_assert (res == exit);
-               redirect_edge_var_map_clear (exit);
-               if (alt_loop_exit_block == new_preheader)
-                 alt_loop_exit_block = split_edge (exit);
-               if (!need_virtual_phi)
-                 continue;
-               /* When the edge has no virtual LC PHI get at the live
-                  virtual operand by other means.  */
-               if (!vphi_def)
-                 vphi_def = get_live_virtual_operand_on_edge (exit);
-               if (!vphi)
-                 vphi = create_phi_node (copy_ssa_name (vphi_def),
+       if (redirect_exits)
+         {
+           gphi *vphi = NULL;
+           alt_loop_exit_block = new_preheader;
+           for (auto exit : loop_exits)
+             if (exit != loop_exit)
+               {
+                 tree vphi_def = NULL_TREE;
+                 if (gphi *evphi = get_virtual_phi (exit->dest))
+                   vphi_def = gimple_phi_arg_def_from_edge (evphi, exit);
+                 edge res
+                   = redirect_edge_and_branch (exit, alt_loop_exit_block);
+                 gcc_assert (res == exit);
+                 redirect_edge_var_map_clear (exit);
+
+                 if (alt_loop_exit_block == new_preheader)
+                   alt_loop_exit_block = split_edge (exit);
+                 if (!need_virtual_phi)
+                   continue;
+
+                 /* When the edge has no virtual LC PHI get at the live
+                    virtual operand by other means.  */
+                 if (!vphi_def)
+                   vphi_def = get_live_virtual_operand_on_edge (exit);
+
+                 if (!vphi)
+                   vphi = create_phi_node (copy_ssa_name (vphi_def),
                                          alt_loop_exit_block);
-               else
-                 /* Edge redirection might re-allocate the PHI node
-                    so we have to rediscover it.  */
-                 vphi = get_virtual_phi (alt_loop_exit_block);
-               add_phi_arg (vphi, vphi_def, exit, UNKNOWN_LOCATION);
-             }
+                 else
+                   /* Edge redirection might re-allocate the PHI node
+                      so we have to rediscover it.  */
+                   vphi = get_virtual_phi (alt_loop_exit_block);
+                 add_phi_arg (vphi, vphi_def, exit, UNKNOWN_LOCATION);
+               }
+         }
 
          set_immediate_dominator (CDI_DOMINATORS, new_preheader,
                                   loop->header);
@@ -1741,7 +1753,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, 
edge loop_exit,
 
          /* Create the merge PHI nodes in new_preheader and populate the
             arguments for the exits.  */
-         if (multiple_exits_p || uncounted_p)
+         if (redirect_multiple_exits_p)
            {
              for (auto gsi_from = gsi_start_phis (loop->header),
                   gsi_to = gsi_start_phis (new_loop->header);
@@ -1795,7 +1807,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, 
edge loop_exit,
                }
            }
 
-         if (multiple_exits_p)
+         if (redirect_multiple_exits_p)
            {
              /* After creating the merge PHIs handle the early exits those
                 should use the values at the start of the loop.  */
@@ -1826,14 +1838,24 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop 
*loop, edge loop_exit,
                        SET_PHI_ARG_DEF (lc_phi, i, alt_arg);
                      alt_arg = alt_def;
                    }
+
+                 /* The merge PHIs live in NEW_PREHEADER; their
+                    alternative argument always comes from the
+                    successor edge of ALT_LOOP_EXIT_BLOCK.  */
                  edge alt_e = single_succ_edge (alt_loop_exit_block);
                  SET_PHI_ARG_DEF_ON_EDGE (to_phi, alt_e, alt_arg);
                }
            }
+
          /* For the single exit case only create the missing LC PHI nodes
             for the continuation of the loop IVs that are not also already
-            reductions and thus had LC PHI nodes on the exit already.  */
-         if (!multiple_exits_p && !uncounted_p)
+            reductions and thus had LC PHI nodes on the exit already.  When
+            we are not redirecting the alternative exits the layout is:
+
+                  loop_exit ---> new_preheader ---> epilog
+                  alt_exit ---------------> original dest
+          */
+         if (!redirect_multiple_exits_p)
            {
              for (auto gsi_from = gsi_start_phis (loop->header),
                   gsi_to = gsi_start_phis (new_loop->header);
@@ -1842,21 +1864,54 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop 
*loop, edge loop_exit,
                {
                  gimple *from_phi = gsi_stmt (gsi_from);
                  gimple *to_phi = gsi_stmt (gsi_to);
-                 tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
-                                                       loop_latch_edge (loop));
+                 tree new_arg;
+
+                 /* Use the value on the exiting path.  When the exit is from
+                    the latch edge we want the post-iteration value on that
+                    edge; when the exit is from the loop header (before the
+                    latch ever executes) we must use the current header value,
+                    otherwise we pick up a name that was never defined.  */
+                 if (!has_multiple_exits_p && !uncounted_p)
+                   new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi,
+                                                    loop_latch_edge (loop));
+                 else
+                   new_arg = gimple_phi_result (from_phi);
+
+                 /* Re-use the virtual LC PHI we already built when we are not
+                    redirecting the other exits to avoid creating duplicate
+                    virtual SSA names.  */
+                 if (virtual_operand_p (new_arg))
+                   {
+                     if (gphi *vphi = get_virtual_phi (main_loop_exit_block))
+                       {
+                         adjust_phi_and_debug_stmts (to_phi, loop_entry,
+                                                     gimple_phi_result (vphi));
+                         continue;
+                       }
+                   }
 
                  /* Check if we've already created a new phi node during edge
                     redirection.  If we have, only propagate the value
                     downwards.  */
                  if (tree *res = new_phi_args.get (new_arg))
                    {
-                     adjust_phi_and_debug_stmts (to_phi, loop_entry, *res);
-                     continue;
+                     /* Check if the new dest block already contains a use.  */
+                     gimple *stmt = SSA_NAME_DEF_STMT (*res);
+
+                     /* If the value already exist, just update the destination
+                        and if it doesn't we want a new node.  */
+                     if (gimple_bb (stmt) == main_loop_exit_block)
+                       {
+                         adjust_phi_and_debug_stmts (to_phi, loop_entry, *res);
+                         continue;
+                       }
+                     else
+                       new_arg = *res;
                    }
 
                  tree new_res = copy_ssa_name (gimple_phi_result (from_phi));
-                 gphi *lcssa_phi = create_phi_node (new_res, new_preheader);
-                 SET_PHI_ARG_DEF_ON_EDGE (lcssa_phi, loop_exit, new_arg);
+                 gphi *lcssa_phi = create_phi_node (new_res, 
main_loop_exit_block);
+                 SET_PHI_ARG_DEF (lcssa_phi, loop_exit->dest_idx, new_arg);
                  adjust_phi_and_debug_stmts (to_phi, loop_entry, new_res);
                }
            }
@@ -1876,7 +1931,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, 
edge loop_exit,
       /* Finally after wiring the new epilogue we need to update its main exit
         to the original function exit we recorded.  Other exits are already
         correct.  */
-      if (multiple_exits_p || uncounted_p)
+      if (has_multiple_exits_p || uncounted_p)
        {
          class loop *update_loop = new_loop;
          doms = get_all_dominated_blocks (CDI_DOMINATORS, loop->header);
@@ -2000,7 +2055,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, 
edge loop_exit,
                               loop_preheader_edge (new_loop)->src);
 
       /* Update dominators for multiple exits.  */
-      if (multiple_exits_p || create_main_e)
+      if (has_multiple_exits_p || create_main_e)
        {
          for (edge alt_e : loop_exits)
            {
@@ -3449,7 +3504,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
       prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, exit_e,
                                                       scalar_loop, scalar_e,
                                                       e, &prolog_e, true, NULL,
-                                                      false, uncounted_p);
+                                                      uncounted_p, uncounted_p,
+                                                      true);
 
       gcc_assert (prolog);
       prolog->force_vectorize = false;
@@ -3564,7 +3620,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
       epilog
        = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e,
                                                  &new_epilog_e, true, &doms,
-                                                 uncounted_p);
+                                                 uncounted_p, false,
+                                                 true);
 
       LOOP_VINFO_EPILOGUE_MAIN_EXIT (loop_vinfo) = new_epilog_e;
       gcc_assert (epilog);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 3a01e1be0f15..6d7393809013 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2486,7 +2486,8 @@ class loop *slpeel_tree_duplicate_loop_to_edge_cfg (class 
loop *, edge,
                                                    class loop *, edge,
                                                    edge, edge *, bool = true,
                                                    vec<basic_block> * = NULL,
-                                                   bool = false, bool = false);
+                                                   bool = false, bool = false,
+                                                   bool = true);
 class loop *vect_loop_versioning (loop_vec_info, gimple *);
 extern class loop *vect_do_peeling (loop_vec_info, tree, tree,
                                    tree *, tree *, tree *, int, bool, bool,

Reply via email to