> > > > + /* Save destination as we go, BB are visited in order and the > > > > last one > > > > + is where statements should be moved to. */ > > > > + if (!dest_bb) > > > > + dest_bb = gimple_bb (c); > > > > + else > > > > + { > > > > + basic_block curr_bb = gimple_bb (c); > > > > + if (dominated_by_p (CDI_DOMINATORS, curr_bb, dest_bb)) > > > > + dest_bb = curr_bb; > > > > + } > > > > + } > > > > + > > > > + dest_bb = FALLTHRU_EDGE (dest_bb)->dest; > > > > > > no edge is the fallthru edge out of a condition, so this always selects > > > EDGE_SUCC (dest_bb, 1) which cannot be correct (well, guess you're > > > lucky). I > > > think you instead want > > > > > > dest_bb = EDGE_SUCC (dest_bb, 0)->dest->loop_father == dest_bb- > > > >loop_father ? EDGE_SUCC (dest_bb, 0)->dest : EDGE_SUCC (dest_bb, 1)- > > > >dest; > > > > > > more nicely written, of course. > > > > > > > + gcc_assert (dest_bb); > > > > + LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb; > > > > > > Sorting the vector of early breaks as we gather them might be nicer than > > > this - > > > you'd then simply use the first or last. > > >
I opted not to do the sorting since I don't really need a full order between the exits here and only need to find the last one. A sort would be more expensive than the linear check here. But I also couldn't think of a good sort key since all you have is dominate yes/no. Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu and no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: * tree-vect-data-refs.cc (vect_analyze_early_break_dependences): New. (vect_analyze_data_ref_dependences): Use it. * tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): Initialize early_breaks. (move_early_exit_stmts): New. (vect_transform_loop): Use it. * tree-vect-stmts.cc (vect_is_simple_use): Use vect_early_exit_def. * tree-vectorizer.h (enum vect_def_type): Add vect_early_exit_def. (class _loop_vec_info): Add early_breaks, early_break_conflict, early_break_vuses. (LOOP_VINFO_EARLY_BREAKS): New. (LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS): New. (LOOP_VINFO_EARLY_BRK_DEST_BB): New. (LOOP_VINFO_EARLY_BRK_VUSES): New. gcc/testsuite/ChangeLog: * gcc.dg/vect/vect-early-break_57.c: New test. * gcc.dg/vect/vect-early-break_79.c: New test. * gcc.dg/vect/vect-early-break_80.c: New test. * gcc.dg/vect/vect-early-break_81.c: New test. * gcc.dg/vect/vect-early-break_83.c: New test. 
--- inline copy of patch --- diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_57.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_57.c index be4a0c7426093059ce37a9f824defb7ae270094d..9a4e795f92b7a8577ac71827f5cb0bd15d88ebe1 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_57.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_57.c @@ -5,6 +5,7 @@ /* { dg-additional-options "-Ofast" } */ /* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump "epilog loop required" "vect" } } */ void abort (); diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_79.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_79.c new file mode 100644 index 0000000000000000000000000000000000000000..a26011ef1ba5aa000692babc90d46621efc2f8b5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_79.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +/* { dg-additional-options "-Ofast" } */ + +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */ + +#undef N +#define N 32 + +unsigned vect_a[N]; +unsigned vect_b[N]; + +unsigned test4(unsigned x) +{ + unsigned ret = 0; + for (int i = 0; i < 1024; i++) + { + vect_b[i] = x + i; + if (vect_a[i] > x) + break; + vect_a[i] = x; + + } + return ret; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_80.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_80.c new file mode 100644 index 0000000000000000000000000000000000000000..ddf504e0c8787ae33a0e98045c1c91f2b9f533a9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_80.c @@ -0,0 +1,43 @@ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +/* { dg-additional-options "-Ofast" } */ + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ + +extern void abort (); + +int x; +__attribute__ ((noinline, noipa)) +void foo (int *a, int *b) +{ + int local_x = x; + for (int i 
= 0; i < 1024; ++i) + { + if (i + local_x == 13) + break; + a[i] = 2 * b[i]; + } +} + +int main () +{ + int a[1024] = {0}; + int b[1024] = {0}; + + for (int i = 0; i < 1024; i++) + b[i] = i; + + x = -512; + foo (a, b); + + if (a[524] != 1048) + abort (); + + if (a[525] != 0) + abort (); + + if (a[1023] != 0) + abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_81.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_81.c new file mode 100644 index 0000000000000000000000000000000000000000..c38e394ad87863f0702d422cb58018b979c9fba6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_81.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +/* { dg-additional-options "-Ofast" } */ + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump "epilog loop required" "vect" } } */ +void abort (); + +unsigned short sa[32]; +unsigned short sc[32] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; +unsigned short sb[32] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; +unsigned int ia[32]; +unsigned int ic[32] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45, + 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; +unsigned int ib[32] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45, + 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + +int main2 (int n) +{ + int i; + for (i = 0; i < n - 3; i++) + { + if (sa[i+3] != sb[i] + sc[i] || ia[i+3] != ib[i] + ic[i]) + abort (); + } +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_83.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_83.c new file mode 100644 index 0000000000000000000000000000000000000000..227dcf1b7ab2ace149e692a6aab41cdd5d47d098 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_83.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break 
} */ +/* { dg-require-effective-target vect_int } */ + +/* { dg-additional-options "-Ofast" } */ + +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */ + +#include <complex.h> + +#define N 1024 +complex double vect_a[N]; +complex double vect_b[N]; + +complex double test4(complex double x) +{ + complex double ret = 0; + for (int i = 0; i < N; i++) + { + volatile complex double z = vect_b[i]; + vect_b[i] = x + i + z; + if (vect_a[i] == x) + return i; + vect_a[i] += x * vect_b[i]; + + } + return ret; +} diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index d5c9c4a11c2e5d8fd287f412bfa86d081c2f8325..8e9e780e01fd349b30da1f0a762c0306ec257ff7 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -613,6 +613,377 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr, return opt_result::success (); } +/* Function vect_analyze_early_break_dependences. + + Examine all the data references in the loop and make sure that if we have + multiple exits that we are able to safely move stores such that they become + safe for vectorization. The function also calculates the place where to move + the instructions to and computes what the new vUSE chain should be. + + This works in tandem with the CFG that will be produced by + slpeel_tree_duplicate_loop_to_edge_cfg later on. + + This function tries to validate whether an early break vectorization + is possible for the current instruction sequence. Returns True if + possible, otherwise False. + + Requirements: + - Any memory access must be to a fixed size buffer. + - There must not be any loads and stores to the same object. + - Multiple loads are allowed as long as they don't alias. + + NOTE: + This implementation is very conservative. Any overlapping loads/stores + that take place before the early break statement get rejected aside from + WAR dependencies. + + i.e.: + + a[i] = 8 + c = a[i] + if (b[i]) + ... 
+ + is not allowed, but + + c = a[i] + a[i] = 8 + if (b[i]) + ... + + is which is the common case. */ + +static opt_result +vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) +{ + DUMP_VECT_SCOPE ("vect_analyze_early_break_dependences"); + + /* - CHAIN: Currently detected sequence of instructions that need to be moved + if we are to vectorize this early break. + - FIXED: Sequences of SSA_NAMEs that must not be moved, they are reachable + from one or more cond conditions. If this set overlaps with CHAIN + then FIXED takes precedence. This deals with non-single use + cases. + - BASES: List of all load data references found during traversal. */ + hash_set<tree> chain, fixed; + auto_vec<data_reference *> bases; + basic_block dest_bb = NULL; + + hash_set <gimple *> visited; + use_operand_p use_p; + ssa_op_iter iter; + class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + class loop *loop_nest = loop_outer (loop); + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "loop contains multiple exits, analyzing" + " statement dependencies.\n"); + + for (gimple *c : LOOP_VINFO_LOOP_CONDS (loop_vinfo)) + { + stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (c); + if (STMT_VINFO_TYPE (loop_cond_info) != loop_exit_ctrl_vec_info_type) + continue; + + gimple_stmt_iterator gsi = gsi_for_stmt (c); + + /* First determine the list of statements that we can't move because they + are required for the early break vectorization itself. 
*/ + auto_vec <gimple *> workset; + workset.safe_push (c); + do { + gimple *op = workset.pop (); + if (visited.add (op) + || is_a <gphi *> (op) + || is_gimple_debug (op)) + continue; + + if (gimple_has_lhs (op)) + fixed.add (gimple_get_lhs (op)); + + stmt_vec_info def_info = loop_vinfo->lookup_stmt (op); + if (!def_info) + continue; + + gimple *def_stmt = STMT_VINFO_STMT (def_info); + FOR_EACH_SSA_USE_OPERAND (use_p, def_stmt, iter, SSA_OP_USE) + { + tree use = USE_FROM_PTR (use_p); + if (TREE_CODE (use) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (use)) + continue; + + if (gimple *g = SSA_NAME_DEF_STMT (use)) + workset.safe_push (g); + } + } while (!workset.is_empty ()); + + /* Now analyze all the remaining statements and try to determine which + instructions are allowed/needed to be moved. */ + while (!gsi_end_p (gsi)) + { + gimple *stmt = gsi_stmt (gsi); + gsi_prev (&gsi); + if (!gimple_has_ops (stmt) + || is_gimple_debug (stmt)) + continue; + + tree dest = NULL_TREE; + /* Try to find the SSA_NAME being defined. For Statements with an LHS + use the LHS, if not, assume that the first argument of a call is + the value being defined. e.g. MASKED_LOAD etc. */ + if (gimple_has_lhs (stmt)) + dest = gimple_get_lhs (stmt); + else if (const gcall *call = dyn_cast <const gcall *> (stmt)) + dest = gimple_arg (call, 0); + + bool move = chain.contains (dest); + + stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (stmt); + if (!stmt_vinfo) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "early breaks not supported. Unknown" + " statement: %G", stmt); + return opt_result::failure_at (c, + "can't safely apply code motion to " + "dependencies of %G to vectorize " + "the early exit.\n", c); + } + + auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo); + if (dr_ref) + { + /* We currently only support statically allocated objects due to + not having first-faulting loads support or peeling for + alignment support. 
Compute the size of the referenced object + (it could be dynamically allocated). */ + tree obj = DR_BASE_ADDRESS (dr_ref); + if (!obj || TREE_CODE (obj) != ADDR_EXPR) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "early breaks only supported on statically" + " allocated objects.\n"); + return opt_result::failure_at (c, + "can't safely apply code motion to " + "dependencies of %G to vectorize " + "the early exit.\n", c); + } + + tree refop = TREE_OPERAND (obj, 0); + tree refbase = get_base_address (refop); + if (!refbase || !DECL_P (refbase) || !DECL_SIZE (refbase) + || TREE_CODE (DECL_SIZE (refbase)) != INTEGER_CST) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "early breaks only supported on" + " statically allocated objects.\n"); + return opt_result::failure_at (c, + "can't safely apply code motion to " + "dependencies of %G to vectorize " + "the early exit.\n", c); + } + + /* Check if vector accesses to the object will be within + bounds. */ + tree stype = TREE_TYPE (DECL_SIZE (refbase)); + tree access = fold_build2 (PLUS_EXPR, stype, DR_OFFSET (dr_ref), + DR_INIT (dr_ref)); + tree final_adj + = fold_build2 (MULT_EXPR, stype, LOOP_VINFO_NITERS (loop_vinfo), + DR_STEP (dr_ref)); + + /* must be a constant or assume loop will be versioned or niters + bounded by VF so accesses are within range. 
*/ + if (TREE_CODE (access) == INTEGER_CST + && TREE_CODE (final_adj) == INTEGER_CST) + { + access = fold_build2 (PLUS_EXPR, stype, access, final_adj); + wide_int size = wi::to_wide (DECL_SIZE (refbase)); + wide_int off = wi::to_wide (access); + if (wi::ge_p (off, size, UNSIGNED)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "early breaks not supported:" + " vectorization would read beyond size" + " of object %T.\n", obj); + return opt_result::failure_at (c, + "can't safely apply code motion to " + "dependencies of %G to vectorize " + "the early exit.\n", c); + } + } + + if (DR_IS_READ (dr_ref)) + bases.safe_push (dr_ref); + else if (DR_IS_WRITE (dr_ref)) + { + /* We are moving writes down in the CFG. To be sure that this + is valid after vectorization we have to check all the loads + we are hoisting the stores past to see if any of them may + alias or are the same object. + + Same objects will not be an issue because unless the store + is marked volatile the value can be forwarded. If the + store is marked volatile we don't vectorize the loop + anyway. + + That leaves the check for aliasing. We don't really need + to care about the stores aliasing with each other since the + stores are moved in order so the effects are still observed + correctly. This leaves the check for WAR dependencies + which we would be introducing here if the DR can alias. + The check is quadratic in loads/stores but I have not found + a better API to do this. I believe all loads and stores + must be checked. We also must check them when we + encountered the store, since we don't care about loads past + the store. 
*/ + + for (auto dr_read : bases) + if (dr_may_alias_p (dr_read, dr_ref, loop_nest)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, + vect_location, + "early breaks not supported: " + "overlapping loads and stores " + "found before the break " + "statement.\n"); + + return opt_result::failure_at (stmt, + "can't safely apply code motion to dependencies" + " to vectorize the early exit. %G may alias with" + " %G\n", stmt, dr_read->stmt); + } + + /* Any writes starts a new chain. */ + move = true; + } + } + + /* If a statement is live and escapes the loop through usage in the + loop epilogue then we can't move it since we need to maintain its + reachability through all exits. */ + bool skip = false; + if (STMT_VINFO_LIVE_P (stmt_vinfo) + && !(dr_ref && DR_IS_WRITE (dr_ref))) + { + imm_use_iterator imm_iter; + use_operand_p use_p; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, dest) + { + basic_block bb = gimple_bb (USE_STMT (use_p)); + skip = bb == LOOP_VINFO_IV_EXIT (loop_vinfo)->dest; + if (skip) + break; + } + } + + /* If we found the defining statement of a something that's part of + the chain then expand the chain with the new SSA_VARs being + used. */ + if (!skip && move) + { + use_operand_p use_p; + ssa_op_iter iter; + FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE) + { + tree op = USE_FROM_PTR (use_p); + gcc_assert (TREE_CODE (op) == SSA_NAME); + if (fixed.contains (dest)) + { + move = false; + fixed.add (op); + } + else + chain.add (op); + } + + if (dump_enabled_p ()) + { + if (move) + dump_printf_loc (MSG_NOTE, vect_location, + "found chain %G", stmt); + else + dump_printf_loc (MSG_NOTE, vect_location, + "ignored chain %G, not single use", stmt); + } + } + + if (move) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "==> recording stmt %G", stmt); + + /* If we've moved a VDEF, extract the defining MEM and update + usages of it. */ + tree vdef; + /* This statement is to be moved. 
*/ + if ((vdef = gimple_vdef (stmt))) + LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS (loop_vinfo).safe_push ( + stmt); + } + + if (gimple_vuse (stmt) && !gimple_vdef (stmt)) + { + LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).safe_insert (0, stmt); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "marked statement for vUSE update: %G", stmt); + } + } + + /* Save destination as we go, BB are visited in order and the last one + is where statements should be moved to. */ + if (!dest_bb) + dest_bb = gimple_bb (c); + else + { + basic_block curr_bb = gimple_bb (c); + if (dominated_by_p (CDI_DOMINATORS, curr_bb, dest_bb)) + dest_bb = curr_bb; + } + + /* Mark the statement as a condition. */ + STMT_VINFO_DEF_TYPE (loop_cond_info) = vect_condition_def; + } + + basic_block dest_bb0 = EDGE_SUCC (dest_bb, 0)->dest; + basic_block dest_bb1 = EDGE_SUCC (dest_bb, 1)->dest; + dest_bb = flow_bb_inside_loop_p (loop, dest_bb0) ? dest_bb0 : dest_bb1; + /* We don't allow outer -> inner loop transitions which should have been + trapped already during loop form analysis. */ + gcc_assert (dest_bb->loop_father == loop); + + gcc_assert (dest_bb); + LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb; + + if (!LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).is_empty ()) + { + /* All uses shall be updated to that of the first load. Entries are + stored in reverse order. */ + tree vuse = gimple_vuse (LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).last ()); + for (auto g : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "will update use: %T, mem_ref: %G", vuse, g); + } + } + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "recorded statements to be moved to BB %d\n", + LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo)->index); + + return opt_result::success (); +} + /* Function vect_analyze_data_ref_dependences. 
Examine all the data references in the loop, and make sure there do not @@ -657,6 +1028,11 @@ vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, return res; } + /* If we have early break statements in the loop, check to see if they + are of a form we can vectorize. */ + if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) + return vect_analyze_early_break_dependences (loop_vinfo); + return opt_result::success (); } diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index fb8d999ee6bfaff551ac06ac2f3aea5354914659..0a90d2860b8d037b72fd41d4240804aa390467ea 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -1040,6 +1040,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) partial_load_store_bias (0), peeling_for_gaps (false), peeling_for_niter (false), + early_breaks (false), no_data_dependencies (false), has_mask_store (false), scalar_loop_scaling (profile_probability::uninitialized ()), @@ -11548,6 +11549,56 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance) epilogue_vinfo->shared->save_datarefs (); } +/* When vectorizing early break statements, instructions that happen before + the early break in the current BB need to be moved to after the early + break. This function deals with that and assumes that any validity + checks have already been performed. + + While moving the instructions if it encounters a VUSE or VDEF it then + corrects the VUSES as it moves the statements along. The basic block + recorded in LOOP_VINFO_EARLY_BRK_DEST_BB is the location in which to + insert the new statements. */ + +static void +move_early_exit_stmts (loop_vec_info loop_vinfo) +{ + DUMP_VECT_SCOPE ("move_early_exit_stmts"); + + if (LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS (loop_vinfo).is_empty ()) + return; + + /* Move all stmts that need moving. 
*/ + basic_block dest_bb = LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo); + gimple_stmt_iterator dest_gsi = gsi_start_bb (dest_bb); + + for (gimple *stmt : LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS (loop_vinfo)) + { + /* Check to see if statement is still required for vect or has been + elided. */ + auto stmt_info = loop_vinfo->lookup_stmt (stmt); + if (!stmt_info) + continue; + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "moving stmt %G", stmt); + + gimple_stmt_iterator stmt_gsi = gsi_for_stmt (stmt); + gsi_move_before (&stmt_gsi, &dest_gsi); + gsi_prev (&dest_gsi); + } + + /* Update all the stmts with their new reaching VUSES. */ + tree vuse + = gimple_vuse (LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS (loop_vinfo).last ()); + for (auto p : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "updating vuse to %T for load %G", vuse, p); + gimple_set_vuse (p, vuse); + update_stmt (p); + } +} + /* Function vect_transform_loop. The analysis phase has determined that the loop is vectorizable. @@ -11697,6 +11748,11 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) vect_schedule_slp (loop_vinfo, LOOP_VINFO_SLP_INSTANCES (loop_vinfo)); } + /* Handle any code motion that we need to for early-break vectorization after + we've done peeling but just before we start vectorizing. */ + if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) + move_early_exit_stmts (loop_vinfo); + /* FORNOW: the vectorizer supports only loops which body consist of one basic block (header + empty latch). 
When the vectorizer will support more involved loop forms, the order by which the BBs are diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 96e4a6cffadebb43946c5cb7e9849c915da589bc..b3a09c0a804a38e17ef32b6ce13b98b077459fc7 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -359,8 +359,8 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, *live_p = false; /* cond stmt other than loop exit cond. */ - if (is_ctrl_stmt (stmt_info->stmt) - && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type) + gimple *stmt = STMT_VINFO_STMT (stmt_info); + if (dyn_cast <gcond *> (stmt)) *relevant = vect_used_in_scope; /* changing memory. */ @@ -13530,6 +13530,9 @@ vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt, case vect_first_order_recurrence: dump_printf (MSG_NOTE, "first order recurrence\n"); break; + case vect_condition_def: + dump_printf (MSG_NOTE, "control flow\n"); + break; case vect_unknown_def_type: dump_printf (MSG_NOTE, "unknown\n"); break; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index e4d7ab4567cef3c018b958f98eeff045d3477725..3c9478a3dc8750c71e0bf2a36a5b0815afc3fd94 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -66,6 +66,7 @@ enum vect_def_type { vect_double_reduction_def, vect_nested_cycle, vect_first_order_recurrence, + vect_condition_def, vect_unknown_def_type }; @@ -888,6 +889,10 @@ public: we need to peel off iterations at the end to form an epilogue loop. */ bool peeling_for_niter; + /* When the loop has early breaks that we can vectorize we need to peel + the loop for the break finding loop. */ + bool early_breaks; + /* List of loop additional IV conditionals found in the loop. */ auto_vec<gcond *> conds; @@ -942,6 +947,20 @@ public: /* The controlling loop IV for the scalar loop being vectorized. This IV controls the natural exits of the loop. 
*/ edge scalar_loop_iv_exit; + + /* Used to store the list of statements needing to be moved if doing early + break vectorization as they would violate the scalar loop semantics if + vectorized in their current location. These are stored in order that they need + to be moved. */ + auto_vec<gimple *> early_break_conflict; + + /* The final basic block where to move statements to. In the case of + multiple exits this could be pretty far away. */ + basic_block early_break_dest_bb; + + /* Statements whose VUSES need updating if early break vectorization is to + happen. */ + auto_vec<gimple*> early_break_vuses; } *loop_vec_info; /* Access Functions. */ @@ -996,6 +1015,10 @@ public: #define LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps #define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter +#define LOOP_VINFO_EARLY_BREAKS(L) (L)->early_breaks +#define LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS(L) (L)->early_break_conflict +#define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb +#define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses #define LOOP_VINFO_LOOP_CONDS(L) (L)->conds #define LOOP_VINFO_LOOP_IV_COND(L) (L)->loop_iv_cond #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
rb17963.patch
Description: rb17963.patch