On Mon, 29 Jan 2024, Tamar Christina wrote:

> Hi All,
> 
> When analyzing loads for early break, it was always the intention that,
> for the exit where things get moved to, we only check the loads that
> can be reached from the condition.

Looking at the code I'm a bit confused that we always move to
single_pred (loop->latch) - IIRC that was different at some point?

Shouldn't we move stores after the last early exit condition instead?

In particular for the peeled case single_pred (loop->latch) is the
block with the actual early exit condition?  So for that case we'd
need to move to the latch itself instead?  For non-peeled we move
to the block with the IV condition which looks OK.

> However the main loop checks all loads and we skip the destination BB.  
> As such we never actually check the loads reachable from the COND in the 
> last BB unless this BB was also the exit chosen by the vectorizer.
> 
> This leads us to incorrectly vectorize the loop in the PR and in doing so 
> access
> out of bounds.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?

The patch ends up with a worklist and another confusing comment

+  /* For the destination BB we need to only analyze loads reachable from 
the early
+     break statement itself.  */

But I think it's downstream of the issue above.  That said,
even for the non-peeled case we need to check ref_within_array_bound,
no?

So what about re-doing that initial loop like the following instead
(and also fix dest_bb, but I'd like clarification here).  Basically
walk all blocks, do the ref_within_array_bound first and only
after we've seen 'dest_bb' do the checks required for moving
stores for all upstream BBs.

And dest_bb should be

  /* Move side-effects to the in-loop destination of the last early
     exit.  */
  if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
    dest_bb = loop->latch;
  else
    dest_bb = single_pred (loop->latch);


diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index f592aeb8028..d6c8910dd6c 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -668,7 +668,6 @@ vect_analyze_early_break_dependences (loop_vec_info 
loop_vinfo)
   auto_vec<data_reference *> bases;
   basic_block dest_bb = NULL;
 
-  hash_set <gimple *> visited;
   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   class loop *loop_nest = loop_outer (loop);
 
@@ -681,15 +680,11 @@ vect_analyze_early_break_dependences (loop_vec_info 
loop_vinfo)
      side-effects to is always the latch connected exit.  When we support
      general control flow we can do better but for now this is fine.  */
   dest_bb = single_pred (loop->latch);
-  basic_block bb = dest_bb;
+  basic_block bb = loop->latch;
+  bool check_deps = false;
 
   do
     {
-      /* If the destination block is also the header then we have nothing to 
do.  */
-      if (!single_pred_p (bb))
-       continue;
-
-      bb = single_pred (bb);
       gimple_stmt_iterator gsi = gsi_last_bb (bb);
 
       /* Now analyze all the remaining statements and try to determine which
@@ -707,6 +702,25 @@ vect_analyze_early_break_dependences (loop_vec_info 
loop_vinfo)
          if (!dr_ref)
            continue;
 
+         /* Check if vector accesses to the object will be within bounds.
+            must be a constant or assume loop will be versioned or niters
+            bounded by VF so accesses are within range.  */
+         if (!ref_within_array_bound (stmt, DR_REF (dr_ref)))
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "early breaks not supported: vectorization "
+                                "would %s beyond size of obj.",
+                                DR_IS_READ (dr_ref) ? "read" : "write");
+             return opt_result::failure_at (stmt,
+                                "can't safely apply code motion to "
+                                "dependencies of %G to vectorize "
+                                "the early exit.\n", stmt);
+           }
+
+         if (!check_deps)
+           continue;
+
          /* We currently only support statically allocated objects due to
             not having first-faulting loads support or peeling for
             alignment support.  Compute the size of the referenced object
@@ -739,22 +753,6 @@ vect_analyze_early_break_dependences (loop_vec_info 
loop_vinfo)
                                 "the early exit.\n", stmt);
            }
 
-         /* Check if vector accesses to the object will be within bounds.
-            must be a constant or assume loop will be versioned or niters
-            bounded by VF so accesses are within range.  */
-         if (!ref_within_array_bound (stmt, DR_REF (dr_ref)))
-           {
-             if (dump_enabled_p ())
-               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                "early breaks not supported: vectorization "
-                                "would %s beyond size of obj.",
-                                DR_IS_READ (dr_ref) ? "read" : "write");
-             return opt_result::failure_at (stmt,
-                                "can't safely apply code motion to "
-                                "dependencies of %G to vectorize "
-                                "the early exit.\n", stmt);
-           }
-
          if (DR_IS_READ (dr_ref))
            bases.safe_push (dr_ref);
          else if (DR_IS_WRITE (dr_ref))
@@ -814,8 +812,16 @@ vect_analyze_early_break_dependences (loop_vec_info 
loop_vinfo)
                                 "marked statement for vUSE update: %G", stmt);
            }
        }
+      if (!single_pred_p (bb))
+       {
+         gcc_assert (bb == loop->header);
+         break;
+       }
+      if (bb == dest_bb)
+       check_deps = true;
+      bb = single_pred (bb);
     }
-  while (bb != loop->header);
+  while (1);
 
   /* We don't allow outer -> inner loop transitions which should have been
      trapped already during loop form analysis.  */

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>       PR tree-optimization/113588
>       * tree-vect-data-refs.cc (vect_analyze_early_break_dependences_1): New.
>       (vect_analyze_data_ref_dependence):  Use it.
>       (vect_analyze_early_break_dependences): Update comments.
> 
> gcc/testsuite/ChangeLog:
> 
>       PR tree-optimization/113588
>       * gcc.dg/vect/vect-early-break_108-pr113588.c: New test.
>       * gcc.dg/vect/vect-early-break_109-pr113588.c: New test.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_108-pr113588.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_108-pr113588.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..e488619c9aac41fafbcf479818392a6bb7c6924f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_108-pr113588.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +
> +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
> +
> +int foo (const char *s, unsigned long n)
> +{
> + unsigned long len = 0;
> + while (*s++ && n--)
> +   ++len;
> + return len;
> +}
> +
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_109-pr113588.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_109-pr113588.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..488c19d3ede809631d1a7ede0e7f7bcdc7a1ae43
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_109-pr113588.c
> @@ -0,0 +1,44 @@
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-require-effective-target mmap } */
> +
> +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
> +
> +#include <sys/mman.h>
> +#include <unistd.h>
> +
> +#include "tree-vect.h"
> +
> +__attribute__((noipa))
> +int foo (const char *s, unsigned long n)
> +{
> + unsigned long len = 0;
> + while (*s++ && n--)
> +   ++len;
> + return len;
> +}
> +
> +int main()
> +{
> +
> +  check_vect ();
> +
> +  long pgsz = sysconf (_SC_PAGESIZE);
> +  void *p = mmap (NULL, pgsz * 3, PROT_READ|PROT_WRITE,
> +     MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
> +  if (p == MAP_FAILED)
> +    return 0;
> +  mprotect (p, pgsz, PROT_NONE);
> +  mprotect (p+2*pgsz, pgsz, PROT_NONE);
> +  char *p1 = p + pgsz;
> +  p1[0] = 1;
> +  p1[1] = 0;
> +  foo (p1, 1000);
> +  p1 = p + 2*pgsz - 2;
> +  p1[0] = 1;
> +  p1[1] = 0;
> +  foo (p1, 1000);
> +  return 0;
> +}
> +
> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> index 
> f592aeb8028afd4fd70e2175104efab2a2c0d82e..52cef242a7ce5d0e525bff639fa1dc2f0a6f30b9
>  100644
> --- a/gcc/tree-vect-data-refs.cc
> +++ b/gcc/tree-vect-data-refs.cc
> @@ -619,10 +619,69 @@ vect_analyze_data_ref_dependence (struct 
> data_dependence_relation *ddr,
>    return opt_result::success ();
>  }
>  
> -/* Funcion vect_analyze_early_break_dependences.
> +/* Function vect_analyze_early_break_dependences_1
>  
> -   Examime all the data references in the loop and make sure that if we have
> -   mulitple exits that we are able to safely move stores such that they 
> become
> +   Helper function of vect_analyze_early_break_dependences which performs 
> safety
> +   analysis for load operations in an early break.  */
> +
> +static opt_result
> +vect_analyze_early_break_dependences_1 (data_reference *dr_ref, gimple *stmt)
> +{
> +  /* We currently only support statically allocated objects due to
> +     not having first-faulting loads support or peeling for
> +     alignment support.  Compute the size of the referenced object
> +     (it could be dynamically allocated).  */
> +  tree obj = DR_BASE_ADDRESS (dr_ref);
> +  if (!obj || TREE_CODE (obj) != ADDR_EXPR)
> +    {
> +      if (dump_enabled_p ())
> +     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                      "early breaks only supported on statically"
> +                      " allocated objects.\n");
> +      return opt_result::failure_at (stmt,
> +                      "can't safely apply code motion to "
> +                      "dependencies of %G to vectorize "
> +                      "the early exit.\n", stmt);
> +    }
> +
> +  tree refop = TREE_OPERAND (obj, 0);
> +  tree refbase = get_base_address (refop);
> +  if (!refbase || !DECL_P (refbase) || !DECL_SIZE (refbase)
> +      || TREE_CODE (DECL_SIZE (refbase)) != INTEGER_CST)
> +    {
> +      if (dump_enabled_p ())
> +     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                      "early breaks only supported on"
> +                      " statically allocated objects.\n");
> +      return opt_result::failure_at (stmt,
> +                      "can't safely apply code motion to "
> +                      "dependencies of %G to vectorize "
> +                      "the early exit.\n", stmt);
> +    }
> +
> +  /* Check if vector accesses to the object will be within bounds.
> +     must be a constant or assume loop will be versioned or niters
> +     bounded by VF so accesses are within range.  */
> +  if (!ref_within_array_bound (stmt, DR_REF (dr_ref)))
> +    {
> +      if (dump_enabled_p ())
> +     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                      "early breaks not supported: vectorization "
> +                      "would %s beyond size of obj.",
> +                      DR_IS_READ (dr_ref) ? "read" : "write");
> +      return opt_result::failure_at (stmt,
> +                      "can't safely apply code motion to "
> +                      "dependencies of %G to vectorize "
> +                      "the early exit.\n", stmt);
> +    }
> +
> +  return opt_result::success ();
> +}
> +
> +/* Function vect_analyze_early_break_dependences.
> +
> +   Examine all the data references in the loop and make sure that if we have
> +   multiple exits that we are able to safely move stores such that they 
> become
>     safe for vectorization.  The function also calculates the place where to 
> move
>     the instructions to and computes what the new vUSE chain should be.
>  
> @@ -639,7 +698,7 @@ vect_analyze_data_ref_dependence (struct 
> data_dependence_relation *ddr,
>       - Multiple loads are allowed as long as they don't alias.
>  
>     NOTE:
> -     This implemementation is very conservative. Any overlappig loads/stores
> +     This implementation is very conservative. Any overlapping loads/stores
>       that take place before the early break statement gets rejected aside 
> from
>       WAR dependencies.
>  
> @@ -668,7 +727,6 @@ vect_analyze_early_break_dependences (loop_vec_info 
> loop_vinfo)
>    auto_vec<data_reference *> bases;
>    basic_block dest_bb = NULL;
>  
> -  hash_set <gimple *> visited;
>    class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
>    class loop *loop_nest = loop_outer (loop);
>  
> @@ -683,6 +741,7 @@ vect_analyze_early_break_dependences (loop_vec_info 
> loop_vinfo)
>    dest_bb = single_pred (loop->latch);
>    basic_block bb = dest_bb;
>  
> +  /* First analyse all blocks leading to dest_bb excluding dest_bb itself.  
> */
>    do
>      {
>        /* If the destination block is also the header then we have nothing to 
> do.  */
> @@ -707,53 +766,11 @@ vect_analyze_early_break_dependences (loop_vec_info 
> loop_vinfo)
>         if (!dr_ref)
>           continue;
>  
> -       /* We currently only support statically allocated objects due to
> -          not having first-faulting loads support or peeling for
> -          alignment support.  Compute the size of the referenced object
> -          (it could be dynamically allocated).  */
> -       tree obj = DR_BASE_ADDRESS (dr_ref);
> -       if (!obj || TREE_CODE (obj) != ADDR_EXPR)
> -         {
> -           if (dump_enabled_p ())
> -             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                              "early breaks only supported on statically"
> -                              " allocated objects.\n");
> -           return opt_result::failure_at (stmt,
> -                              "can't safely apply code motion to "
> -                              "dependencies of %G to vectorize "
> -                              "the early exit.\n", stmt);
> -         }
> -
> -       tree refop = TREE_OPERAND (obj, 0);
> -       tree refbase = get_base_address (refop);
> -       if (!refbase || !DECL_P (refbase) || !DECL_SIZE (refbase)
> -           || TREE_CODE (DECL_SIZE (refbase)) != INTEGER_CST)
> -         {
> -           if (dump_enabled_p ())
> -             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                              "early breaks only supported on"
> -                              " statically allocated objects.\n");
> -           return opt_result::failure_at (stmt,
> -                              "can't safely apply code motion to "
> -                              "dependencies of %G to vectorize "
> -                              "the early exit.\n", stmt);
> -         }
> -
> -       /* Check if vector accesses to the object will be within bounds.
> -          must be a constant or assume loop will be versioned or niters
> -          bounded by VF so accesses are within range.  */
> -       if (!ref_within_array_bound (stmt, DR_REF (dr_ref)))
> -         {
> -           if (dump_enabled_p ())
> -             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                              "early breaks not supported: vectorization "
> -                              "would %s beyond size of obj.",
> -                              DR_IS_READ (dr_ref) ? "read" : "write");
> -           return opt_result::failure_at (stmt,
> -                              "can't safely apply code motion to "
> -                              "dependencies of %G to vectorize "
> -                              "the early exit.\n", stmt);
> -         }
> +       /* Check if the operation is one we can safely do.  */
> +       opt_result res
> +         = vect_analyze_early_break_dependences_1 (dr_ref, stmt);
> +       if (!res)
> +         return res;
>  
>         if (DR_IS_READ (dr_ref))
>           bases.safe_push (dr_ref);
> @@ -817,6 +834,51 @@ vect_analyze_early_break_dependences (loop_vec_info 
> loop_vinfo)
>      }
>    while (bb != loop->header);
>  
> +  /* For the destination BB we need to only analyze loads reachable from the 
> early
> +     break statement itself.  */
> +  auto_vec <tree> workset;
> +  hash_set <tree> visited;
> +  gimple *last_stmt = gsi_stmt (gsi_last_bb (dest_bb));
> +  gcond *last_cond = dyn_cast <gcond *> (last_stmt);
> +  /* If the cast fails we have a different control flow statement in the 
> latch.  Most
> +     commonly this is a switch.  */
> +  if (!last_cond)
> +   return opt_result::failure_at (last_stmt,
> +                          "can't safely apply code motion to dependencies"
> +                          " to vectorize the early exit, unknown control flow"
> +                          " in stmt %G", last_stmt);
> +  workset.safe_push (gimple_cond_lhs (last_cond));
> +  workset.safe_push (gimple_cond_rhs (last_cond));
> +
> +  imm_use_iterator imm_iter;
> +  use_operand_p use_p;
> +  tree lhs;
> +  do
> +    {
> +      tree op = workset.pop ();
> +      if (visited.add (op))
> +     continue;
> +      stmt_vec_info stmt_vinfo = loop_vinfo->lookup_def (op);
> +
> +      /* Not defined in loop, don't care.  */
> +      if (!stmt_vinfo)
> +     continue;
> +      gimple *stmt = STMT_VINFO_STMT (stmt_vinfo);
> +      auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo);
> +      if (dr_ref)
> +     {
> +       opt_result res
> +         = vect_analyze_early_break_dependences_1 (dr_ref, stmt);
> +       if (!res)
> +         return res;
> +     }
> +      else
> +     FOR_EACH_IMM_USE_FAST (use_p, imm_iter, op)
> +       if ((lhs = gimple_get_lhs (USE_STMT (use_p))))
> +         workset.safe_push (lhs);
> +    }
> +  while (!workset.is_empty ());
> +
>    /* We don't allow outer -> inner loop transitions which should have been
>       trapped already during loop form analysis.  */
>    gcc_assert (dest_bb->loop_father == loop);
> 
> 
> 
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to