> Am 01.02.2024 um 22:34 schrieb Tamar Christina <tamar.christ...@arm.com>:
> 
> 
>> 
>>> 
>>> If the above is correct then I think I understand what you're saying and
>>> will update the patch and do some checks.
>> 
>> Yes, I think that's what I wanted to say.
>> 
> 
> As discussed:
> 
> Bootstrapped and regtested on aarch64-none-linux-gnu and x86_64-pc-linux-gnu, no 
> issues.
> Also checked both with --enable-lto --with-build-config='bootstrap-O3 
> bootstrap-lto' --enable-multilib
> and --enable-lto --with-build-config=bootstrap-O3 
> --enable-checking=release,yes,rtl,extra;
> and checked the libcrypt testsuite as reported on PR113467.
> 
> Ok for master?

Ok

Richard 

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>    PR tree-optimization/113588
>    PR tree-optimization/113467
>    * tree-vect-data-refs.cc (vect_analyze_data_ref_dependence): Choose correct dest and fix checks.
>    (vect_analyze_early_break_dependences): Update comments.
> 
> gcc/testsuite/ChangeLog:
> 
>    PR tree-optimization/113588
>    PR tree-optimization/113467
>    * gcc.dg/vect/vect-early-break_108-pr113588.c: New test.
>    * gcc.dg/vect/vect-early-break_109-pr113588.c: New test.
> 
> --- inline copy of patch ---
> 
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_108-pr113588.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_108-pr113588.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..e488619c9aac41fafbcf479818392a6bb7c6924f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_108-pr113588.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +
> +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
> +
> +int foo (const char *s, unsigned long n)
> +{
> + unsigned long len = 0;
> + while (*s++ && n--)
> +   ++len;
> + return len;
> +}
> +
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_109-pr113588.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_109-pr113588.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..488c19d3ede809631d1a7ede0e7f7bcdc7a1ae43
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_109-pr113588.c
> @@ -0,0 +1,44 @@
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-require-effective-target mmap } */
> +
> +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
> +
> +#include <sys/mman.h>
> +#include <unistd.h>
> +
> +#include "tree-vect.h"
> +
> +__attribute__((noipa))
> +int foo (const char *s, unsigned long n)
> +{
> + unsigned long len = 0;
> + while (*s++ && n--)
> +   ++len;
> + return len;
> +}
> +
> +int main()
> +{
> +
> +  check_vect ();
> +
> +  long pgsz = sysconf (_SC_PAGESIZE);
> +  void *p = mmap (NULL, pgsz * 3, PROT_READ|PROT_WRITE,
> +     MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
> +  if (p == MAP_FAILED)
> +    return 0;
> +  mprotect (p, pgsz, PROT_NONE);
> +  mprotect (p+2*pgsz, pgsz, PROT_NONE);
> +  char *p1 = p + pgsz;
> +  p1[0] = 1;
> +  p1[1] = 0;
> +  foo (p1, 1000);
> +  p1 = p + 2*pgsz - 2;
> +  p1[0] = 1;
> +  p1[1] = 0;
> +  foo (p1, 1000);
> +  return 0;
> +}
> +
> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> index 
> f592aeb8028afd4fd70e2175104efab2a2c0d82e..53fdfc25d7dc2deb7788176252697d2e455555fc
>  100644
> --- a/gcc/tree-vect-data-refs.cc
> +++ b/gcc/tree-vect-data-refs.cc
> @@ -619,10 +619,10 @@ vect_analyze_data_ref_dependence (struct 
> data_dependence_relation *ddr,
>   return opt_result::success ();
> }
> 
> -/* Funcion vect_analyze_early_break_dependences.
> +/* Function vect_analyze_early_break_dependences.
> 
> -   Examime all the data references in the loop and make sure that if we have
> -   mulitple exits that we are able to safely move stores such that they 
> become
> +   Examine all the data references in the loop and make sure that if we have
> +   multiple exits that we are able to safely move stores such that they 
> become
>    safe for vectorization.  The function also calculates the place where to 
> move
>    the instructions to and computes what the new vUSE chain should be.
> 
> @@ -639,7 +639,7 @@ vect_analyze_data_ref_dependence (struct 
> data_dependence_relation *ddr,
>      - Multiple loads are allowed as long as they don't alias.
> 
>    NOTE:
> -     This implemementation is very conservative. Any overlappig loads/stores
> +     This implementation is very conservative. Any overlapping loads/stores
>      that take place before the early break statement gets rejected aside from
>      WAR dependencies.
> 
> @@ -668,7 +668,6 @@ vect_analyze_early_break_dependences (loop_vec_info 
> loop_vinfo)
>   auto_vec<data_reference *> bases;
>   basic_block dest_bb = NULL;
> 
> -  hash_set <gimple *> visited;
>   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
>   class loop *loop_nest = loop_outer (loop);
> 
> @@ -677,19 +676,33 @@ vect_analyze_early_break_dependences (loop_vec_info 
> loop_vinfo)
>             "loop contains multiple exits, analyzing"
>             " statement dependencies.\n");
> 
> +  if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
> +    if (dump_enabled_p ())
> +      dump_printf_loc (MSG_NOTE, vect_location,
> +               "alternate exit has been chosen as main exit.\n");
> +
>   /* Since we don't support general control flow, the location we'll move the
>      side-effects to is always the latch connected exit.  When we support
> -     general control flow we can do better but for now this is fine.  */
> -  dest_bb = single_pred (loop->latch);
> +     general control flow we can do better but for now this is fine.  Move
> +     side-effects to the in-loop destination of the last early exit.  For 
> the PEELED
> +     case we move the side-effects to the latch block as this is guaranteed 
> to be the
> last block to be executed when a vector iteration finishes.  */
> +  if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
> +    dest_bb = loop->latch;
> +  else
> +    dest_bb = single_pred (loop->latch);
> +
> +  /* We start looking from dest_bb, for the non-PEELED case we don't want to
> +     move any stores already present, but we do want to read and validate the
> +     loads.  */
>   basic_block bb = dest_bb;
> 
> +  /* In the peeled case we need to check all the loads in the loop since to 
> move the
> +     stores we lift the stores over all loads into the latch.  */
> +  bool check_deps = LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo);
> +
>   do
>     {
> -      /* If the destination block is also the header then we have nothing to 
> do.  */
> -      if (!single_pred_p (bb))
> -    continue;
> -
> -      bb = single_pred (bb);
>       gimple_stmt_iterator gsi = gsi_last_bb (bb);
> 
>       /* Now analyze all the remaining statements and try to determine which
> @@ -707,42 +720,13 @@ vect_analyze_early_break_dependences (loop_vec_info 
> loop_vinfo)
>      if (!dr_ref)
>        continue;
> 
> -      /* We currently only support statically allocated objects due to
> -         not having first-faulting loads support or peeling for
> -         alignment support.  Compute the size of the referenced object
> -         (it could be dynamically allocated).  */
> -      tree obj = DR_BASE_ADDRESS (dr_ref);
> -      if (!obj || TREE_CODE (obj) != ADDR_EXPR)
> -        {
> -          if (dump_enabled_p ())
> -        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                 "early breaks only supported on statically"
> -                 " allocated objects.\n");
> -          return opt_result::failure_at (stmt,
> -                 "can't safely apply code motion to "
> -                 "dependencies of %G to vectorize "
> -                 "the early exit.\n", stmt);
> -        }
> -
> -      tree refop = TREE_OPERAND (obj, 0);
> -      tree refbase = get_base_address (refop);
> -      if (!refbase || !DECL_P (refbase) || !DECL_SIZE (refbase)
> -          || TREE_CODE (DECL_SIZE (refbase)) != INTEGER_CST)
> -        {
> -          if (dump_enabled_p ())
> -        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                 "early breaks only supported on"
> -                 " statically allocated objects.\n");
> -          return opt_result::failure_at (stmt,
> -                 "can't safely apply code motion to "
> -                 "dependencies of %G to vectorize "
> -                 "the early exit.\n", stmt);
> -        }
> -
>      /* Check if vector accesses to the object will be within bounds.
>         must be a constant or assume loop will be versioned or niters
> -         bounded by VF so accesses are within range.  */
> -      if (!ref_within_array_bound (stmt, DR_REF (dr_ref)))
> +         bounded by VF so accesses are within range.  We only need to check 
> the
> +         reads since writes are moved to a safe place where if we get there 
> we
> +         know they are safe to perform.  */
> +      if (DR_IS_READ (dr_ref)
> +          && !ref_within_array_bound (stmt, DR_REF (dr_ref)))
>        {
>          if (dump_enabled_p ())
>        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -755,6 +739,9 @@ vect_analyze_early_break_dependences (loop_vec_info 
> loop_vinfo)
>                 "the early exit.\n", stmt);
>        }
> 
> +      if (!check_deps)
> +        continue;
> +
>      if (DR_IS_READ (dr_ref))
>        bases.safe_push (dr_ref);
>      else if (DR_IS_WRITE (dr_ref))
> @@ -814,8 +801,19 @@ vect_analyze_early_break_dependences (loop_vec_info 
> loop_vinfo)
>                 "marked statement for vUSE update: %G", stmt);
>        }
>    }
> +
> +      if (!single_pred_p (bb))
> +    {
> +      gcc_assert (bb == loop->header);
> +      break;
> +    }
> +
> +      /* For the non-PEELED case we don't want to check the loads in the IV 
> exit block
> +     for dependencies with the stores, but any block preceding it we do.  */
> +      check_deps = true;
> +      bb = single_pred (bb);
>     }
> -  while (bb != loop->header);
> +  while (1);
> 
>   /* We don't allow outer -> inner loop transitions which should have been
>      trapped already during loop form analysis.  */
> <rb18216.patch>

Reply via email to