> Am 29.07.2025 um 16:54 schrieb Pengfei Li <pengfei....@arm.com>:
> 
> Hi,
> 
> I have adjusted the test case as you suggested.
> 
> Ok for trunk?

Ok

Thanks,
Richard 

> Thanks,
> Pengfei
> 
> -- >8 --
> This fixes a miscompilation issue introduced by the enablement of
> combined loop peeling and versioning. A test case that reproduces the
> issue is included in the patch.
> 
> When performing loop peeling, GCC usually inserts a skip-vector check.
> This ensures that after peeling, there are enough remaining iterations
> to enter the main vectorized loop. Previously, the check was omitted if
> loop versioning for alignment was applied. It was safe before because
> versioning and peeling for alignment were mutually exclusive.
> 
> However, with combined peeling and versioning enabled, this is not safe
> any more. A loop may be peeled and versioned at the same time. Without
> the skip-vector check, the main vectorized loop can be entered even if
> its iteration count is zero. This can cause the loop to run many more
> iterations than needed, resulting in incorrect results.
> 
> To fix this, the patch changes the condition so that the skip-vector
> check is omitted only when versioning is performed alone, without
> peeling.
> 
> This patch is bootstrapped and regression-tested on x86_64-linux-gnu,
> arm-linux-gnueabihf and aarch64-linux-gnu.
> 
>    PR tree-optimization/121020
> 
> gcc/ChangeLog:
> 
>    * tree-vect-loop-manip.cc (vect_do_peeling): Update the
>      condition of omitting the skip-vector check.
>    * tree-vectorizer.h (LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING):
>      Add a helper macro.
> 
> gcc/testsuite/ChangeLog:
> 
>    * gcc.dg/vect/vect-early-break_138-pr121020.c: New test.
> ---
> .../vect/vect-early-break_138-pr121020.c      | 54 +++++++++++++++++++
> gcc/tree-vect-loop-manip.cc                   |  2 +-
> gcc/tree-vectorizer.h                         |  4 ++
> 3 files changed, 59 insertions(+), 1 deletion(-)
> create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c
> 
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c
> new file mode 100644
> index 00000000000..8cb62bf5bc9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c
> @@ -0,0 +1,54 @@
> +/* PR tree-optimization/121020 */
> +/* { dg-options "-O3 --vect-cost-model=unlimited" } */
> +/* { dg-additional-options "-march=znver2" { target x86_64-*-* i?86-*-* } } */
> +/* { dg-require-effective-target mmap } */
> +/* { dg-require-effective-target vect_early_break } */
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <sys/mman.h>
> +#include <unistd.h>
> +#include "tree-vect.h"
> +
> +__attribute__((noipa))
> +bool equal (uint64_t *restrict p, uint64_t *restrict q, int length)
> +{
> +  for (int i = 0; i < length; i++) {
> +    if (*(p + i) != *(q + i))
> +      return false;
> +  }
> +  return true;
> +}
> +
> +int main ()
> +{
> +  check_vect ();
> +
> +  long pgsz = sysconf (_SC_PAGESIZE);
> +  if (pgsz == -1) {
> +    fprintf (stderr, "sysconf failed\n");
> +    return 0;
> +  }
> +
> +  /* Allocate a whole page of memory.  */
> +  void *mem = mmap (NULL, pgsz, PROT_READ | PROT_WRITE,
> +            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> +  if (mem == MAP_FAILED) {
> +    fprintf (stderr, "mmap failed\n");
> +    return 0;
> +  }
> +  uint64_t *p1 = (uint64_t *) mem;
> +  uint64_t *p2 = (uint64_t *) mem + 32;
> +
> +  /* The first 16 elements pointed to by p1 and p2 are the same.  */
> +  for (int i = 0; i < 32; i++) {
> +    *(p1 + i) = 0;
> +    *(p2 + i) = (i < 16 ? 0 : -1);
> +  }
> +
> +  /* All calls to equal should return true.  */
> +  for (int len = 0; len < 16; len++) {
> +    if (!equal (p1 + 1, p2 + 1, len))
> +      __builtin_abort();
> +  }
> +}
> diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
> index 2d01a4b0ed1..7fcbc1ad2eb 100644
> --- a/gcc/tree-vect-loop-manip.cc
> +++ b/gcc/tree-vect-loop-manip.cc
> @@ -3295,7 +3295,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
>   bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
>              ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
>                  bound_prolog + bound_epilog)
> -              : (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
> +              : (!LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING (loop_vinfo)
>             || vect_epilogues));
> 
>   /* Epilog loop must be executed if the number of iterations for epilog
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 203e5ad964a..e1900279432 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -1197,6 +1197,10 @@ public:
>    || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L)        \
>    || LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L))
> 
> +#define LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING(L)    \
> +  ((L)->may_misalign_stmts.length () > 0        \
> +   && !LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (L))
> +
> #define LOOP_VINFO_NITERS_KNOWN_P(L)          \
>   (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)
> 
> --
> 2.43.0
> 

Reply via email to