> Am 29.07.2025 um 16:54 schrieb Pengfei Li <pengfei....@arm.com>:
>
> Hi,
>
> I have adjusted the test case as you suggested.
>
> Ok for trunk?
Ok
Thanks,
Richard
> Thanks,
> Pengfei
>
> -- >8 --
> This fixes a miscompilation issue introduced by the enablement of
> combined loop peeling and versioning. A test case that reproduces the
> issue is included in the patch.
>
> When performing loop peeling, GCC usually inserts a skip-vector check.
> This ensures that after peeling, there are enough remaining iterations
> to enter the main vectorized loop. Previously, the check was omitted if
> loop versioning for alignment was applied. It was safe before because
> versioning and peeling for alignment were mutually exclusive.
>
> However, with combined peeling and versioning enabled, this is not safe
> any more. A loop may be peeled and versioned at the same time. Without
> the skip-vector check, the main vectorized loop can be entered even if
> its iteration count is zero. This can cause the loop running many more
> iterations than needed, resulting in incorrect results.
>
> To fix this, the patch updates the condition of omitting the skip-vector
> check to when versioning is performed alone without peeling.
>
> This patch is bootstrapped and regression-tested on x86_64-linux-gnu,
> arm-linux-gnueabihf and aarch64-linux-gnu.
>
> PR tree-optimization/121020
>
> gcc/ChangeLog:
>
> * tree-vect-loop-manip.cc (vect_do_peeling): Update the
> condition of omitting the skip-vector check.
> * tree-vectorizer.h (LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING):
> Add a helper macro.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/vect/vect-early-break_138-pr121020.c: New test.
> ---
> .../vect/vect-early-break_138-pr121020.c | 54 +++++++++++++++++++
> gcc/tree-vect-loop-manip.cc | 2 +-
> gcc/tree-vectorizer.h | 4 ++
> 3 files changed, 59 insertions(+), 1 deletion(-)
> create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c
>
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c
> new file mode 100644
> index 00000000000..8cb62bf5bc9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c
> @@ -0,0 +1,54 @@
> +/* PR tree-optimization/121020 */
> +/* { dg-options "-O3 --vect-cost-model=unlimited" } */
> +/* { dg-additional-options "-march=znver2" { target x86_64-*-* i?86-*-* } }
> */
> +/* { dg-require-effective-target mmap } */
> +/* { dg-require-effective-target vect_early_break } */
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <sys/mman.h>
> +#include <unistd.h>
> +#include "tree-vect.h"
> +
> +__attribute__((noipa))
> +bool equal (uint64_t *restrict p, uint64_t *restrict q, int length)
> +{
> + for (int i = 0; i < length; i++) {
> + if (*(p + i) != *(q + i))
> + return false;
> + }
> + return true;
> +}
> +
> +int main ()
> +{
> + check_vect ();
> +
> + long pgsz = sysconf (_SC_PAGESIZE);
> + if (pgsz == -1) {
> + fprintf (stderr, "sysconf failed\n");
> + return 0;
> + }
> +
> + /* Allocate a whole page of memory. */
> + void *mem = mmap (NULL, pgsz, PROT_READ | PROT_WRITE,
> + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> + if (mem == MAP_FAILED) {
> + fprintf (stderr, "mmap failed\n");
> + return 0;
> + }
> + uint64_t *p1 = (uint64_t *) mem;
> + uint64_t *p2 = (uint64_t *) mem + 32;
> +
> + /* The first 16 elements pointed to by p1 and p2 are the same. */
> + for (int i = 0; i < 32; i++) {
> + *(p1 + i) = 0;
> + *(p2 + i) = (i < 16 ? 0 : -1);
> + }
> +
> + /* All calls to equal should return true. */
> + for (int len = 0; len < 16; len++) {
> + if (!equal (p1 + 1, p2 + 1, len))
> + __builtin_abort();
> + }
> +}
> diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
> index 2d01a4b0ed1..7fcbc1ad2eb 100644
> --- a/gcc/tree-vect-loop-manip.cc
> +++ b/gcc/tree-vect-loop-manip.cc
> @@ -3295,7 +3295,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters,
> tree nitersm1,
> bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
> ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
> bound_prolog + bound_epilog)
> - : (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
> + : (!LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING (loop_vinfo)
> || vect_epilogues));
>
> /* Epilog loop must be executed if the number of iterations for epilog
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 203e5ad964a..e1900279432 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -1197,6 +1197,10 @@ public:
> || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L) \
> || LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L))
>
> +#define LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING(L) \
> + ((L)->may_misalign_stmts.length () > 0 \
> + && !LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (L))
> +
> #define LOOP_VINFO_NITERS_KNOWN_P(L) \
> (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)
>
> --
> 2.43.0
>