> On 29.07.2025 at 20:37, Pengfei Li <pengfei....@arm.com> wrote:
> 
> Hi,
> 
> The review comment on v2 has been addressed. Tested again on both trunk and gcc-15.
> 
> Ok for trunk and gcc-15?
> 

Ok

Richard 


> Changes in v3:
> - Extract the constant VF check out.
> 
> Changes in v2:
> - Remove the condition of dr_safe_speculative_read_required.
> - Add a constant VF check.
> 
> Thanks,
> Pengfei
> 
> -- >8 --
> This patch fixes a segmentation fault issue that can occur in vectorized
> loops with an early break. When GCC vectorizes such loops, it may insert
> a versioning check to ensure that data references (DRs) with speculative
> loads are aligned. The check normally requires DRs to be aligned to the
> vector mode size, which prevents generated vector load instructions from
> crossing page boundaries.
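> 
> Conceptually, the versioned runtime check tests the low bits of each
> base address against a mask of (vector size - 1). A minimal sketch,
> assuming 32-byte vectors and the pointers p and q from the example
> below (the real check is constructed internally by the vectorizer):
> 
>    /* mask = 32 - 1 = 31; both bases must be 32-byte aligned.  */
>    if ((((uintptr_t) p | (uintptr_t) q) & 31) == 0)
>      /* ... run the vectorized loop ... */;
>    else
>      /* ... run the scalar fallback loop ... */;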
> 
> However, this is not sufficient when a single scalar load is vectorized
> into multiple loads within the same iteration. In such cases, even if
> none of the vector loads crosses page boundaries, subsequent loads after
> the first one may still access memory beyond the current valid page.
> 
> Consider the following loop as an example:
> 
>    while (i < MAX_COMPARE) {
>      if (*(p + i) != *(q + i))
>        return i;
>      i++;
>    }
> 
> When compiled with "-O3 -march=znver2" on x86, the vectorized loop may
> include instructions like:
> 
>    vmovdqa (%rcx,%rax), %ymm0
>    vmovdqa 32(%rcx,%rax), %ymm1
>    vpcmpeqq (%rdx,%rax), %ymm0, %ymm0
>    vpcmpeqq 32(%rdx,%rax), %ymm1, %ymm1
> 
> Note that two speculative vector loads are generated for each DR (p and q).
> The first vmovdqa and vpcmpeqq are safe due to the vector size (32-byte)
> alignment, but the following ones (at offset 32) may not be safe because
> they could read from the beginning of the next memory page, potentially
> leading to segmentation faults.
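> 
> As a concrete illustration, assume 4096-byte pages and a hypothetical
> 32-byte-aligned address for the current elements:
> 
>    p + i  -> 0x7fffe0  (32 bytes before the end of the page)
>    load 1 -> 0x7fffe0..0x7fffff  (within the current page)
>    load 2 -> 0x800000..0x80001f  (first 32 bytes of the next page)
> 
> If the next page is unmapped, the second load faults even though the
> scalar loop would have stopped before reading past the page end.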
> 
> To avoid this issue, this patch increases the alignment requirement to
> DR_TARGET_ALIGNMENT. This ensures that all vector loads in the same
> vector iteration access memory within the same page.
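> 
> For example, if DR_TARGET_ALIGNMENT for these DRs is 64 bytes (an
> assumed value for illustration), the versioning mask becomes
> 64 - 1 = 63 and the sketch above turns into:
> 
>    /* A 64-byte-aligned base keeps both 32-byte loads within one
>       64-byte block, and hence within one page.  */
>    if ((((uintptr_t) p | (uintptr_t) q) & 63) == 0)
>      /* ... vectorized loop is safe to run ... */;
>    else
>      /* ... fall back to the scalar loop ... */;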
> 
> This patch has been bootstrapped and regression-tested on x86_64-linux-gnu,
> arm-linux-gnueabihf and aarch64-linux-gnu.
> 
>    PR tree-optimization/121190
> 
> gcc/ChangeLog:
> 
>    * tree-vect-data-refs.cc (vect_enhance_data_refs_alignment):
>      Increase alignment requirement for speculative loads.
> 
> gcc/testsuite/ChangeLog:
> 
>    * gcc.dg/vect/vect-early-break_52.c: Update an unsafe test.
>    * gcc.dg/vect/vect-early-break_137-pr121190.c: New test.
> ---
> .../vect/vect-early-break_137-pr121190.c      | 62 +++++++++++++++++++
> .../gcc.dg/vect/vect-early-break_52.c         |  2 +-
> gcc/tree-vect-data-refs.cc                    | 20 ++----
> 3 files changed, 69 insertions(+), 15 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c
> 
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c
> new file mode 100644
> index 00000000000..e6b071c411c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c
> @@ -0,0 +1,62 @@
> +/* PR tree-optimization/121190 */
> +/* { dg-options "-O3" } */
> +/* { dg-additional-options "-march=znver2" { target x86_64-*-* i?86-*-* } } */
> +/* { dg-require-effective-target mmap } */
> +/* { dg-require-effective-target vect_early_break } */
> +
> +#include <stdint.h>
> +#include <string.h>
> +#include <stdio.h>
> +#include <sys/mman.h>
> +#include <unistd.h>
> +#include "tree-vect.h"
> +
> +#define MAX_COMPARE 5000
> +
> +__attribute__((noipa))
> +int diff (uint64_t *restrict p, uint64_t *restrict q)
> +{
> +  int i = 0;
> +  while (i < MAX_COMPARE) {
> +    if (*(p + i) != *(q + i))
> +      return i;
> +    i++;
> +  }
> +  return -1;
> +}
> +
> +int main ()
> +{
> +  check_vect ();
> +
> +  long pgsz = sysconf (_SC_PAGESIZE);
> +  if (pgsz == -1) {
> +    fprintf (stderr, "sysconf failed\n");
> +    return 0;
> +  }
> +
> +  /* Allocate 2 consecutive pages of memory and let p1 and p2 point to the
> +     beginning of each.  */
> +  void *mem = mmap (NULL, pgsz * 2, PROT_READ | PROT_WRITE,
> +            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> +  if (mem == MAP_FAILED) {
> +    fprintf (stderr, "mmap failed\n");
> +    return 0;
> +  }
> +  uint64_t *p1 = (uint64_t *) mem;
> +  uint64_t *p2 = (uint64_t *) mem + pgsz / sizeof (uint64_t);
> +
> +  /* Fill the first page with zeros, except for its last 64 bits.  */
> +  memset (p1, 0, pgsz);
> +  *(p2 - 1) = -1;
> +
> +  /* Make the 2nd page not accessible.  */
> +  mprotect (p2, pgsz, PROT_NONE);
> +
> +  /* Calls to diff should not read the 2nd page.  */
> +  for (int i = 1; i <= 20; i++) {
> +    if (diff (p2 - i, p1) != i - 1)
> +      __builtin_abort ();
> +  }
> +}
> +
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c
> index 86a632f2a82..6abfcd6580e 100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c
> @@ -18,4 +18,4 @@ int main1 (short X)
>     }
> }
> 
> -/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target { ! "x86_64-*-* i?86-*-*" } } } } */
> +/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target { ! "x86_64-*-* i?86-*-* arm*-*-*" } } } } */
> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> index e7919b73c25..da700cd1f3d 100644
> --- a/gcc/tree-vect-data-refs.cc
> +++ b/gcc/tree-vect-data-refs.cc
> @@ -2918,12 +2918,14 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
>      2) there is at least one unsupported misaligned data ref with an unknown
>         misalignment, and
>      3) all misaligned data refs with a known misalignment are supported, and
> -     4) the number of runtime alignment checks is within reason.  */
> +     4) the number of runtime alignment checks is within reason, and
> +     5) the vectorization factor is a constant.  */
> 
>   do_versioning
>     = (optimize_loop_nest_for_speed_p (loop)
>        && !loop->inner /* FORNOW */
> -       && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP);
> +       && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP
> +       && LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ());
> 
>   if (do_versioning)
>     {
> @@ -2964,17 +2966,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
>                   break;
>                 }
> 
> -          /* At present we don't support versioning for alignment
> -             with variable VF, since there's no guarantee that the
> -             VF is a power of two.  We could relax this if we added
> -             a way of enforcing a power-of-two size.  */
> -          unsigned HOST_WIDE_INT size;
> -          if (!GET_MODE_SIZE (TYPE_MODE (vectype)).is_constant (&size))
> -            {
> -              do_versioning = false;
> -              break;
> -            }
> -
>           /* Forcing alignment in the first iteration is no good if
>              we don't keep it across iterations.  For now, just disable
>              versioning in this case.
> @@ -2993,7 +2984,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
>              Construct the mask needed for this test.  For example,
>              GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
>              mask must be 15 = 0xf. */
> -          int mask = size - 1;
> +          gcc_assert (DR_TARGET_ALIGNMENT (dr_info).is_constant ());
> +          int mask = DR_TARGET_ALIGNMENT (dr_info).to_constant () - 1;
> 
>           /* FORNOW: use the same mask to test all potentially unaligned
>              references in the loop.  */
> --
> 2.43.0
> 
