When vectorizing non-contiguous memory accesses, such as loads of a
particular struct member, in a loop with unknown bounds (iterating
over a pointer rather than an array), inadequate alignment checking
currently allows a single vectorized loop iteration to cross a page
boundary.  This can lead to segmentation faults in the resulting
binaries.

For example, for the given datatype:

    typedef struct {
      uint64_t a;
      uint64_t b;
      uint32_t flag;
      uint32_t pad;
    } Data;

and a loop such as:

int
foo (Data *ptr) {
  if (ptr == NULL)
    return -1;

  int cnt;
  for (cnt = 0; cnt < MAX; cnt++) {
    if (ptr->flag == 0)
      break;
    ptr++;
  }
  return cnt;
}

the vectorizer yields the following CFG on armhf:

<bb 1>:
_41 = ptr_4(D) + 16;
<bb 2>:
_44 = MEM[(unsigned int *)ivtmp_42];
ivtmp_45 = ivtmp_42 + 24;
_46 = MEM[(unsigned int *)ivtmp_45];
ivtmp_47 = ivtmp_45 + 24;
_48 = MEM[(unsigned int *)ivtmp_47];
ivtmp_49 = ivtmp_47 + 24;
_50 = MEM[(unsigned int *)ivtmp_49];
vect_cst__51 = {_44, _46, _48, _50};
mask_patt_6.17_52 = vect_cst__51 == { 0, 0, 0, 0 };
if (mask_patt_6.17_52 != { 0, 0, 0, 0 })
  goto <bb 4>;
else
  ivtmp_43 = ivtmp_42 + 96;
  goto <bb 2>;
<bb 4>:
...

with no check that the starting address is suitably aligned, nor
that alignment is preserved across iterations.  We therefore fix the
handling of such cases.
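
To make the failure mode concrete, here is a minimal sketch of the
address arithmetic only (assuming a 4096-byte page and the layout
used by the new testcase, where the first Data object starts 80 bytes
before an unmapped page; the offsets, not the program, are the
point):

#include <stdio.h>

/* Sketch only: assumes a 4096-byte page, sizeof (Data) == 24 with the
   flag member at offset 16, and data[0] placed 80 bytes before an
   unmapped page, as in the testcase added below.  */
int
main (void)
{
  long page = 4096;
  long base = page - 80;  /* offset of data[0] within its page pair.  */

  /* Scalar loop: one flag per iteration; data[2].flag at offset 4080
     is zero, so data[3].flag is never read.  */
  for (int i = 0; i < 3; i++)
    printf ("scalar: flag[%d] at offset %ld\n", i, base + i * 24 + 16);

  /* Vectorized loop: all four flags are read in one iteration;
     data[3].flag at offset 4104 lies inside the unmapped page.  */
  for (int i = 0; i < 4; i++)
    printf ("vector: flag[%d] at offset %ld\n", i, base + i * 24 + 16);
  return 0;
}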

In `vect_compute_data_ref_alignment' we ensure that the vector
alignment requirement is large enough to cover the entire address
range touched by the group of element-wise loads carried out in a
single vectorized iteration, rounded up to the next power of 2.
In `get_load_store_type' we then ensure correct categorization of the
alignment support scheme for VMAT_ELEMENTWISE access types, depending
on whether or not they occur in the context of a scalarized vector
access in a loop with unknown bounds where speculative reads are
required.  Where this is the case, we change
`alignment_support_scheme' from `dr_unaligned_supported' to
`dr_unaligned_unsupported', so that proper alignment is required.
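
As a rough illustration of the rounding (a standalone sketch, not the
GCC code itself, which operates on poly_uint64 values and uses
ceil_log2): taking the armhf example above with VF = 4 and an assumed
24-byte step, the per-iteration footprint of 96 bytes is not a power
of two and is rounded up to 128 bytes before being used as the target
alignment.

#include <stdio.h>

/* Standalone sketch of the rounding now done in
   vect_compute_data_ref_alignment; values taken from the armhf
   example above.  */
static unsigned long
round_up_pow2 (unsigned long x)
{
  unsigned long p = 1;
  while (p < x)
    p <<= 1;
  return p;
}

int
main (void)
{
  unsigned long vf = 4, step = 24;          /* assumed values  */
  unsigned long new_alignment = vf * step;  /* 96 bytes per iteration  */
  if (new_alignment & (new_alignment - 1))  /* not a power of two  */
    new_alignment = round_up_pow2 (new_alignment);
  printf ("target alignment: %lu\n", new_alignment);  /* prints 128  */
  return 0;
}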

Regtested on aarch64, armhf and x86_64, no new regressions.

gcc/ChangeLog:

        PR tree-optimization/124037
        * tree-vect-stmts.cc (get_load_store_type): Fix
        alignment_support_scheme categorization of VMAT_ELEMENTWISE
        loads.
        * tree-vect-data-refs.cc (vect_compute_data_ref_alignment):
        Round alignments to next power of 2.

gcc/testsuite/ChangeLog:

        * gcc.dg/vect/pr124037.c: New test.
---
 gcc/testsuite/gcc.dg/vect/pr124037.c | 57 ++++++++++++++++++++++++++++
 gcc/tree-vect-data-refs.cc           | 21 +++++-----
 gcc/tree-vect-stmts.cc               | 14 ++++++-
 3 files changed, 80 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr124037.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr124037.c b/gcc/testsuite/gcc.dg/vect/pr124037.c
new file mode 100644
index 00000000000..3121380454b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr124037.c
@@ -0,0 +1,57 @@
+/* PR tree-optimization/124037 */
+/* { dg-options "-O3" } */
+/* { dg-require-effective-target mmap } */
+/* { dg-require-effective-target vect_early_break } */
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#define MAX 65536
+
+typedef struct {
+  uint64_t a;
+  uint64_t b;
+  uint32_t flag;
+  uint32_t pad;
+} Data;
+
+int __attribute__ ((noinline))
+foo (Data *ptr) {
+  if (ptr == NULL)
+    return -1;
+
+  int cnt;
+  for (cnt = 0; cnt < MAX; cnt++) {
+    if (ptr->flag == 0)
+      break;
+    ptr++;
+  }
+  return cnt;
+}
+
+int main() {
+  long pgsz = sysconf (_SC_PAGESIZE);
+  if (pgsz == -1) {
+    fprintf (stderr, "sysconf failed\n");
+    return 0;
+  }
+
+  /* Allocate 2 consecutive pages.  */
+  void *mem = mmap (NULL, pgsz * 2, PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (mem == MAP_FAILED) {
+    fprintf (stderr, "mmap failed\n");
+    return 0;
+  }
+
+  memset (mem, 1, pgsz);
+  uint8_t *ptr = (uint8_t*) mem + pgsz;
+  memset (ptr - 16, 0, 16);
+
+  mprotect (ptr, pgsz, PROT_NONE);
+  Data *data = (Data *) (ptr - 80);
+  __builtin_printf("%d\n", foo(data));
+}
+
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index e1facc7e957..0f6b4539176 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -1460,18 +1460,19 @@ vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info,
       poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
       poly_uint64 new_alignment = vf * align_factor_c;
 
-      if ((vf.is_constant () && pow2p_hwi (new_alignment.to_constant ()))
-         || (!vf.is_constant () && pow2p_hwi (align_factor_c)))
+      if ((vf.is_constant () && !pow2p_hwi (new_alignment.to_constant ()))
+         || (!vf.is_constant () && !pow2p_hwi (align_factor_c)))
+       new_alignment
+         = HOST_WIDE_INT_1U << ceil_log2 (new_alignment.to_constant ());
+
+      if (dump_enabled_p ())
        {
-         if (dump_enabled_p ())
-           {
-             dump_printf_loc (MSG_NOTE, vect_location,
-                              "alignment increased due to early break to ");
-             dump_dec (MSG_NOTE, new_alignment);
-             dump_printf (MSG_NOTE, " bytes.\n");
-           }
-         vector_alignment = new_alignment;
+         dump_printf_loc (MSG_NOTE, vect_location,
+                          "alignment increased due to early break to ");
+         dump_dec (MSG_NOTE, new_alignment);
+         dump_printf (MSG_NOTE, " bytes.\n");
        }
+      vector_alignment = new_alignment;
     }
 
   SET_DR_TARGET_ALIGNMENT (dr_info, vector_alignment);
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index ee98e72d1e5..9045a30d5a4 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2502,8 +2502,18 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
       || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
     *poffset = neg_ldst_offset;
 
-  if (*memory_access_type == VMAT_ELEMENTWISE
-      || *memory_access_type == VMAT_GATHER_SCATTER_LEGACY
+  if (*memory_access_type == VMAT_ELEMENTWISE)
+    {
+      if (dr_safe_speculative_read_required (stmt_info)
+         && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)
+         && !DR_SCALAR_KNOWN_BOUNDS (STMT_VINFO_DR_INFO (stmt_info)))
+       *alignment_support_scheme = dr_unaligned_unsupported;
+      else
+       *alignment_support_scheme = dr_unaligned_supported;
+      *misalignment = DR_MISALIGNMENT_UNKNOWN;
+    }
+
+  else if (*memory_access_type == VMAT_GATHER_SCATTER_LEGACY
       || *memory_access_type == VMAT_STRIDED_SLP
       || *memory_access_type == VMAT_INVARIANT)
     {
-- 
2.43.0
