New in this version:
Keep the existing if-else as-is to take care of alignment settings for
individual element misalignment. Add the logic from the v2 patch to
the existing early-break memory access logic, extending it from its
current state of handling only gather/scatter and strided SLP accesses
to also handle elementwise accesses.
---
For the vectorization of non-contiguous memory accesses such as the
vectorization of loads from a particular struct member, specifically
when vectorizing with unknown bounds (thus using a pointer and not an
array) it is observed that inadequate alignment checking allows for
the crossing of a page boundary within a single vectorized loop
iteration. This leads to potential segmentation faults in the
resulting binaries.
For example, for the given datatype:
typedef struct {
uint64_t a;
uint64_t b;
uint32_t flag;
uint32_t pad;
} Data;
and a loop such as:
int
foo (Data *ptr) {
if (ptr == NULL)
return -1;
int cnt;
for (cnt = 0; cnt < MAX; cnt++) {
if (ptr->flag == 0)
break;
ptr++;
}
return cnt;
}
the vectorizer yields the following cfg on armhf:
<bb 1>:
_41 = ptr_4(D) + 16;
<bb 2>:
_44 = MEM[(unsigned int *)ivtmp_42];
ivtmp_45 = ivtmp_42 + 24;
_46 = MEM[(unsigned int *)ivtmp_45];
ivtmp_47 = ivtmp_45 + 24;
_48 = MEM[(unsigned int *)ivtmp_47];
ivtmp_49 = ivtmp_47 + 24;
_50 = MEM[(unsigned int *)ivtmp_49];
vect_cst__51 = {_44, _46, _48, _50};
mask_patt_6.17_52 = vect_cst__51 == { 0, 0, 0, 0 };
if (mask_patt_6.17_52 != { 0, 0, 0, 0 })
goto <bb 4>;
else
ivtmp_43 = ivtmp_42 + 96;
goto <bb 2>;
<bb 4>:
...
without any proper address alignment checks on the starting address
or on whether alignment is preserved across iterations. We therefore
fix the handling of such cases.
To correct this, we modify the logic in `get_load_store_type',
particularly the logic responsible for ensuring we don't read more
than the scalar code would in the context of early breaks, extending
it from handling only gather-scatter and strided SLP accesses to
also properly handling element-wise accesses, wherein we
specify that these need correct block alignment, thus promoting their
`alignment_support_scheme' from `dr_unaligned_supported' to
`dr_aligned'.
gcc/ChangeLog:
PR tree-optimization/124037
* tree-vect-stmts.cc (get_load_store_type): Fix
alignment_support_scheme categorization for early
break VMAT_ELEMENTWISE accesses.
gcc/testsuite/ChangeLog:
* g++.dg/vect/vect-pr124037.cc: New test.
* gcc.dg/vect/vect-pr124037.c: New test.
---
gcc/testsuite/g++.dg/vect/vect-pr124037.cc | 38 ++++++++++++++
gcc/testsuite/gcc.dg/vect/vect-pr124037.c | 58 ++++++++++++++++++++++
gcc/tree-vect-stmts.cc | 42 ++++++++++------
3 files changed, 122 insertions(+), 16 deletions(-)
create mode 100644 gcc/testsuite/g++.dg/vect/vect-pr124037.cc
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-pr124037.c
diff --git a/gcc/testsuite/g++.dg/vect/vect-pr124037.cc b/gcc/testsuite/g++.dg/vect/vect-pr124037.cc
new file mode 100644
index 00000000000..e25ba02ec9b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/vect-pr124037.cc
@@ -0,0 +1,38 @@
+/* PR tree-optimization/124037 */
+/* { dg-require-effective-target mmap } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-additional-options "-std=c++11" } */
+
+struct Token
+{
+ unsigned t, t1, t2;
+ void *a, *b;
+ unsigned short Kind;
+ unsigned short flags;
+ constexpr Token() : Token(0){}
+ constexpr Token(int a): t(0), t1(0), t2 (0), a(nullptr),b (nullptr),
+ Kind(a), flags(0) {}
+ bool isNot(int K) const { return Kind != K; }
+};
+
+[[gnu::noipa]]
+unsigned getArgLength(const Token *ArgPtr) {
+ unsigned NumArgTokens = 0;
+ for (; ArgPtr->isNot(0); ++ArgPtr)
+ ++NumArgTokens;
+ return NumArgTokens;
+}
+
+Token tokens[] = {{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},
+ {1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},
+ {1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},
+ {1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},
+ {1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},{1},
+ {1},{1},{1},{1},{1},{1},{}};
+
+int main()
+{
+ __builtin_printf("%d\n", getArgLength(tokens));
+}
+/* { dg-final { scan-tree-dump "missed: not vectorized: relevant stmt not supported: _\[0-9\]+ = ArgPtr_\[0-9\]+->Kind;" "vect" } } */
+/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr124037.c b/gcc/testsuite/gcc.dg/vect/vect-pr124037.c
new file mode 100644
index 00000000000..3954e23f456
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-pr124037.c
@@ -0,0 +1,58 @@
+/* PR tree-optimization/124037 */
+/* { dg-require-effective-target mmap } */
+/* { dg-require-effective-target vect_early_break } */
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#define MAX 65536
+
+typedef struct {
+ uint64_t a;
+ uint64_t b;
+ uint32_t flag;
+ uint32_t pad;
+} Data;
+
+int __attribute__ ((noinline))
+foo (Data *ptr) {
+ if (ptr == NULL)
+ return -1;
+
+ int cnt;
+ for (cnt = 0; cnt < MAX; cnt++) {
+ if (ptr->flag == 0)
+ break;
+ ptr++;
+ }
+ return cnt;
+}
+
+int main() {
+ long pgsz = sysconf (_SC_PAGESIZE);
+ if (pgsz == -1) {
+ fprintf (stderr, "sysconf failed\n");
+ return 0;
+ }
+
+ /* Allocate 2 consecutive pages. */
+ void *mem = mmap (NULL, pgsz * 2, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (mem == MAP_FAILED) {
+ fprintf (stderr, "mmap failed\n");
+ return 0;
+ }
+
+ memset (mem, 1, pgsz);
+ uint8_t *ptr = (uint8_t*) mem + pgsz;
+ memset (ptr - 16, 0, 16);
+
+ mprotect (ptr, pgsz, PROT_NONE);
+ Data *data = (Data *) (ptr - 80);
+ __builtin_printf("%d\n", foo(data));
+}
+
+/* { dg-final { scan-tree-dump "missed: not vectorized: relevant stmt not supported: _\[0-9\]+ = ptr_\[0-9\]+->flag;" "vect" } } */
+/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 22285250aa8..17ab707350b 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2500,9 +2500,9 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
*poffset = neg_ldst_offset;
if (*memory_access_type == VMAT_ELEMENTWISE
- || *memory_access_type == VMAT_GATHER_SCATTER_LEGACY
- || *memory_access_type == VMAT_STRIDED_SLP
- || *memory_access_type == VMAT_INVARIANT)
+ || *memory_access_type == VMAT_GATHER_SCATTER_LEGACY
+ || *memory_access_type == VMAT_STRIDED_SLP
+ || *memory_access_type == VMAT_INVARIANT)
{
*alignment_support_scheme = dr_unaligned_supported;
*misalignment = DR_MISALIGNMENT_UNKNOWN;
@@ -2562,20 +2562,30 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
force masking. */
if (loop_vinfo
&& dr_safe_speculative_read_required (stmt_info)
- && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
- && (mat_gather_scatter_p (*memory_access_type)
- || *memory_access_type == VMAT_STRIDED_SLP))
+ && LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "early break not supported: cannot peel for "
- "alignment. With non-contiguous memory vectorization"
- " could read out of bounds at %G ",
- STMT_VINFO_STMT (stmt_info));
- if (inbounds)
- LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
- else
- return false;
+ if (mat_gather_scatter_p (*memory_access_type)
+ || *memory_access_type == VMAT_STRIDED_SLP)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "early break not supported: cannot peel for "
+ "alignment. With non-contiguous memory vectorization"
+ " could read out of bounds at %G ",
+ STMT_VINFO_STMT (stmt_info));
+ if (inbounds)
+ LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
+ else
+ return false;
+ }
+ /* Block-level alignment: Even though individual accesses of
+ VMAT_ELEMENTWISE type do not cause alignment problems, loading the
+ whole vector's worth of values in a speculative early-break context
+ might cross a page boundary. Set the alignment scheme to `dr_aligned'
+ here in order to force checking of whether such accesses meet
+ alignment criteria. */
+ else if (*memory_access_type == VMAT_ELEMENTWISE && !inbounds)
+ *alignment_support_scheme = dr_aligned;
}
/* If this DR needs alignment for correctness, we must ensure the target
--
2.43.0