The following patch fixes the overly aggressive disabling of peeling for gaps for interleaving and load-lanes (and also fixes the test so that it applies to multi-element groups as well).
Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2017-03-06 Richard Biener <rguent...@suse.de> PR tree-optimization/79824 * tree-vect-stmts.c (get_group_load_store_type): Fix alignment check disabling peeling for gaps. * gcc.dg/vect/pr79824-1.c: New testcase. * gcc.dg/vect/pr79824-2.c: Likewise. Index: gcc/tree-vect-stmts.c =================================================================== --- gcc/tree-vect-stmts.c (revision 245908) +++ gcc/tree-vect-stmts.c (working copy) @@ -1731,7 +1731,7 @@ get_group_load_store_type (gimple *stmt, bool single_element_p = (stmt == first_stmt && !GROUP_NEXT_ELEMENT (stmt_info)); unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt)); - int nunits = TYPE_VECTOR_SUBPARTS (vectype); + unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype); /* True if the vectorized statements would access beyond the last statement in the group. */ @@ -1794,9 +1794,13 @@ get_group_load_store_type (gimple *stmt, /* If there is a gap at the end of the group then these optimizations would access excess elements in the last iteration. */ bool would_overrun_p = (gap != 0); - /* If the access is aligned an overrun is fine. */ + /* If the access is aligned an overrun is fine, but only if the + overrun is not inside an unused vector (if the gap is as large + or larger than a vector). 
*/ if (would_overrun_p - && aligned_access_p (STMT_VINFO_DATA_REF (stmt_info))) + && gap < nunits + && aligned_access_p + (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))) would_overrun_p = false; if (!STMT_VINFO_STRIDED_P (stmt_info) && (can_overrun_p || !would_overrun_p) Index: gcc/testsuite/gcc.dg/vect/pr79824-1.c =================================================================== --- gcc/testsuite/gcc.dg/vect/pr79824-1.c (nonexistent) +++ gcc/testsuite/gcc.dg/vect/pr79824-1.c (working copy) @@ -0,0 +1,46 @@ +/* { dg-require-effective-target mmap } */ + +#include <sys/mman.h> +#include <stdio.h> +#include "tree-vect.h" + +#define COUNT 320 +#define MMAP_SIZE 0x10000 +#define ADDRESS 0x1122000000 +#define TYPE double + +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +void __attribute__((noinline)) +foo (TYPE *__restrict a, TYPE *__restrict b) +{ + int n; + + b = __builtin_assume_aligned (b, sizeof (TYPE) * 2); + a = __builtin_assume_aligned (a, sizeof (TYPE) * 2); + for (n = 0; n < COUNT; n++) + a[n] = b[n * 4]; +} + +int +main (void) +{ + void *x; + size_t b_offset; + + check_vect (); + + x = mmap ((void *) ADDRESS, MMAP_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (x == MAP_FAILED) + { + perror ("mmap"); + return 1; + } + + b_offset = MMAP_SIZE - (4 * COUNT - 2) * sizeof (TYPE); + foo ((TYPE *) x, (TYPE *) ((char *) x + b_offset)); + return 0; +} Index: gcc/testsuite/gcc.dg/vect/pr79824-2.c =================================================================== --- gcc/testsuite/gcc.dg/vect/pr79824-2.c (nonexistent) +++ gcc/testsuite/gcc.dg/vect/pr79824-2.c (working copy) @@ -0,0 +1,48 @@ +/* { dg-require-effective-target mmap } */ + +#include <sys/mman.h> +#include <stdio.h> +#include "tree-vect.h" + +#define COUNT 320 +#define MMAP_SIZE 0x10000 +#define ADDRESS 0x1122000000 +#define TYPE double + +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +void __attribute__((noinline)) +foo (TYPE *__restrict 
a, TYPE *__restrict b) +{ + int n; + + b = __builtin_assume_aligned (b, sizeof (TYPE) * 2); + a = __builtin_assume_aligned (a, sizeof (TYPE) * 2); + for (n = 0; n < COUNT; n++) + { + a[n] = b[n * 4] + b[n * 4 + 1]; + } +} + +int +main (void) +{ + void *x; + size_t b_offset; + + check_vect (); + + x = mmap ((void *) ADDRESS, MMAP_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (x == MAP_FAILED) + { + perror ("mmap"); + return 1; + } + + b_offset = MMAP_SIZE - (4 * COUNT - 2) * sizeof (TYPE); + foo ((TYPE *) x, (TYPE *) ((char *) x + b_offset)); + return 0; +}