On Thu, Feb 8, 2018 at 1:09 PM, Richard Sandiford <richard.sandif...@linaro.org> wrote: > Richard Biener <richard.guent...@gmail.com> writes: >> On Fri, Feb 2, 2018 at 3:12 PM, Richard Sandiford >> <richard.sandif...@linaro.org> wrote: >>> Index: gcc/tree-data-ref.c >>> =================================================================== >>> --- gcc/tree-data-ref.c 2018-02-02 14:03:53.964530009 +0000 >>> +++ gcc/tree-data-ref.c 2018-02-02 14:03:54.184521826 +0000 >>> @@ -721,7 +721,13 @@ split_constant_offset_1 (tree type, tree >>> if (TREE_CODE (tmp_var) != SSA_NAME) >>> return false; >>> wide_int var_min, var_max; >>> - if (get_range_info (tmp_var, &var_min, &var_max) != >>> VR_RANGE) >>> + value_range_type vr_type = get_range_info (tmp_var, >>> &var_min, >>> + &var_max); >>> + wide_int var_nonzero = get_nonzero_bits (tmp_var); >>> + signop sgn = TYPE_SIGN (itype); >>> + if (intersect_range_with_nonzero_bits (vr_type, &var_min, >>> + &var_max, >>> var_nonzero, >>> + sgn) != VR_RANGE) >> >> Above it looks like we could go from VR_RANGE to VR_UNDEFINED. >> I'm not sure if the original range-info might be useful in this case - >> if it may be, >> can we simply use only the range info if it was VR_RANGE? > > I think we only drop to VR_UNDEFINED if we have contradictory > information: nonzero bits says some bits must be clear, but the range > only contains values for which the bits are set. In that case I think > we should either be conservative and not use the information, or be > aggressive and say that we have undefined behaviour, so overflow is OK. > > It seems a bit of a fudge to go back to the old range when we know it's > false, and use it to allow the split sometimes and not others.
Fine. > Thanks, > Richard > >> >> Ok otherwise. >> Thanks, >> Richard. >> >>> return false; >>> >>> /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF) >>> @@ -729,7 +735,6 @@ split_constant_offset_1 (tree type, tree >>> operations done in ITYPE. The addition must overflow >>> at both ends of the range or at neither. */ >>> bool overflow[2]; >>> - signop sgn = TYPE_SIGN (itype); >>> unsigned int prec = TYPE_PRECISION (itype); >>> wide_int woff = wi::to_wide (tmp_off, prec); >>> wide_int op0_min = wi::add (var_min, woff, sgn, >>> &overflow[0]); >>> Index: gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-3.c >>> =================================================================== >>> --- /dev/null 2018-02-02 09:03:36.168354735 +0000 >>> +++ gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-3.c 2018-02-02 >>> 14:03:54.183521863 +0000 >>> @@ -0,0 +1,62 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-additional-options "-fno-tree-loop-vectorize" } */ >>> +/* { dg-require-effective-target vect_double } */ >>> +/* { dg-require-effective-target lp64 } */ >>> + >>> +void >>> +f1 (double *p, double *q, unsigned int n) >>> +{ >>> + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2); >>> + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2); >>> + for (unsigned int i = 0; i < n; i += 4) >>> + { >>> + double a = q[i] + p[i]; >>> + double b = q[i + 1] + p[i + 1]; >>> + q[i] = a; >>> + q[i + 1] = b; >>> + } >>> +} >>> + >>> +void >>> +f2 (double *p, double *q, unsigned int n) >>> +{ >>> + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2); >>> + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2); >>> + for (unsigned int i = 0; i < n; i += 2) >>> + { >>> + double a = q[i] + p[i]; >>> + double b = q[i + 1] + p[i + 1]; >>> + q[i] = a; >>> + q[i + 1] = b; >>> + } >>> +} >>> + >>> +void >>> +f3 (double *p, double *q, unsigned int n) >>> +{ >>> + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2); >>> + q = (double *) 
__builtin_assume_aligned (q, sizeof (double) * 2); >>> + for (unsigned int i = 0; i < n; i += 6) >>> + { >>> + double a = q[i] + p[i]; >>> + double b = q[i + 1] + p[i + 1]; >>> + q[i] = a; >>> + q[i + 1] = b; >>> + } >>> +} >>> + >>> +void >>> +f4 (double *p, double *q, unsigned int start, unsigned int n) >>> +{ >>> + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2); >>> + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2); >>> + for (unsigned int i = start & -2; i < n; i += 2) >>> + { >>> + double a = q[i] + p[i]; >>> + double b = q[i + 1] + p[i + 1]; >>> + q[i] = a; >>> + q[i + 1] = b; >>> + } >>> +} >>> + >>> +/* { dg-final { scan-tree-dump-times "basic block vectorized" 4 "slp1" } } >>> */ >>> Index: gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-4.c >>> =================================================================== >>> --- /dev/null 2018-02-02 09:03:36.168354735 +0000 >>> +++ gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-4.c 2018-02-02 >>> 14:03:54.183521863 +0000 >>> @@ -0,0 +1,47 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-additional-options "-fno-tree-loop-vectorize" } */ >>> +/* { dg-require-effective-target lp64 } */ >>> + >>> +void >>> +f1 (double *p, double *q, unsigned int n) >>> +{ >>> + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2); >>> + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2); >>> + for (unsigned int i = 0; i < n; i += 1) >>> + { >>> + double a = q[i] + p[i]; >>> + double b = q[i + 1] + p[i + 1]; >>> + q[i] = a; >>> + q[i + 1] = b; >>> + } >>> +} >>> + >>> +void >>> +f2 (double *p, double *q, unsigned int n) >>> +{ >>> + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2); >>> + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2); >>> + for (unsigned int i = 0; i < n; i += 3) >>> + { >>> + double a = q[i] + p[i]; >>> + double b = q[i + 1] + p[i + 1]; >>> + q[i] = a; >>> + q[i + 1] = b; >>> + } >>> +} >>> + >>> +void >>> +f3 (double *p, double *q, unsigned 
int start, unsigned int n) >>> +{ >>> + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2); >>> + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2); >>> + for (unsigned int i = start; i < n; i += 2) >>> + { >>> + double a = q[i] + p[i]; >>> + double b = q[i + 1] + p[i + 1]; >>> + q[i] = a; >>> + q[i + 1] = b; >>> + } >>> +} >>> + >>> +/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp1" } } */