Hi Richard, >Not digged long into this "interesting" function but this case is only valid if type == >final type and if the result is not shifted back. vect_recog_over_widening_pattern >works on a whole sequence of stmts after all, thus > > b = (T_PROMOTED) a; > c = b << 2; > d = b >> 2; > e = (T_ORIG) b; > >would be miscompiled by your new case.
Here is the followup patch. It supports half type left shift operation in vect_recog_over_widening_pattern function. As you suggested, the patch keeps half type lshift flag and gives up on right shift operation or different new_type/use_type cases. Two test cases are added, one should be recognized as pattern and the other shouldn't. Bootstrap OK and no gcc/g++ regression on x86_64/AArch64. Does this look OK? 2017-11-20 Jon Beniston <j...@beniston.com> gcc/ * tree-vect-patterns.c (vect_operation_fits_smaller_type): New parameter. Support half type lshift. (vect_recog_over_widening_pattern): Support half type lshift. gcc/testsuite/ * gcc.dg/vect/vect-over-widen-5.c: New test. * gcc.dg/vect/vect-over-widen-6.c: New test. diff --git a/gcc/testsuite/gcc.dg/vect/vect-over-widen-5.c b/gcc/testsuite/gcc.dg/vect/vect-over-widen-5.c new file mode 100644 index 0000000..a3e5a44 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-over-widen-5.c @@ -0,0 +1,46 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_shift } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 256 + +short a[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +short b[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + +__attribute__ ((noinline)) +int foo (void) +{ + int i; + + for (i=0; i<N; i++) + { + a[i] = (b[i] << 4); + } +} + +int main (void) +{ + int i; + + check_vect (); + + for (i=0; i<N; i++) { + b[i] = i; + __asm__ volatile (""); + } + + foo (); + + for (i=0; i<N; i++) { + if (a[i] != (b[i] << 4)) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-over-widen-6.c b/gcc/testsuite/gcc.dg/vect/vect-over-widen-6.c new file mode 100644 index 0000000..1979eee --- 
/dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-over-widen-6.c @@ -0,0 +1,48 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_shift } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 256 + +short a[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +short b[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + +__attribute__ ((noinline)) +int foo (void) +{ + int i; + + for (i=0; i<N; i++) + { + int x = (b[i] << 4); + x = (x ^ 63); + x = (x >> 2); + a[i] = x; + } +} + +int main (void) +{ + int i; + + check_vect (); + + for (i=0; i<N; i++) { + b[i] = i; + __asm__ volatile (""); + } + + foo (); + + for (i=0; i<N; i++) { + if (a[i] != (((b[i] << 4) ^ 63) >> 2)) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-not "vect_recog_over_widening_pattern: detected" "vect" } } */ +/* { dg-final { scan-tree-dump-not "pattern recognized" "vect" } } */ diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index 1cd6e57..daadcfb 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -1240,12 +1240,15 @@ vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in, statements for STMT: the first one is a type promotion and the second one is the operation itself. We return the type promotion statement in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of - the second pattern statement. */ + the second pattern statement. + HALF_TYPE_LSHIFT_P - Set to TRUE if STMT is a left shift operation that can be + done in a smaller type that has half the precision of the promoted type. 
*/ static bool vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type, tree *op0, tree *op1, gimple **new_def_stmt, - vec<gimple *> *stmts) + vec<gimple *> *stmts, + bool *half_type_lshift_p) { enum tree_code code; tree const_oprnd, oprnd; @@ -1296,6 +1299,7 @@ vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type, return false; } + unsigned HOST_WIDE_INT half_prec = TYPE_PRECISION (half_type); /* Can we perform the operation on a smaller type? */ switch (code) { @@ -1305,11 +1309,11 @@ vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type, if (!int_fits_type_p (const_oprnd, half_type)) { /* HALF_TYPE is not enough. Try a bigger type if possible. */ - if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) + if (TYPE_PRECISION (type) < (half_prec * 4)) return false; - interm_type = build_nonstandard_integer_type ( - TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); + interm_type = build_nonstandard_integer_type (half_prec * 2, + TYPE_UNSIGNED (type)); if (!int_fits_type_p (const_oprnd, interm_type)) return false; } @@ -1317,34 +1321,43 @@ vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type, break; case LSHIFT_EXPR: - /* Try intermediate type - HALF_TYPE is not enough for sure. */ - if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) - return false; + /* Try intermediate type - smaller than HALF_TYPE. */ + if (TYPE_PRECISION (type) >= (half_prec * 4)) + { + interm_type = build_nonstandard_integer_type (half_prec * 2, + TYPE_UNSIGNED (type)); + if (!vect_supportable_shift (code, interm_type)) + return false; + } + /* Try half type. */ + else if (TYPE_PRECISION (type) == half_prec * 2 + && vect_supportable_shift (code, half_type)) + *half_type_lshift_p = true; + else + return false; /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size. (e.g., if the original value was char, the shift amount is at most 8 if we want to use short). 
*/ - if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1) - return false; - - interm_type = build_nonstandard_integer_type ( - TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); - - if (!vect_supportable_shift (code, interm_type)) + if (compare_tree_int (const_oprnd, half_prec) == 1) return false; break; case RSHIFT_EXPR: + /* Right shift is not allowed if we had half type left shift. */ + if (*half_type_lshift_p) + return false; + if (vect_supportable_shift (code, half_type)) break; /* Try intermediate type - HALF_TYPE is not supported. */ - if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) + if (TYPE_PRECISION (type) < (half_prec * 4)) return false; - interm_type = build_nonstandard_integer_type ( - TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); + interm_type = build_nonstandard_integer_type (half_prec * 2, + TYPE_UNSIGNED (type)); if (!vect_supportable_shift (code, interm_type)) return false; @@ -1456,7 +1469,7 @@ vect_recog_over_widening_pattern (vec<gimple *> *stmts, *use_stmt = NULL; tree op0, op1, vectype = NULL_TREE, use_lhs, use_type; tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd; - bool first; + bool first, half_type_lshift_p = false; tree type = NULL; first = true; @@ -1469,7 +1482,7 @@ vect_recog_over_widening_pattern (vec<gimple *> *stmts, new_def_stmt = NULL; if (!vect_operation_fits_smaller_type (stmt, var, &new_type, &op0, &op1, &new_def_stmt, - stmts)) + stmts, &half_type_lshift_p)) { if (first) return NULL; @@ -1534,6 +1547,12 @@ vect_recog_over_widening_pattern (vec<gimple *> *stmts, if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)) return NULL; + /* Check that NEW_TYPE and USE_TYPE have the same precision if we + had left shift in half precision of the original promoted type. 
*/ + if (half_type_lshift_p + && TYPE_PRECISION (new_type) < TYPE_PRECISION (use_type)) + return NULL; + if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type) || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type)) {