So it won't perform the unsafe truncation of double(1.000000000000001) to float(1.0), since that would lose precision. This is guarded by the testcase pr103771-6.c.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Ok for trunk? REAL_CST is handled if it can be represented in different floating point types without loss of precision or under fast math. gcc/ChangeLog: PR tree-optimization/103771 * match.pd (cond_expr_convert_p): Extend the match to handle REAL_CST. * tree-vect-patterns.cc (vect_recog_cond_expr_convert_pattern): Handle REAL_CST. gcc/testsuite/ChangeLog: * gcc.target/i386/pr103771-5.c: New test. * gcc.target/i386/pr103771-6.c: New test. --- gcc/match.pd | 33 +++++++++++++ gcc/testsuite/gcc.target/i386/pr103771-5.c | 54 ++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr103771-6.c | 16 +++++++ gcc/tree-vect-patterns.cc | 31 +++++++++---- 4 files changed, 126 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr103771-5.c create mode 100644 gcc/testsuite/gcc.target/i386/pr103771-6.c diff --git a/gcc/match.pd b/gcc/match.pd index 789e3d33326..0c966675a3f 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -11346,6 +11346,39 @@ and, && single_use (@4) && single_use (@5)))) +/* Floating point or integer comparison and floating point conversion + with REAL_CST. */ +(match (cond_expr_convert_p @0 @2 @3 @6) + (cond (simple_comparison@6 @0 @1) (REAL_CST@2) (convert@5 @3)) + (if (!flag_trapping_math + && SCALAR_FLOAT_TYPE_P (type) + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@3)) + && !operand_equal_p (TYPE_SIZE (type), + TYPE_SIZE (TREE_TYPE (@0))) + && operand_equal_p (TYPE_SIZE (TREE_TYPE (@0)), + TYPE_SIZE (TREE_TYPE (@3))) + && single_use (@5) + && (flag_unsafe_math_optimizations + || exact_real_truncate (TYPE_MODE (TREE_TYPE (@3)), + &TREE_REAL_CST (@2))) + && const_unop (CONVERT_EXPR, TREE_TYPE (@3), @2)))) + +/* Floating point or integer comparison and floating point conversion + with REAL_CST. 
*/ +(match (cond_expr_convert_p @0 @2 @3 @6) + (cond (simple_comparison@6 @0 @1) (convert@4 @2) (REAL_CST@3)) + (if (!flag_trapping_math + && SCALAR_FLOAT_TYPE_P (type) + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@2)) + && !operand_equal_p (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0))) + && operand_equal_p (TYPE_SIZE (TREE_TYPE (@0)), + TYPE_SIZE (TREE_TYPE (@2))) + && single_use (@4) + && (flag_unsafe_math_optimizations + || exact_real_truncate (TYPE_MODE (TREE_TYPE (@2)), + &TREE_REAL_CST (@3))) + && const_unop (CONVERT_EXPR, TREE_TYPE (@2), @3)))) + (for bit_op (bit_and bit_ior bit_xor) (match (bitwise_induction_p @0 @2 @3) (bit_op:c diff --git a/gcc/testsuite/gcc.target/i386/pr103771-5.c b/gcc/testsuite/gcc.target/i386/pr103771-5.c new file mode 100644 index 00000000000..bf94f53b88c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr103771-5.c @@ -0,0 +1,54 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v4 -O3 -fno-trapping-math -fdump-tree-vect-details" } */ +/* { dg-final { scan-assembler-not "kshift" { target { ! ia32 } } } } */ +/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" { target { ! 
ia32 } } } } */ + +void +foo (float* a, float* b, float* c, float* d, double* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + float tmp = c[i] + d[i]; + if (a[i] < b[i]) + tmp = 0.0; + e[i] = tmp; + } +} + +void +foo1 (int* a, int* b, float* c, float* d, double* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + float tmp = c[i] + d[i]; + if (a[i] < b[i]) + tmp = 0.0; + e[i] = tmp; + } +} + + +void +foo2 (double* a, double* b, double* c, double* d, float* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + float tmp = c[i] + d[i]; + if (a[i] < b[i]) + tmp = 0.0; + e[i] = tmp; + } +} + +void +foo3 (long long* a, long long* b, double* c, double* d, float* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + float tmp = c[i] + d[i]; + if (a[i] < b[i]) + tmp = 0.0; + e[i] = tmp; + } +} + diff --git a/gcc/testsuite/gcc.target/i386/pr103771-6.c b/gcc/testsuite/gcc.target/i386/pr103771-6.c new file mode 100644 index 00000000000..92de6f6249d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr103771-6.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v4 -O3 -fno-trapping-math -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-not "vect_recog_cond_expr_convert_pattern" "vect" } } */ +/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 1 "vect" { target { ! 
ia32 } } } } */ + +void +foo (float* a, float* b, float* c, float* d, double* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + double tmp = c[i] + d[i]; + if (a[i] < b[i]) + tmp = 1.000000000000001; + e[i] = tmp; + } +} diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index d8484766cf7..00b699f8144 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -1095,7 +1095,7 @@ vect_recog_cond_expr_convert_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo, tree *type_out) { gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt); - tree lhs, match[4], temp, type, new_lhs, op2; + tree lhs, match[4], temp, type, new_lhs, op2, op1; gimple *cond_stmt; gimple *pattern_stmt; enum tree_code code = NOP_EXPR; @@ -1117,19 +1117,34 @@ vect_recog_cond_expr_convert_pattern (vec_info *vinfo, else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (match[1]))) code = FIX_TRUNC_EXPR; + op1 = match[1]; op2 = match[2]; - type = TREE_TYPE (match[1]); - if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2]))) + type = TREE_TYPE (op1); + /* When op1/op2 is REAL_CST, the conversion must be CONVERT_EXPR from + SCALAR_FLOAT_TYPE_P which is restricted in gimple_cond_expr_convert_p. + Otherwise, the conversion could be FLOAT_EXPR, FIX_TRUNC_EXPR + or CONVERT_EXPR. 
*/ + if (TREE_CODE (op1) == REAL_CST) { - op2 = vect_recog_temp_ssa_var (type, NULL); - gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]); - append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt, - get_vectype_for_scalar_type (vinfo, type)); + op1 = const_unop (CONVERT_EXPR, TREE_TYPE (op2), op1); + type = TREE_TYPE (op2); + } + else if (TREE_CODE (op2) == REAL_CST) + op2 = const_unop (FLOAT_EXPR, TREE_TYPE (op1), op2); + else if (code == NOP_EXPR) + { + if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2]))) + { + op2 = vect_recog_temp_ssa_var (type, NULL); + gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]); + append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt, + get_vectype_for_scalar_type (vinfo, type)); + } } temp = vect_recog_temp_ssa_var (type, NULL); cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3], - match[1], op2)); + op1, op2)); append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt, get_vectype_for_scalar_type (vinfo, type)); new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); -- 2.34.1