Hi! This patch fixes a couple of issues in optimize_range_tests_var_bound. One is that if we have ranges[i].in_p, we should be inverting the comparison rather than just swapping or not swapping operands. Then because we want to handle only LT/LE, we want to swap operands and the comparison code if we have GT/GE after the optional inversion. And finally, using ranges[i].in_p to determine if we should swap at the end is wrong, in the pr81588 testcase we have a negation in between and thus ranges[i].in_p doesn't match the opcode - so, we should use opcode and if opcode is ERROR_MARK (i.e. the inter-bb range test optimization), we should check oe->rank) and finally shouldn't swap comparison operands, but invert the comparison code if we need to invert.
In the earlier version of the patch, I made a mistake and miscompiled trip_count_to_ahead_ratio_too_small_p in tree-ssa-loop-prefetch.c, so that simplified is also included in another testcase. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/7.2? 2017-07-31 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/81588 * tree-ssa-reassoc.c (optimize_range_tests_var_bound): If ranges[i].in_p, invert comparison code ccode. For >/>=, swap rhs1 and rhs2 and comparison code unconditionally, for </<= don't do that. Don't swap rhs1/rhs2 again if ranges[i].in_p, instead invert comparison code ccode if opcode or oe->rank is BIT_IOR_EXPR. * gcc.dg/tree-ssa/pr81588.c: New test. * gcc.dg/pr81588.c: New test. * gcc.c-torture/execute/pr81588.c: New test. --- gcc/tree-ssa-reassoc.c.jj 2017-07-31 10:19:22.777332269 +0200 +++ gcc/tree-ssa-reassoc.c 2017-07-31 13:14:33.488618172 +0200 @@ -2958,17 +2958,26 @@ optimize_range_tests_var_bound (enum tre { case GT_EXPR: case GE_EXPR: - if (!ranges[i].in_p) - std::swap (rhs1, rhs2); + case LT_EXPR: + case LE_EXPR: + break; + default: + continue; + } + if (ranges[i].in_p) + ccode = invert_tree_comparison (ccode, false); + switch (ccode) + { + case GT_EXPR: + case GE_EXPR: + std::swap (rhs1, rhs2); ccode = swap_tree_comparison (ccode); break; case LT_EXPR: case LE_EXPR: - if (ranges[i].in_p) - std::swap (rhs1, rhs2); break; default: - continue; + gcc_unreachable (); } int *idx = map->get (rhs1); @@ -3015,8 +3024,14 @@ optimize_range_tests_var_bound (enum tre fprintf (dump_file, "\n"); } - if (ranges[i].in_p) - std::swap (rhs1, rhs2); + operand_entry *oe = (*ops)[ranges[i].idx]; + ranges[i].in_p = 0; + if (opcode == BIT_IOR_EXPR + || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) + { + ranges[i].in_p = 1; + ccode = invert_tree_comparison (ccode, false); + } unsigned int uid = gimple_uid (stmt); gimple_stmt_iterator gsi = gsi_for_stmt (stmt); @@ -3043,7 +3058,6 @@ optimize_range_tests_var_bound (enum tre } else { - operand_entry *oe = (*ops)[ranges[i].idx]; tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node; if (!INTEGRAL_TYPE_P (ctype) || (TREE_CODE (ctype) != BOOLEAN_TYPE @@ -3065,7 +3079,7 @@ optimize_range_tests_var_bound (enum tre ranges[i].high = ranges[i].low; } ranges[i].strict_overflow_p = false; - operand_entry *oe = (*ops)[ranges[*idx].idx]; + oe = (*ops)[ranges[*idx].idx]; /* Now change all the other range test immediate uses, so that those tests will be optimized away. */ if (opcode == ERROR_MARK) --- gcc/testsuite/gcc.dg/tree-ssa/pr81588.c.jj 2017-07-31 12:30:16.917723926 +0200 +++ gcc/testsuite/gcc.dg/tree-ssa/pr81588.c 2017-07-31 12:30:16.917723926 +0200 @@ -0,0 +1,15 @@ +/* PR tree-optimization/81588 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-reassoc1-details" } */ + +extern long long int a, c; +extern unsigned short b; + +/* { dg-final { scan-tree-dump-times "Optimizing range test \[^\n\r]* and comparison" 1 "reassoc1" } } */ + +__attribute__((noinline, noclone)) void +foo (void) +{ + if ((b > a) != (1 + (a < 0))) + c = 0; +} --- gcc/testsuite/gcc.dg/pr81588.c.jj 2017-07-31 12:30:16.917723926 +0200 +++ gcc/testsuite/gcc.dg/pr81588.c 2017-07-31 12:30:16.917723926 +0200 @@ -0,0 +1,26 @@ +/* PR tree-optimization/81588 */ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +long long int a = 5011877430933453486LL, c = 1; +unsigned short b = 24847; + +#include "tree-ssa/pr81588.c" + +int +main () +{ + foo (); + if (c != 0) + __builtin_abort (); + a = 24846; + c = 1; + foo (); + if (c != 1) + __builtin_abort (); + a = -5; + foo (); + if (c != 0) + __builtin_abort (); + return 0; +} --- gcc/testsuite/gcc.c-torture/execute/pr81588.c.jj 2017-07-31 13:15:11.832181205 +0200 +++ gcc/testsuite/gcc.c-torture/execute/pr81588.c 2017-07-31 13:15:30.716965993 +0200 @@ -0,0 +1,45 @@ +/* PR tree-optimization/81588 */ + +__attribute__((noinline, noclone)) int +bar (int x) +{ + __asm volatile ("" : : "g" (x) : "memory"); +} + +__attribute__((noinline, noclone)) int +foo (unsigned x, long long y) +{ + if (y < 0) + return 0; + if (y < (long long) (4 * x)) + { + bar (y); + return 1; + } + return 0; +} + +int +main () +{ + volatile unsigned x = 10; + volatile long long y = -10000; + if (foo (x, y) != 0) + __builtin_abort (); + y = -1; + if (foo (x, y) != 0) + __builtin_abort (); + y = 0; + if (foo (x, y) != 1) + __builtin_abort (); + y = 39; + if (foo (x, y) != 1) + __builtin_abort (); + y = 40; + if (foo (x, y) != 0) + __builtin_abort (); + y = 10000; + if (foo (x, y) != 0) + __builtin_abort (); + return 0; +} Jakub