On January 8, 2016 9:07:11 PM GMT+01:00, Jakub Jelinek <ja...@redhat.com> wrote: >Hi! > >As mentioned in the PRs, the X % -Y to X % Y optimization for signed >modulo is not valid unless we can prove that it won't be INT_MIN % >-(-1), >which is valid, but where INT_MIN % -1 is invalid. > >The following patch use range info to figure that out. >Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK. Thanks, Richard. >2016-01-08 Jakub Jelinek <ja...@redhat.com> > > PR middle-end/50865 > PR tree-optimization/69097 > * fold-const.h (expr_not_equal_to): New prototype. > * fold-const.c: Include stringpool.h and tree-ssanames.h. > (expr_not_equal_to): New function. > * match.pd (X % -Y is the same as X % Y): Don't optimize > unless X is known not to be equal to minimum or Y is known > not to be equal to -1. > * tree-vrp.c (simplify_div_or_mod_using_ranges): Add GSI argument. > fold TRUNC_MOD_EXPR if the second argument is not a power of two. > (simplify_stmt_using_ranges): Adjust caller. > (vrp_finalize): Call set_value_range on SSA_NAMEs before calling > substitute_and_fold. > > * gcc.c-torture/execute/pr50865.c: New test. > * gcc.c-torture/execute/pr69097-1.c: New test. > * gcc.c-torture/execute/pr69097-2.c: New test. > * gcc.dg/pr69097-1.c: New test. > * gcc.dg/pr69097-2.c: New test. > >--- gcc/fold-const.h.jj 2016-01-05 12:38:17.020555325 +0100 >+++ gcc/fold-const.h 2016-01-08 13:22:25.921536489 +0100 >@@ -177,6 +177,7 @@ extern bool merge_ranges (int *, tree *, > tree, tree); > extern tree sign_bit_p (tree, const_tree); > extern tree exact_inverse (tree, tree); >+extern bool expr_not_equal_to (tree t, const wide_int &); > extern tree const_unop (enum tree_code, tree, tree); > extern tree const_binop (enum tree_code, tree, tree, tree); > extern bool negate_mathfn_p (combined_fn); >--- gcc/fold-const.c.jj 2016-01-05 12:38:17.011555454 +0100 >+++ gcc/fold-const.c 2016-01-08 13:22:25.925536432 +0100 >@@ -74,6 +74,8 @@ along with GCC; see the file COPYING3. > #include "tree-into-ssa.h" > #include "md5.h" > #include "case-cfn-macros.h" >+#include "stringpool.h" >+#include "tree-ssanames.h" > > #ifndef LOAD_EXTEND_OP > #define LOAD_EXTEND_OP(M) UNKNOWN >@@ -9101,6 +9103,45 @@ tree_expr_nonzero_p (tree t) > return ret; > } > >+/* Return true if T is known not to be equal to an integer W. */ >+ >+bool >+expr_not_equal_to (tree t, const wide_int &w) >+{ >+ wide_int min, max, nz; >+ value_range_type rtype; >+ switch (TREE_CODE (t)) >+ { >+ case INTEGER_CST: >+ return wi::ne_p (t, w); >+ >+ case SSA_NAME: >+ if (!INTEGRAL_TYPE_P (TREE_TYPE (t))) >+ return false; >+ rtype = get_range_info (t, &min, &max); >+ if (rtype == VR_RANGE) >+ { >+ if (wi::lt_p (max, w, TYPE_SIGN (TREE_TYPE (t)))) >+ return true; >+ if (wi::lt_p (w, min, TYPE_SIGN (TREE_TYPE (t)))) >+ return true; >+ } >+ else if (rtype == VR_ANTI_RANGE >+ && wi::le_p (min, w, TYPE_SIGN (TREE_TYPE (t))) >+ && wi::le_p (w, max, TYPE_SIGN (TREE_TYPE (t)))) >+ return true; >+ /* If T has some known zero bits and W has any of those bits >set, >+ then T is known not to be equal to W. */ >+ if (wi::ne_p (wi::zext (wi::bit_and_not (w, get_nonzero_bits >(t)), >+ TYPE_PRECISION (TREE_TYPE (t))), 0)) >+ return true; >+ return false; >+ >+ default: >+ return false; >+ } >+} >+ > /* Fold a binary expression of code CODE and type TYPE with operands > OP0 and OP1. LOC is the location of the resulting expression. > Return the folded expression if folding is successful. Otherwise, >--- gcc/match.pd.jj 2016-01-05 12:38:16.979555912 +0100 >+++ gcc/match.pd 2016-01-08 13:23:28.815653802 +0100 >@@ -295,7 +295,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (trunc_mod @0 (convert? (negate @1))) > (if (!TYPE_UNSIGNED (type) > && !TYPE_OVERFLOW_TRAPS (type) >- && tree_nop_conversion_p (type, TREE_TYPE (@1))) >+ && tree_nop_conversion_p (type, TREE_TYPE (@1)) >+ /* Avoid this transformation if X might be INT_MIN or >+ Y might be -1, because we would then change valid >+ INT_MIN % -(-1) into invalid INT_MIN % -1. */ >+ && (expr_not_equal_to (@0, TYPE_MIN_VALUE (type)) >+ || expr_not_equal_to (@1, wi::minus_one (TYPE_PRECISION >+ (TREE_TYPE (@1)))))) > (trunc_mod @0 (convert @1)))) > > /* X - (X / Y) * Y is the same as X % Y. */ >--- gcc/tree-vrp.c.jj 2016-01-04 14:55:51.000000000 +0100 >+++ gcc/tree-vrp.c 2016-01-08 13:53:11.101943765 +0100 >@@ -8942,7 +8942,7 @@ simplify_truth_ops_using_ranges (gimple_ > modulo. */ > > static bool >-simplify_div_or_mod_using_ranges (gimple *stmt) >+simplify_div_or_mod_using_ranges (gimple_stmt_iterator *gsi, gimple >*stmt) > { > enum tree_code rhs_code = gimple_assign_rhs_code (stmt); > tree val = NULL; >@@ -8971,12 +8971,19 @@ simplify_div_or_mod_using_ranges (gimple > } > > if (!integer_pow2p (op1)) >- return false; >- >- if (TYPE_UNSIGNED (TREE_TYPE (op0))) > { >- val = integer_one_node; >+ /* X % -Y can be only optimized into X % Y either if >+ X is not INT_MIN, or Y is not -1. Fold it now, as after >+ remove_range_assertions the range info might be not available >+ anymore. */ >+ if (rhs_code == TRUNC_MOD_EXPR >+ && fold_stmt (gsi, follow_single_use_edges)) >+ return true; >+ return false; > } >+ >+ if (TYPE_UNSIGNED (TREE_TYPE (op0))) >+ val = integer_one_node; > else > { > bool sop = false; >@@ -9890,7 +9897,7 @@ simplify_stmt_using_ranges (gimple_stmt_ > case TRUNC_MOD_EXPR: > if (TREE_CODE (rhs1) == SSA_NAME > && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))) >- return simplify_div_or_mod_using_ranges (stmt); >+ return simplify_div_or_mod_using_ranges (gsi, stmt); > break; > > /* Transform ABS (X) into X or -X as appropriate. */ >@@ -10200,16 +10207,6 @@ vrp_finalize (bool warn_array_bounds_p) > fprintf (dump_file, "\n"); > } > >- substitute_and_fold (op_with_constant_singleton_value_range, >- vrp_fold_stmt, false); >- >- if (warn_array_bounds && warn_array_bounds_p) >- check_all_array_refs (); >- >- /* We must identify jump threading opportunities before we release >- the datastructures built by VRP. */ >- identify_jump_threads (); >- > /* Set value range to non pointer SSA_NAMEs. */ > for (i = 0; i < num_vr_values; i++) > if (vr_value[i]) >@@ -10230,6 +10227,16 @@ vrp_finalize (bool warn_array_bounds_p) > vr_value[i]->max); > } > >+ substitute_and_fold (op_with_constant_singleton_value_range, >+ vrp_fold_stmt, false); >+ >+ if (warn_array_bounds && warn_array_bounds_p) >+ check_all_array_refs (); >+ >+ /* We must identify jump threading opportunities before we release >+ the datastructures built by VRP. */ >+ identify_jump_threads (); >+ > /* Free allocated memory. */ > for (i = 0; i < num_vr_values; i++) > if (vr_value[i]) >--- gcc/testsuite/gcc.c-torture/execute/pr50865.c.jj 2016-01-08 >14:44:28.749265388 +0100 >+++ gcc/testsuite/gcc.c-torture/execute/pr50865.c 2016-01-08 >14:52:07.000000000 +0100 >@@ -0,0 +1,27 @@ >+/* PR middle-end/50865 */ >+ >+#define INT64_MIN (-__LONG_LONG_MAX__ - 1) >+ >+int >+main () >+{ >+ volatile long long l1 = 1; >+ volatile long long l2 = -1; >+ volatile long long l3 = -1; >+ >+ if ((INT64_MIN % 1LL) != 0) >+ __builtin_abort (); >+ if ((INT64_MIN % l1) != 0) >+ __builtin_abort (); >+ if (l2 == -1) >+ { >+ if ((INT64_MIN % 1LL) != 0) >+ __builtin_abort (); >+ } >+ else if ((INT64_MIN % -l2) != 0) >+ __builtin_abort (); >+ if ((INT64_MIN % -l3) != 0) >+ __builtin_abort (); >+ >+ return 0; >+} >--- gcc/testsuite/gcc.c-torture/execute/pr69097-1.c.jj 2016-01-08 >14:08:11.577422788 +0100 >+++ gcc/testsuite/gcc.c-torture/execute/pr69097-1.c 2016-01-08 >14:06:51.000000000 +0100 >@@ -0,0 +1,14 @@ >+/* PR tree-optimization/69097 */ >+ >+int a, b; >+unsigned int c; >+ >+int >+main () >+{ >+ int d = b; >+ b = ~(~a + (~d | b)); >+ a = ~(~c >> b); >+ c = a % b; >+ return 0; >+} >--- gcc/testsuite/gcc.c-torture/execute/pr69097-2.c.jj 2016-01-08 >14:08:14.726378977 +0100 >+++ gcc/testsuite/gcc.c-torture/execute/pr69097-2.c 2016-01-08 >14:07:49.000000000 +0100 >@@ -0,0 +1,30 @@ >+/* PR tree-optimization/69097 */ >+ >+__attribute__((noinline, noclone)) int >+f1 (int x, int y) >+{ >+ return x % y; >+} >+ >+__attribute__((noinline, noclone)) int >+f2 (int x, int y) >+{ >+ return x % -y; >+} >+ >+__attribute__((noinline, noclone)) int >+f3 (int x, int y) >+{ >+ int z = -y; >+ return x % z; >+} >+ >+int >+main () >+{ >+ if (f1 (-__INT_MAX__ - 1, 1) != 0 >+ || f2 (-__INT_MAX__ - 1, -1) != 0 >+ || f3 (-__INT_MAX__ - 1, -1) != 0) >+ __builtin_abort (); >+ return 0; >+} >--- gcc/testsuite/gcc.dg/pr69097-1.c.jj 2016-01-08 14:23:01.631064387 >+0100 >+++ gcc/testsuite/gcc.dg/pr69097-1.c 2016-01-08 14:02:45.000000000 >+0100 >@@ -0,0 +1,140 @@ >+/* PR tree-optimization/69097 */ >+/* { dg-do compile } */ >+/* { dg-options "-O2 -fdump-tree-optimized" } */ >+/* All the x % -y below should be optimized into x % y, as >+ it should never be INT_MIN % -(-1). */ >+/* { dg-final { scan-tree-dump-not "-y" "optimized" } } */ >+ >+int >+f1 (int x, int y) >+{ >+ if (x == -__INT_MAX__ - 1) >+ __builtin_unreachable (); >+ return x % -y; >+} >+ >+int >+f2 (int x, int y) >+{ >+ if (x < -__INT_MAX__) >+ __builtin_unreachable (); >+ return x % -y; >+} >+ >+int >+f3 (int x, int y) >+{ >+ if (y == -1) >+ __builtin_unreachable (); >+ return x % -y; >+} >+ >+int >+f4 (int x, int y) >+{ >+ if (y < 0) >+ __builtin_unreachable (); >+ return x % -y; >+} >+ >+int >+f5 (int x, int y) >+{ >+ if (y >= -1) >+ __builtin_unreachable (); >+ return x % -y; >+} >+ >+int >+f6 (int x, int y) >+{ >+ if (y < 0 || y > 24) >+ __builtin_unreachable (); >+ return x % -y; >+} >+ >+int >+f7 (int x, int y) >+{ >+ if (y <= -17 || y >= -1) >+ __builtin_unreachable (); >+ return x % -y; >+} >+ >+int >+f8 (int x, int y) >+{ >+ if (y >= -13 && y <= 15) >+ __builtin_unreachable (); >+ return x % -y; >+} >+ >+int >+f9 (int x, int y) >+{ >+ return x % -(y & ~4); >+} >+ >+int >+f10 (int x, int y) >+{ >+ if (x != -__INT_MAX__ - 1) >+ return x % -y; >+ return 34; >+} >+ >+int >+f11 (int x, int y) >+{ >+ if (x >= -__INT_MAX__) >+ return x % -y; >+ return 34; >+} >+ >+int >+f12 (int x, int y) >+{ >+ if (y != -1) >+ return x % -y; >+ return 34; >+} >+ >+int >+f13 (int x, int y) >+{ >+ if (y >= 0) >+ return x % -y; >+ return 34; >+} >+ >+int >+f14 (int x, int y) >+{ >+ if (y < -1) >+ return x % -y; >+ return 34; >+} >+ >+int >+f15 (int x, int y) >+{ >+ if (y >= 0 && y <= 24) >+ return x % -y; >+ return 34; >+} >+ >+int >+f16 (int x, int y) >+{ >+ if (y > -17 && y < -1) >+ return x % -y; >+ return 34; >+} >+ >+int >+f17 (int x, int y) >+{ >+ if (y < -13 || y > 15) >+ return x % -y; >+ return 34; >+} >--- gcc/testsuite/gcc.dg/pr69097-2.c.jj 2016-01-08 14:23:14.032892362 >+0100 >+++ gcc/testsuite/gcc.dg/pr69097-2.c 2016-01-08 14:29:59.550327493 >+0100 >@@ -0,0 +1,138 @@ >+/* PR tree-optimization/69097 */ >+/* { dg-do compile } */ >+/* { dg-options "-O2 -fdump-tree-optimized" } */ >+/* { dg-final { scan-tree-dump-times "-y" 17 "optimized" } } */ >+ >+int >+f1 (int x, int y) >+{ >+ if (x == -__INT_MAX__) >+ __builtin_unreachable (); >+ return x % -y; >+} >+ >+int >+f2 (int x, int y) >+{ >+ if (x >= -__INT_MAX__ + 1) >+ __builtin_unreachable (); >+ return x % -y; >+} >+ >+int >+f3 (int x, int y) >+{ >+ if (y == -2) >+ __builtin_unreachable (); >+ return x % -y; >+} >+ >+int >+f4 (int x, int y) >+{ >+ if (y < -1) >+ __builtin_unreachable (); >+ return x % -y; >+} >+ >+int >+f5 (int x, int y) >+{ >+ if (y >= 0) >+ __builtin_unreachable (); >+ return x % -y; >+} >+ >+int >+f6 (int x, int y) >+{ >+ if (y < -1 || y > 24) >+ __builtin_unreachable (); >+ return x % -y; >+} >+ >+int >+f7 (int x, int y) >+{ >+ if (y <= -17 || y >= 0) >+ __builtin_unreachable (); >+ return x % -y; >+} >+ >+int >+f8 (int x, int y) >+{ >+ if (y >= -13 && y <= -2) >+ __builtin_unreachable (); >+ return x % -y; >+} >+ >+int >+f9 (int x, int y) >+{ >+ return x % -y; >+} >+ >+int >+f10 (int x, int y) >+{ >+ if (x != -__INT_MAX__) >+ return x % -y; >+ return 34; >+} >+ >+int >+f11 (int x, int y) >+{ >+ if (x < -__INT_MAX__ + 2) >+ return x % -y; >+ return 34; >+} >+ >+int >+f12 (int x, int y) >+{ >+ if (y != -2) >+ return x % -y; >+ return 34; >+} >+ >+int >+f13 (int x, int y) >+{ >+ if (y >= -1) >+ return x % -y; >+ return 34; >+} >+ >+int >+f14 (int x, int y) >+{ >+ if (y < 0) >+ return x % -y; >+ return 34; >+} >+ >+int >+f15 (int x, int y) >+{ >+ if (y >= -1 && y <= 24) >+ return x % -y; >+ return 34; >+} >+ >+int >+f16 (int x, int y) >+{ >+ if (y > -17 && y < 0) >+ return x % -y; >+ return 34; >+} >+ >+int >+f17 (int x, int y) >+{ >+ if (y < -13 || y > -4) >+ return x % -y; >+ return 34; >+} > > Jakub