The second time I missed patch in one day, I hate myself.
Here it is.
> -----Original Message-----
> From: [email protected] [mailto:gcc-patches-
> [email protected]] On Behalf Of Bin Cheng
> Sent: Monday, February 09, 2015 6:10 PM
> To: [email protected]
> Subject: [PATCH PR64705]Don't aggressively expand induction variable's
base
>
> Hi,
> As comments in the PR, root cause is GCC aggressively expand induction
> variable's base. This patch avoids that by adding new parameter to
> expand_simple_operations thus we can stop expansion whenever ssa var
> referred by IV's step is encountered. As comments in patch, we could stop
> expanding at each ssa var referred by IV's step, but that's expensive and
not
> likely to happen, this patch only extracts the first ssa var and skips
expanding
> accordingly.
> For the new test case, currently the loop body is bloated as below after
> IVOPT:
>
> <bb 5>:
> # ci_28 = PHI <ci_12(D)(4), ci_17(6)>
> # ivtmp.13_31 = PHI <ivtmp.13_25(4), ivtmp.13_27(6)>
> ci_17 = ci_28 + 1;
> _1 = (void *) ivtmp.13_31;
> MEM[base: _1, offset: 0B] = 0;
> ivtmp.13_27 = ivtmp.13_31 + _26;
> _34 = (unsigned long) _13;
> _35 = (unsigned long) flags_8(D);
> _36 = _34 - _35;
> _37 = (unsigned long) step_14;
> _38 = _36 - _37;
> _39 = ivtmp.13_27 + _38;
> _40 = _39 + 3;
> iter_33 = (long int) _40;
> if (len_16(D) >= iter_33)
> goto <bb 6>;
> else
> goto <bb 7>;
>
> <bb 6>:
> goto <bb 5>;
>
> And it can be improved by this patch as below:
>
> <bb 5>:
> # steps_28 = PHI <steps_12(D)(4), steps_17(6)>
> # iter_29 = PHI <iter_15(4), iter_21(6)>
> steps_17 = steps_28 + 1;
> _31 = (sizetype) iter_29;
> MEM[base: flags_8(D), index: _31, offset: 0B] = 0;
> iter_21 = step_14 + iter_29;
> if (len_16(D) >= iter_21)
> goto <bb 6>;
> else
> goto <bb 7>;
>
> <bb 6>:
> goto <bb 5>;
>
>
> I think this is a corner case, it only changes several files' assembly
code
> slightly in spec2k6. Among these files, only one of them is regression.
I
> looked into the regression and thought it was because of passes after
IVOPT.
> The IVOPT dump is at least not worse than the original version.
>
> Bootstrap and test on x86_64 and AArch64, so is it OK?
>
> 2015-02-09 Bin Cheng <[email protected]>
>
> PR tree-optimization/64705
> * tree-ssa-loop-niter.h (expand_simple_operations): New
> parameter.
> * tree-ssa-loop-niter.c (expand_simple_operations): New parameter.
> (tree_simplify_using_condition_1, refine_bounds_using_guard)
> (number_of_iterations_exit): Pass new argument to
> expand_simple_operations.
> * tree-ssa-loop-ivopts.c (extract_single_var_from_expr): New.
> (find_bivs, find_givs_in_stmt_scev): Pass new argument to
> expand_simple_operations. Call extract_single_var_from_expr.
> (difference_cannot_overflow_p): Pass new argument to
> expand_simple_operations.
>
> gcc/testsuite/ChangeLog
> 2015-02-09 Bin Cheng <[email protected]>
>
> PR tree-optimization/64705
> * gcc.dg/tree-ssa/pr64705.c: New test.
>
>
>
>
Index: gcc/tree-ssa-loop-ivopts.c
===================================================================
--- gcc/tree-ssa-loop-ivopts.c (revision 219574)
+++ gcc/tree-ssa-loop-ivopts.c (working copy)
@@ -1070,13 +1070,40 @@ determine_biv_step (gphi *phi)
return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
+/* Return the first non-invariant ssa var found in EXPR. */
+
+static tree
+extract_single_var_from_expr (tree expr)
+{
+ int i, n;
+ tree tmp;
+ enum tree_code code;
+
+ if (!expr || is_gimple_min_invariant (expr))
+ return NULL;
+
+ code = TREE_CODE (expr);
+ if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
+ {
+ n = TREE_OPERAND_LENGTH (expr);
+ for (i = 0; i < n; i++)
+ {
+ tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
+
+ if (tmp)
+ return tmp;
+ }
+ }
+ return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
+}
+
/* Finds basic ivs. */
static bool
find_bivs (struct ivopts_data *data)
{
gphi *phi;
- tree step, type, base;
+ tree step, type, base, stop;
bool found = false;
struct loop *loop = data->current_loop;
gphi_iterator psi;
@@ -1093,7 +1120,13 @@ find_bivs (struct ivopts_data *data)
continue;
base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
- base = expand_simple_operations (base);
+ /* Stop expanding iv base at the first ssa var referred by iv step.
+ Ideally we should stop at any ssa var, because that's expensive
+ and unusual to happen, we just do it on the first one.
+
+ See PR64705 for the rationale. */
+ stop = extract_single_var_from_expr (step);
+ base = expand_simple_operations (base, stop);
if (contains_abnormal_ssa_name_p (base)
|| contains_abnormal_ssa_name_p (step))
continue;
@@ -1165,7 +1198,7 @@ mark_bivs (struct ivopts_data *data)
static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
- tree lhs;
+ tree lhs, stop;
struct loop *loop = data->current_loop;
iv->base = NULL_TREE;
@@ -1180,13 +1213,20 @@ find_givs_in_stmt_scev (struct ivopts_data *data,
if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
return false;
- iv->base = expand_simple_operations (iv->base);
+ /* Stop expanding iv base at the first ssa var referred by iv step.
+ Ideally we should stop at any ssa var, because that's expensive
+ and unusual to happen, we just do it on the first one.
+
+ See PR64705 for the rationale. */
+ stop = extract_single_var_from_expr (iv->step);
+ iv->base = expand_simple_operations (iv->base, stop);
+
if (contains_abnormal_ssa_name_p (iv->base)
|| contains_abnormal_ssa_name_p (iv->step))
return false;
- /* If STMT could throw, then do not consider STMT as defining a GIV.
+ /* If STMT could throw, then do not consider STMT as defining a GIV.
While this will suppress optimizations, we can not safely delete this
GIV and associated statements, even if it appears it is not used. */
if (stmt_could_throw_p (stmt))
@@ -4486,7 +4526,7 @@ difference_cannot_overflow_p (struct ivopts_data *
if (!nowrap_type_p (TREE_TYPE (base)))
return false;
- base = expand_simple_operations (base);
+ base = expand_simple_operations (base, NULL);
if (TREE_CODE (base) == SSA_NAME)
{
Index: gcc/tree-ssa-loop-niter.c
===================================================================
--- gcc/tree-ssa-loop-niter.c (revision 219574)
+++ gcc/tree-ssa-loop-niter.c (working copy)
@@ -362,8 +362,8 @@ refine_bounds_using_guard (tree type, tree varx, m
mpz_init (offc0);
mpz_init (offc1);
- split_to_var_and_offset (expand_simple_operations (c0), &varc0, offc0);
- split_to_var_and_offset (expand_simple_operations (c1), &varc1, offc1);
+ split_to_var_and_offset (expand_simple_operations (c0, NULL), &varc0, offc0);
+ split_to_var_and_offset (expand_simple_operations (c1, NULL), &varc1, offc1);
/* We are only interested in comparisons of expressions based on VARX and
VARY. TODO -- we might also be able to derive some bounds from
@@ -1552,10 +1552,11 @@ simplify_replace_tree (tree expr, tree old, tree n
}
/* Expand definitions of ssa names in EXPR as long as they are simple
- enough, and return the new expression. */
+ enough, and return the new expression. If STOP is specified, stop
+ expanding if EXPR equals to it. */
tree
-expand_simple_operations (tree expr)
+expand_simple_operations (tree expr, tree stop)
{
unsigned i, n;
tree ret = NULL_TREE, e, ee, e1;
@@ -1568,6 +1569,10 @@ tree
if (is_gimple_min_invariant (expr))
return expr;
+ /* Stop if it's an expression that we don't want to expand. */
+ if (stop && operand_equal_p (expr, stop, 0))
+ return expr;
+
code = TREE_CODE (expr);
if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
{
@@ -1575,7 +1580,7 @@ tree
for (i = 0; i < n; i++)
{
e = TREE_OPERAND (expr, i);
- ee = expand_simple_operations (e);
+ ee = expand_simple_operations (e, stop);
if (e == ee)
continue;
@@ -1614,7 +1619,7 @@ tree
&& src->loop_father != dest->loop_father)
return expr;
- return expand_simple_operations (e);
+ return expand_simple_operations (e, stop);
}
if (gimple_code (stmt) != GIMPLE_ASSIGN)
return expr;
@@ -1634,7 +1639,7 @@ tree
return e;
if (code == SSA_NAME)
- return expand_simple_operations (e);
+ return expand_simple_operations (e, stop);
return expr;
}
@@ -1643,7 +1648,7 @@ tree
{
CASE_CONVERT:
/* Casts are simple. */
- ee = expand_simple_operations (e);
+ ee = expand_simple_operations (e, stop);
return fold_build1 (code, TREE_TYPE (expr), ee);
case PLUS_EXPR:
@@ -1658,7 +1663,7 @@ tree
if (!is_gimple_min_invariant (e1))
return expr;
- ee = expand_simple_operations (e);
+ ee = expand_simple_operations (e, stop);
return fold_build2 (code, TREE_TYPE (expr), ee, e1);
default:
@@ -1758,7 +1763,7 @@ tree_simplify_using_condition_1 (tree cond, tree e
return boolean_true_node;
}
- te = expand_simple_operations (expr);
+ te = expand_simple_operations (expr, NULL);
/* Check whether COND ==> EXPR. */
notcond = invert_truthvalue (cond);
@@ -1784,7 +1789,7 @@ tree_simplify_using_condition_1 (tree cond, tree e
static tree
tree_simplify_using_condition (tree cond, tree expr)
{
- cond = expand_simple_operations (cond);
+ cond = expand_simple_operations (cond, NULL);
return tree_simplify_using_condition_1 (cond, expr);
}
@@ -1994,8 +1999,8 @@ number_of_iterations_exit (struct loop *loop, edge
computing the number of iterations. */
fold_defer_overflow_warnings ();
- iv0.base = expand_simple_operations (iv0.base);
- iv1.base = expand_simple_operations (iv1.base);
+ iv0.base = expand_simple_operations (iv0.base, NULL);
+ iv1.base = expand_simple_operations (iv1.base, NULL);
if (!number_of_iterations_cond (loop, type, &iv0, code, &iv1, niter,
loop_only_exit_p (loop, exit), safe))
{
Index: gcc/tree-ssa-loop-niter.h
===================================================================
--- gcc/tree-ssa-loop-niter.h (revision 219574)
+++ gcc/tree-ssa-loop-niter.h (working copy)
@@ -20,7 +20,7 @@ along with GCC; see the file COPYING3. If not see
#ifndef GCC_TREE_SSA_LOOP_NITER_H
#define GCC_TREE_SSA_LOOP_NITER_H
-extern tree expand_simple_operations (tree);
+extern tree expand_simple_operations (tree, tree);
extern bool loop_only_exit_p (const struct loop *, const_edge);
extern bool number_of_iterations_exit (struct loop *, edge,
struct tree_niter_desc *niter, bool,
Index: gcc/testsuite/gcc.dg/tree-ssa/pr64705.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/pr64705.c (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/pr64705.c (revision 0)
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */
+
+double g;
+
+int foo(char *flags, long len, long i, long steps)
+{
+ register long step, iter;
+
+ if(*(flags + i))
+ {
+ step = i + i + 3;
+ for(iter = i + step ; iter <= len ; iter += step)
+ {
+ steps++;
+ *(flags + iter)=0;
+ }
+ }
+ g = 5.0*(double)steps;
+
+ return 0;
+}
+
+/* Don't expand iv {base+step, step}_loop into {base+x+y, step}_loop
+ even if "step == x + y". */
+/* { dg-final { scan-tree-dump "base step_\[0-9\]* \\+ iter|base iter_\[0-9\]*
\\+ step" "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */