The following adds more aggressive dead code elimination (DCE) to forwprop to clean up dead stmts when folding a stmt leaves some of its operands unused. The patch uses simple_dce_from_worklist for this purpose: the operands a stmt had before substitution and folding are queued for DCE, but only if we actually folded the stmt.
This removes one dead stmt biasing threading costs in a later pass but it doesn't resolve the optimization issue in the PR yet. Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed. PR tree-optimization/111294 * tree-ssa-forwprop.cc (pass_forwprop::execute): Track operands that eventually become dead and use simple_dce_from_worklist to remove their definitions if they did so. * gcc.dg/tree-ssa/evrp10.c: Adjust. * gcc.dg/tree-ssa/evrp6.c: Likewise. * gcc.dg/tree-ssa/forwprop-31.c: Likewise. * gcc.dg/tree-ssa/neg-cast-3.c: Likewise. --- gcc/testsuite/gcc.dg/tree-ssa/evrp10.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/evrp6.c | 5 ++-- gcc/testsuite/gcc.dg/tree-ssa/forwprop-31.c | 3 +-- gcc/testsuite/gcc.dg/tree-ssa/neg-cast-3.c | 4 +-- gcc/tree-ssa-forwprop.cc | 27 +++++++++++++++++---- 5 files changed, 28 insertions(+), 13 deletions(-) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/evrp10.c b/gcc/testsuite/gcc.dg/tree-ssa/evrp10.c index 6ca00e4adaa..776c80c684f 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/evrp10.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/evrp10.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-evrp" }*/ +/* { dg-options "-O2 -fdump-tree-evrp -fno-tree-forwprop" }*/ typedef __INT32_TYPE__ int32_t; diff --git a/gcc/testsuite/gcc.dg/tree-ssa/evrp6.c b/gcc/testsuite/gcc.dg/tree-ssa/evrp6.c index aaeec68866e..0f9561b6a72 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/evrp6.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/evrp6.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-evrp-details" } */ +/* { dg-options "-O2 -fdump-tree-evrp-details -fdump-tree-mergephi1" } */ extern void abort (void); @@ -18,4 +18,5 @@ foo (int k, int j) return j; } -/* { dg-final { scan-tree-dump "\\\[12, \\+INF" "evrp" } } */ +/* { dg-final { scan-tree-dump "\\\[11, \\+INF" "evrp" } } */ +/* { dg-final { scan-tree-dump-not "abort" "mergephi1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-31.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-31.c 
index edf80264884..40cc86383fa 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-31.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-31.c @@ -9,6 +9,5 @@ int foo (int x) return w - z; /* becomes 0 */ } -/* Only z = x + 1 is retained. */ -/* { dg-final { scan-tree-dump-times " = " 1 "forwprop1" } } */ +/* { dg-final { scan-tree-dump-times " = " 0 "forwprop1" } } */ /* { dg-final { scan-tree-dump "return 0;" "forwprop1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/neg-cast-3.c b/gcc/testsuite/gcc.dg/tree-ssa/neg-cast-3.c index 7b23ca85d1f..61b89403a93 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/neg-cast-3.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/neg-cast-3.c @@ -10,6 +10,4 @@ unsigned f(_Bool a) } /* There should be no cast to int at all. */ -/* Forwprop1 does not remove all of the statements. */ -/* { dg-final { scan-tree-dump-not "\\\(int\\\)" "forwprop1" { xfail *-*-* } } } */ -/* { dg-final { scan-tree-dump-not "\\\(int\\\)" "optimized" } } */ +/* { dg-final { scan-tree-dump-not "\\\(int\\\)" "forwprop1" } } */ diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index 94ca47a9726..d4e9202a2d4 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -53,6 +53,7 @@ along with GCC; see the file COPYING3. If not see #include "cgraph.h" #include "tree-ssa.h" #include "gimple-range.h" +#include "tree-ssa-dce.h" /* This pass propagates the RHS of assignment statements into use sites of the LHS of the assignment. 
It's basically a specialized @@ -3502,8 +3503,9 @@ pass_forwprop::execute (function *fun) |= EDGE_EXECUTABLE; auto_vec<gimple *, 4> to_fixup; auto_vec<gimple *, 32> to_remove; + auto_bitmap simple_dce_worklist; + auto_bitmap need_ab_cleanup; to_purge = BITMAP_ALLOC (NULL); - bitmap need_ab_cleanup = BITMAP_ALLOC (NULL); for (int i = 0; i < postorder_num; ++i) { gimple_stmt_iterator gsi; @@ -3902,10 +3904,14 @@ pass_forwprop::execute (function *fun) { tree use = USE_FROM_PTR (usep); tree val = fwprop_ssa_val (use); - if (val && val != use && may_propagate_copy (use, val)) + if (val && val != use) { - propagate_value (usep, val); - substituted_p = true; + bitmap_set_bit (simple_dce_worklist, SSA_NAME_VERSION (use)); + if (may_propagate_copy (use, val)) + { + propagate_value (usep, val); + substituted_p = true; + } } } if (substituted_p @@ -3925,6 +3931,11 @@ pass_forwprop::execute (function *fun) && gimple_call_noreturn_p (stmt)); changed = false; + auto_vec<tree, 8> uses; + FOR_EACH_SSA_USE_OPERAND (usep, stmt, iter, SSA_OP_USE) + if (uses.space (1)) + uses.quick_push (USE_FROM_PTR (usep)); + if (fold_stmt (&gsi, fwprop_ssa_val)) { changed = true; @@ -3935,6 +3946,12 @@ pass_forwprop::execute (function *fun) if (gimple_cond_true_p (cond) || gimple_cond_false_p (cond)) cfg_changed = true; + /* Queue old uses for simple DCE. */ + for (tree use : uses) + if (TREE_CODE (use) == SSA_NAME + && !SSA_NAME_IS_DEFAULT_DEF (use)) + bitmap_set_bit (simple_dce_worklist, + SSA_NAME_VERSION (use)); } if (changed || substituted_p) @@ -4115,6 +4132,7 @@ pass_forwprop::execute (function *fun) release_defs (stmt); } } + simple_dce_from_worklist (simple_dce_worklist, to_purge); /* Fixup stmts that became noreturn calls. This may require splitting blocks and thus isn't possible during the walk. 
Do this @@ -4135,7 +4153,6 @@ pass_forwprop::execute (function *fun) cfg_changed |= gimple_purge_all_dead_eh_edges (to_purge); cfg_changed |= gimple_purge_all_dead_abnormal_call_edges (need_ab_cleanup); BITMAP_FREE (to_purge); - BITMAP_FREE (need_ab_cleanup); if (get_range_query (fun) != get_global_range_query ()) disable_ranger (fun); -- 2.35.3