The following adds a simple ad-hoc way (matching the other code in the SSA code sinking pass) to sink and merge common stores into a CFG merge block.
On cc1-files (from the GCC 4.7 branch head) this performs 930 such merges out of which 366 are stores with the same value (and thus require no PHI). Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. The motivating testcase is that of PR33315 which we now manage to optimize to three straight-line stores from a previously twisted CFG maze. There are a lot of enhancement possibilities but as for all the rest of the sinking code in this pass a proper dataflow driven approach is missing (like SSU-PRE ontop of which sinking can be implemented similar to hoisting ontop of GVN-PRE). Comments? I'll give it overnight SPEC testing (but don't expect any surprises). Thanks, Richard. 2016-07-13 Richard Biener <rguent...@suse.de> PR tree-optimization/33315 * tree-ssa-sink.c: Include tree-eh.h. (sink_code_in_bb): Return TODO_cleanup_cfg if we commonized and sunk stores. Implement store commoning by sinking to the successor. * gcc.dg/tree-ssa/ssa-sink-13.c: New testcase. * gcc.dg/tree-ssa/ssa-sink-14.c: Likewise. Index: gcc/tree-ssa-sink.c =================================================================== *** gcc/tree-ssa-sink.c (revision 238287) --- gcc/tree-ssa-sink.c (working copy) *************** along with GCC; see the file COPYING3. *** 35,40 **** --- 35,41 ---- #include "tree-cfg.h" #include "cfgloop.h" #include "params.h" + #include "tree-eh.h" /* TODO: 1. Sinking store only using scalar promotion (IE without moving the RHS): *************** statement_sink_location (gimple *stmt, b *** 460,466 **** /* Perform code sinking on BB */ ! static void sink_code_in_bb (basic_block bb) { basic_block son; --- 461,467 ---- /* Perform code sinking on BB */ ! static unsigned sink_code_in_bb (basic_block bb) { basic_block son; *************** sink_code_in_bb (basic_block bb) *** 468,473 **** --- 469,628 ---- edge_iterator ei; edge e; bool last = true; + unsigned todo = 0; + + /* Very simplistic code to sink common stores from the predecessor through + our virtual PHI. 
We do this before sinking stmts from BB as it might + expose sinking opportunities of the merged stores. + Once we have partial dead code elimination through sth like SSU-PRE this + should be moved there. */ + gphi *phi; + if (EDGE_COUNT (bb->preds) > 1 + && (phi = get_virtual_phi (bb))) + { + /* Repeat until no more common stores are found. */ + while (1) + { + gimple *first_store = NULL; + auto_vec <tree, 5> vdefs; + + /* Search for common stores defined by all virtual PHI args. + ??? Common stores not present in all predecessors could + be handled by inserting a forwarder to sink to. Generally + this involves deciding which stores to do this for if + multiple common stores are present for different sets of + predecessors. See PR11832 for an interesting case. */ + for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i) + { + tree arg = gimple_phi_arg_def (phi, i); + gimple *def = SSA_NAME_DEF_STMT (arg); + if (! is_gimple_assign (def) + || stmt_can_throw_internal (def)) + { + /* ??? We could handle some cascading with the def being + another PHI. We'd have to insert multiple PHIs for + the rhs then though (if they are not all equal). */ + first_store = NULL; + break; + } + /* ??? Do not try to do anything fancy with aliasing, thus + do not sink across non-aliased loads (or even stores, + so different store order will make the sinking fail). */ + bool all_uses_on_phi = true; + imm_use_iterator iter; + use_operand_p use_p; + FOR_EACH_IMM_USE_FAST (use_p, iter, arg) + if (USE_STMT (use_p) != phi) + all_uses_on_phi = false; + if (! all_uses_on_phi) + { + first_store = NULL; + break; + } + if (! first_store) + first_store = def; + /* ??? We could handle differing SSA uses in the LHS by inserting + PHIs for them. */ + else if (! operand_equal_p (gimple_assign_lhs (first_store), + gimple_assign_lhs (def), 0)) + { + first_store = NULL; + break; + } + TREE_VISITED (arg) = 1; + vdefs.safe_push (arg); + } + if (! 
first_store) + break; + + /* Check if we need a PHI node to merge the stored values. */ + bool allsame = true; + for (unsigned i = 1; i < vdefs.length (); ++i) + { + gimple *def = SSA_NAME_DEF_STMT (vdefs[i]); + if (! operand_equal_p (gimple_assign_rhs1 (first_store), + gimple_assign_rhs1 (def), 0)) + { + allsame = false; + break; + } + } + + /* We cannot handle aggregate values if we need to merge them. */ + tree type = TREE_TYPE (gimple_assign_lhs (first_store)); + if (! allsame + && ! is_gimple_reg_type (type)) + break; + + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, + gimple_location (first_store), + "sinking common stores %sto ", + allsame ? "with same value " : ""); + dump_generic_expr (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, + gimple_assign_lhs (first_store)); + dump_printf (MSG_OPTIMIZED_LOCATIONS, "\n"); + } + + /* Insert a PHI to merge differing stored values if necessary. + Note that in general inserting PHIs isn't a very good idea as + it makes the job of coalescing and register allocation harder. + Even common SSA uses on the rhs/lhs might extend their lifetime + across multiple edges by this code motion which makes + register allocation harder. */ + tree from; + if (! allsame) + { + from = make_ssa_name (type); + gphi *newphi = create_phi_node (from, bb); + for (unsigned i = 0; i < vdefs.length (); ++i) + { + gimple *def = SSA_NAME_DEF_STMT (vdefs[i]); + add_phi_arg (newphi, gimple_assign_rhs1 (def), + EDGE_PRED (bb, i), UNKNOWN_LOCATION); + } + } + else + from = gimple_assign_rhs1 (first_store); + + /* Remove all stores. */ + for (unsigned i = 0; i < vdefs.length (); ++i) + { + /* If we have more than one use of a VDEF on the PHI make sure + we remove the defining stmt only once. 
*/ + if (TREE_VISITED (vdefs[i])) + { + TREE_VISITED (vdefs[i]) = 0; + gimple *def = SSA_NAME_DEF_STMT (vdefs[i]); + gsi = gsi_for_stmt (def); + unlink_stmt_vdef (def); + gsi_remove (&gsi, false); + release_defs (def); + } + } + + /* Insert the first store at the beginning of the merge BB. */ + gimple_set_vdef (first_store, gimple_phi_result (phi)); + SSA_NAME_DEF_STMT (gimple_vdef (first_store)) = first_store; + gimple_phi_set_result (phi, make_ssa_name (gimple_vop (cfun))); + gimple_set_vuse (first_store, gimple_phi_result (phi)); + gimple_assign_set_rhs1 (first_store, from); + /* ??? Should we reset first_stores location? */ + gsi = gsi_after_labels (bb); + gsi_insert_before (&gsi, first_store, GSI_SAME_STMT); + + todo |= TODO_cleanup_cfg; + } + + /* We could now have empty predecessors that we could remove, + forming a proper CFG for further sinking. Note that even + CFG cleanup doesn't do this fully at the moment and it + doesn't preserve post-dominators in the process either. + The mergephi pass might do it though. gcc.dg/tree-ssa/ssa-sink-13.c + shows this nicely if you disable tail merging or (same effect) + make the stored values unequal. */ + } /* If this block doesn't dominate anything, there can't be any place to sink the statements to. */ *************** sink_code_in_bb (basic_block bb) *** 543,550 **** son; son = next_dom_son (CDI_POST_DOMINATORS, son)) { ! sink_code_in_bb (son); } } /* Perform code sinking. --- 698,707 ---- son; son = next_dom_son (CDI_POST_DOMINATORS, son)) { ! todo |= sink_code_in_bb (son); } + + return todo; } /* Perform code sinking. *************** pass_sink_code::execute (function *fun) *** 622,634 **** memset (&sink_stats, 0, sizeof (sink_stats)); calculate_dominance_info (CDI_DOMINATORS); calculate_dominance_info (CDI_POST_DOMINATORS); ! 
sink_code_in_bb (EXIT_BLOCK_PTR_FOR_FN (fun)); statistics_counter_event (fun, "Sunk statements", sink_stats.sunk); free_dominance_info (CDI_POST_DOMINATORS); remove_fake_exit_edges (); loop_optimizer_finalize (); ! return 0; } } // anon namespace --- 779,791 ---- memset (&sink_stats, 0, sizeof (sink_stats)); calculate_dominance_info (CDI_DOMINATORS); calculate_dominance_info (CDI_POST_DOMINATORS); ! unsigned todo = sink_code_in_bb (EXIT_BLOCK_PTR_FOR_FN (fun)); statistics_counter_event (fun, "Sunk statements", sink_stats.sunk); free_dominance_info (CDI_POST_DOMINATORS); remove_fake_exit_edges (); loop_optimizer_finalize (); ! return todo; } } // anon namespace Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-13.c =================================================================== *** gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-13.c (revision 0) --- gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-13.c (working copy) *************** *** 0 **** --- 1,25 ---- + /* PR33315 */ + /* { dg-do compile } */ + /* { dg-options "-O2 -fdump-tree-sink" } */ + + int num; + int a[20]; + + void test () + { + int i; + int *ptr; + ptr = & a[0]; + i = num; + if ( i == 1) *(ptr+0) = 0; + if ( i != 1) *(ptr+0) = 0; + if ( i == 2) *(ptr+1) = 0; + if ( i != 2) *(ptr+1) = 0; + if ( i == 3) *(ptr+2) = 0; + if ( i != 3) *(ptr+2) = 0; + } + + /* We should sink/merge all stores and end up with a single BB. 
*/ + + /* { dg-final { scan-tree-dump-times "MEM\[^\n\r\]* = 0;" 3 "sink" } } */ + /* { dg-final { scan-tree-dump-times "<bb " 1 "sink" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-14.c =================================================================== *** gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-14.c (revision 0) --- gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-14.c (working copy) *************** *** 0 **** --- 1,17 ---- + /* { dg-do compile } */ + /* { dg-options "-O2 -fdump-tree-sink" } */ + + int x; + void foo (int b) + { + if (b) + x = b; + else + x = 2; + } + + /* We should have sunk the store and inserted a PHI to merge the + stored values. */ + + /* { dg-final { scan-tree-dump-times " = PHI" 1 "sink" } } */ + /* { dg-final { scan-tree-dump-times "x = " 1 "sink" } } */