Richard, this patch fixes PR52743.
The problem is as follows: blocks 3 and 5, with successor 6 are considered equal and merged. ... # BLOCK 3 freq:6102 # PRED: 2 [61.0%] (true,exec) # VUSE <.MEMD.1734_10> dddD.1710_3 = bbbD.1703; goto <bb 6>; # SUCC: 6 [100.0%] (fallthru,exec) # BLOCK 5 freq:2378 # PRED: 4 [61.0%] (false,exec) # SUCC: 6 [100.0%] (fallthru,exec) # BLOCK 6 freq:10000 # PRED: 3 [100.0%] (fallthru,exec) 7 [100.0%] (fallthru) 5 [100.0%] (fallthru,exec) # dddD.1710_1 = PHI <dddD.1710_3(3), 0(7), dddD.1710_4(5)> # .MEMD.1734_8 = PHI <.MEMD.1734_10(3), .MEMD.1734_11(7), .MEMD.1734_11(5)> # VUSE <.MEMD.1734_8> return dddD.1710_1; # SUCC: EXIT [100.0%] ... Tail merge considers 2 blocks equal if the effect at the tail is equal, meaning: - the sequence of side effects produced by each block is equal - the value phis are equal There are no side effects in block 3 and 5, and the phi alternatives of dddD.1710_1 for 3 (dddD.1710_3) and 5 (dddD.1710_4) are proven equal by gvn. The problem is that changing the (4->5) edge into a (4->3) edge changes the value of dddD.1710_3, because block 4 contains a store that affects the load in block 3. ... # BLOCK 4 freq:3898 # PRED: 2 [39.0%] (false,exec) # VUSE <.MEMD.1734_10> dddD.1710_4 = bbbD.1703; # .MEMD.1734_11 = VDEF <.MEMD.1734_10> # USE = nonlocal null # CLB = nonlocal null D.1724_5 = aaaD.1705 (); if (D.1724_5 != 0) goto <bb 7>; else goto <bb 5>; # SUCC: 7 [39.0%] (true,exec) 5 [61.0%] (false,exec) ... Or, put differently, the incoming vuse of block 3 affects a value phi alternative for that block (dddD.1710_3), so the 2 blocks are equal only under the condition that the incoming vuses are equal. We could build an analysis that addresses that precisely, but for now I implemented a more coarse-grained fix: if the incoming vuses are not equal, and at least one of the vuses influenced a non-virtual result, we don't consider the blocks equal. Bootstrapped and reg-tested on x86_64. ok for trunk, 4.7.1? Thanks, - Tom 2012-04-13 Tom de Vries <t...@codesourcery.com> * tree-ssa-tail-merge.c (gsi_advance_bw_nondebug_nonlocal): Add parameters vuse and vuse_escaped. (find_duplicate): Init vuse1, vuse2 and vuse_escaped. Pass to gsi_advance_bw_nondebug_nonlocal. Return if vuse_escaped and vuse1 != vuse2. * gcc.dg/pr52734.c: New test.
Index: gcc/tree-ssa-tail-merge.c =================================================================== --- gcc/tree-ssa-tail-merge.c (revision 185028) +++ gcc/tree-ssa-tail-merge.c (working copy) @@ -1123,18 +1123,31 @@ gimple_equal_p (same_succ same_succ, gim } } -/* Let GSI skip backwards over local defs. */ +/* Let GSI skip backwards over local defs. Return the earliest vuse in VUSE. + Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the + processed statements. */ static void -gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi) +gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse, + bool *vuse_escaped) { gimple stmt; + tree lvuse; while (true) { if (gsi_end_p (*gsi)) return; stmt = gsi_stmt (*gsi); + + lvuse = gimple_vuse (stmt); + if (lvuse != NULL_TREE) + { + *vuse = lvuse; + if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF)) + *vuse_escaped = true; + } + if (!(is_gimple_assign (stmt) && local_def (gimple_get_lhs (stmt)) && !gimple_has_side_effects (stmt))) return; @@ -1150,9 +1163,11 @@ find_duplicate (same_succ same_succ, bas { gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1); gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2); + tree vuse1 = NULL_TREE, vuse2 = NULL_TREE; + bool vuse_escaped = false; - gsi_advance_bw_nondebug_nonlocal (&gsi1); - gsi_advance_bw_nondebug_nonlocal (&gsi2); + gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped); + gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped); while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2)) { @@ -1161,13 +1176,20 @@ find_duplicate (same_succ same_succ, bas gsi_prev_nondebug (&gsi1); gsi_prev_nondebug (&gsi2); - gsi_advance_bw_nondebug_nonlocal (&gsi1); - gsi_advance_bw_nondebug_nonlocal (&gsi2); + gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped); + gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped); } if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2))) return; + /* If the incoming vuses are not the same, and the vuse escaped into an + SSA_OP_DEF, then merging the 2 blocks will change the value of the def, + which potentially means the semantics of one of the blocks will be changed. + TODO: make this check more precise. */ + if (vuse_escaped && vuse1 != vuse2) + return; + if (dump_file) fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n", bb1->index, bb2->index); Index: gcc/testsuite/gcc.dg/pr52734.c =================================================================== --- /dev/null (new file) +++ gcc/testsuite/gcc.dg/pr52734.c (revision 0) @@ -0,0 +1,35 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +int bbb = 0; + +int __attribute__((noinline,noclone)) aaa(void) +{ + ++bbb; + return 0; +} + +int __attribute__((noinline,noclone)) ccc(void) +{ + int ddd; + /* bbb == 0 */ + if (aaa()) + return bbb; + + /* bbb == 1 */ + ddd = bbb; + /* bbb == ddd == 1 */ + if (aaa ()) + return 0; + /* bbb == 2, ddd == 1 */ + + return ddd; +} + +int main(void) +{ + if (ccc() != 1) + __builtin_abort(); + return 0; +} +