Hi, this patch updates the profile after the hoist_guard transformation that was added in 2015. I wonder why this transform is bundled in tree-ssa-loop-unswitch and not enabled at -O2/-Os. It converts
while (1) { [header]] loop_phi_nodes; something1; if (cond1) body; nvar = phi(orig, bvar) ... for all variables changed in body; [guard_end] something2; if (cond2) break; something3; } to if (cond1) while (1) { [header]] loop_phi_nodes; something1; body; [guard_end] something2; if (cond2) break; something3; } Which, unlike normal if conversion, seems to almost always be a win because it does not duplicate any code. While the path where the loop executes 0 times has one extra if (cond1) on it, this seems to be quite a reasonable tradeoff. Bootstrapped/regtested x86_64-linux, will commit it tomorrow unless there are complaints. * gcc.dg/loop-unswitch-2.c: New testcase. * gcc.dg/loop-unswitch-1.c: New testcase. * tree-ssa-loop-unswitch.c (hoist_guard): Update profile. Index: testsuite/gcc.dg/loop-unswitch-2.c =================================================================== --- testsuite/gcc.dg/loop-unswitch-2.c (revision 245196) +++ testsuite/gcc.dg/loop-unswitch-2.c (working copy) @@ -12,4 +12,5 @@ void foo (float **a, float **b, float *c } /* { dg-final { scan-tree-dump-times "guard hoisted" 3 "unswitch" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "unswitch" } } */ Index: testsuite/gcc.dg/loop-unswitch-3.c =================================================================== --- testsuite/gcc.dg/loop-unswitch-3.c (revision 245196) +++ testsuite/gcc.dg/loop-unswitch-3.c (working copy) @@ -22,5 +22,6 @@ float *foo(int ustride, int size, float } /* { dg-final { scan-tree-dump-times "guard hoisted" 1 "unswitch" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "unswitch" } } */ Index: tree-ssa-loop-unswitch.c =================================================================== --- tree-ssa-loop-unswitch.c (revision 245196) +++ tree-ssa-loop-unswitch.c (working copy) @@ -787,6 +787,7 @@ hoist_guard (struct loop *loop, edge gua edge te, fe, e, new_edge; gimple *stmt; basic_block guard_bb = guard->src; + edge not_guard; gimple_stmt_iterator gsi; int flags = 0; bool 
fix_dom_of_exit; @@ -818,18 +819,80 @@ hoist_guard (struct loop *loop, edge gua update_stmt (cond_stmt); /* Create new loop pre-header. */ e = split_block (pre_header, last_stmt (pre_header)); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " Moving guard %i->%i (prob %i) to bb %i, " + "new preheader is %i\n", + guard->src->index, guard->dest->index, guard->probability, + e->src->index, e->dest->index); + gcc_assert (loop_preheader_edge (loop)->src == e->dest); + if (guard == fe) { e->flags = EDGE_TRUE_VALUE; flags |= EDGE_FALSE_VALUE; + not_guard = te; } else { e->flags = EDGE_FALSE_VALUE; flags |= EDGE_TRUE_VALUE; + not_guard = fe; } new_edge = make_edge (pre_header, exit->dest, flags); + + /* Determine the probability that we skip the loop. Assume that loop has + same average number of iterations regardless outcome of guard. */ + new_edge->probability = guard->probability; + int skip_count = guard->src->count + ? RDIV (guard->count * pre_header->count, guard->src->count) + : apply_probability (guard->count, new_edge->probability); + + if (skip_count > e->count) + { + fprintf (dump_file, " Capping count; expect profile inconsistency\n"); + skip_count = e->count; + } + new_edge->count = skip_count; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " Estimated probability of skipping loop is %i\n", + new_edge->probability); + + /* Update profile after the transform: + + First decrease count of path from newly hoisted loop guard + to loop header... */ + e->count -= skip_count; + e->probability = REG_BR_PROB_BASE - new_edge->probability; + e->dest->count = e->count; + e->dest->frequency = EDGE_FREQUENCY (e); + + /* ... now update profile to represent that original guard will be optimized + away ... */ + guard->probability = 0; + guard->count = 0; + not_guard->probability = REG_BR_PROB_BASE; + /* This count is wrong (frequency of not_guard does not change), + but will be scaled later. 
*/ + not_guard->count = guard->src->count; + + /* ... finally scale everything in the loop except for guarded basic blocks + where profile does not change. */ + basic_block *body = get_loop_body (loop); + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " Scaling nonguarded BBs in loop:"); + for (unsigned int i = 0; i < loop->num_nodes; i++) + { + basic_block bb = body[i]; + if (!dominated_by_p (CDI_DOMINATORS, bb, not_guard->dest)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " %i", bb->index); + scale_bbs_frequencies_int (&bb, 1, e->probability, REG_BR_PROB_BASE); + } + } + if (fix_dom_of_exit) set_immediate_dominator (CDI_DOMINATORS, exit->dest, pre_header); /* Add NEW_ADGE argument for all phi in post-header block. */ @@ -856,7 +919,7 @@ hoist_guard (struct loop *loop, edge gua } if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, " guard hoisted.\n"); + fprintf (dump_file, "\n guard hoisted.\n"); } /* Return true if phi argument for exit edge can be used