Hi, ipa-inline-transform first applies edge redirection and only then scales the profile. For some reason cgraph_edge::redirect_call_stmt_to_callee copies the bb's count into the callgraph edge count. This leads to an inconsistency, because the CFG profile has not yet been scaled at that point. Fixed by scaling the profile before redirecting the edges.
profiledbootstrapped/regtested x86_64-linux, committed. * ipa-inline-transform.c (inline_transform): Scale profile before redirecting. Index: ipa-inline-transform.c =================================================================== --- ipa-inline-transform.c (revision 278811) +++ ipa-inline-transform.c (working copy) @@ -681,6 +681,31 @@ inline_transform (struct cgraph_node *no if (preserve_function_body_p (node)) save_inline_function_body (node); + profile_count num = node->count; + profile_count den = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + bool scale = num.initialized_p () && !(num == den); + if (scale) + { + profile_count::adjust_for_ipa_scaling (&num, &den); + if (dump_file) + { + fprintf (dump_file, "Applying count scale "); + num.dump (dump_file); + fprintf (dump_file, "/"); + den.dump (dump_file); + fprintf (dump_file, "\n"); + } + + basic_block bb; + cfun->cfg->count_max = profile_count::uninitialized (); + FOR_ALL_BB_FN (bb, cfun) + { + bb->count = bb->count.apply_scale (num, den); + cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); + } + ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = node->count; + } + for (e = node->callees; e; e = next) { if (!e->inline_failed) @@ -693,32 +718,8 @@ inline_transform (struct cgraph_node *no timevar_push (TV_INTEGRATION); if (node->callees && (opt_for_fn (node->decl, optimize) || has_inline)) { - profile_count num = node->count; - profile_count den = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; - bool scale = num.initialized_p () && !(num == den); - if (scale) - { - profile_count::adjust_for_ipa_scaling (&num, &den); - if (dump_file) - { - fprintf (dump_file, "Applying count scale "); - num.dump (dump_file); - fprintf (dump_file, "/"); - den.dump (dump_file); - fprintf (dump_file, "\n"); - } - - basic_block bb; - cfun->cfg->count_max = profile_count::uninitialized (); - FOR_ALL_BB_FN (bb, cfun) - { - bb->count = bb->count.apply_scale (num, den); - cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); - } - 
ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = node->count; - } todo = optimize_inline_calls (current_function_decl); - } + } timevar_pop (TV_INTEGRATION); cfun->always_inline_functions_inlined = true;