Hi, this patch fixes profile updates while cloning. When a new clone is produced, its global profile is subtracted from the original function. If the original function's profile drops to 0, we want to switch from global profiles to global0 profiles, which is implemented by combine_with_ipa_count.
However, this is done on all edges independently, and it may happen that we end up combining global and global0 profiles in one function, which is not a good idea. This implements profile_count::combine_with_ipa_count_within, which is able to take into account that the counter is inside a function with a given count. Bootstrapped/regtested x86_64-linux, committed. Honza * profile-count.h (profile_count::combine_with_ipa_count_within): Declare. * profile-count.c (profile_count::combine_with_ipa_count_within): New. * cgraphclones.c (cgraph_edge::clone, cgraph_node::create_clone): Use it. Index: profile-count.h =================================================================== --- profile-count.h (revision 278809) +++ profile-count.h (working copy) @@ -1194,6 +1215,10 @@ public: global0. */ profile_count combine_with_ipa_count (profile_count ipa); + /* Same as combine_with_ipa_count but inside function with count IPA2. */ + profile_count combine_with_ipa_count_within + (profile_count ipa, profile_count ipa2); + /* The profiling runtime uses gcov_type, which is usually 64bit integer. Conversions back and forth are used to read the coverage and get it into internal representation. */ Index: profile-count.c =================================================================== --- profile-count.c (revision 278809) +++ profile-count.c (working copy) @@ -383,6 +388,23 @@ profile_count::combine_with_ipa_count (p return this->global0adjusted (); } +/* Sae as profile_count::combine_with_ipa_count but within function with count + IPA2. */ +profile_count +profile_count::combine_with_ipa_count_within (profile_count ipa, + profile_count ipa2) +{ + profile_count ret; + if (!initialized_p ()) + return *this; + if (ipa2.ipa () == ipa2 && ipa.initialized_p ()) + ret = ipa; + else + ret = combine_with_ipa_count (ipa); + gcc_checking_assert (ret.compatible_p (ipa2)); + return ret; +} + /* The profiling runtime uses gcov_type, which is usually 64bit integer. 
Conversions back and forth are used to read the coverage and get it into internal representation. */ Index: cgraphclones.c =================================================================== --- cgraphclones.c (revision 278809) +++ cgraphclones.c (working copy) @@ -136,8 +141,9 @@ cgraph_edge::clone (cgraph_node *n, gcal /* Update IPA profile. Local profiles need no updating in original. */ if (update_original) - count = count.combine_with_ipa_count (count.ipa () - - new_edge->count.ipa ()); + count = count.combine_with_ipa_count_within (count.ipa () + - new_edge->count.ipa (), + caller->count); symtab->call_edge_duplication_hooks (this, new_edge); return new_edge; } @@ -341,7 +349,14 @@ cgraph_node::create_clone (tree new_decl /* Update IPA profile. Local profiles need no updating in original. */ if (update_original) - count = count.combine_with_ipa_count (count.ipa () - prof_count.ipa ()); + { + if (inlined_to) + count = count.combine_with_ipa_count_within (count.ipa () + - prof_count.ipa (), + inlined_to->count); + else + count = count.combine_with_ipa_count (count.ipa () - prof_count.ipa ()); + } new_node->decl = new_decl; new_node->register_symbol (); new_node->origin = origin;