https://gcc.gnu.org/g:ad3fb999a1b56893f0f6296a52fe2af550763fee
commit r16-3990-gad3fb999a1b56893f0f6296a52fe2af550763fee Author: Jan Hubicka <[email protected]> Date: Fri Sep 19 19:05:34 2025 +0200 Improve ipa-cp devirtualization costing This patch fixes the devirtualization time benefit of ipa-cp, which should be scaled by edge frequency but currently is not. The cost model is still not correct for speculative calls, since it does not take into account the fact that code size will shrink if speculation is removed. I also made cgraph_edge::make_direct not ICE when there are multiple speculations to the same target. This can happen with a combination of devirtualization and ICF in some rare cases. gcc/ChangeLog: * cgraph.cc (cgraph_edge::make_direct): Do not ICE when there are multiple speculations to compatible targets. (cgraph_edge::combined_sreal_frequency): New member function. * cgraph.h (cgraph_edge::combined_sreal_frequency): Declare. * ipa-cp.cc (devirtualization_time_bonus): Return sreal; consider profile. (estimate_local_effects): Likewise. Diff: --- gcc/cgraph.cc | 27 ++++++++++++++++++++++----- gcc/cgraph.h | 4 ++++ gcc/ipa-cp.cc | 22 +++++++++++++--------- 3 files changed, 39 insertions(+), 14 deletions(-) diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc index 32071a84bacc..e5902a0fdcd0 100644 --- a/gcc/cgraph.cc +++ b/gcc/cgraph.cc @@ -1466,13 +1466,11 @@ cgraph_edge::make_direct (cgraph_edge *edge, cgraph_node *callee) /* Compare ref not direct->callee. Direct edge is possibly inlined or redirected. */ if (!direct->speculative_call_target_ref () - ->referred->semantically_equivalent_p (callee)) + ->referred->semantically_equivalent_p (callee) + || found) edge = direct->resolve_speculation (direct, NULL); else - { - gcc_checking_assert (!found); - found = direct; - } + found = direct; } /* On successful speculation just remove the indirect edge and @@ -4422,6 +4420,25 @@ cgraph_edge::sreal_frequency () : caller->count); } +/* Expected frequency of executions within the function. + If edge is speculative, sum all its indirect targets. 
*/ + +sreal +cgraph_edge::combined_sreal_frequency () +{ + if (!speculative) + return sreal_frequency (); + cgraph_edge *e = this; + if (e->callee) + e = e->speculative_call_indirect_edge (); + sreal sum = e->sreal_frequency (); + for (e = e->first_speculative_call_target (); + e; + e = e->next_speculative_call_target ()) + sum += e->sreal_frequency (); + return sum; +} + /* During LTO stream in this can be used to check whether call can possibly be internal to the current translation unit. */ diff --git a/gcc/cgraph.h b/gcc/cgraph.h index deca564a8e37..d668a8182729 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -1991,6 +1991,10 @@ public: /* Expected frequency of executions within the function. */ sreal sreal_frequency (); + + /* Expected frequency of executions within the function. + If edge is speculative, sum all its indirect targets. */ + sreal combined_sreal_frequency (); private: /* Unique id of the edge. */ int m_uid; diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc index 4e03c3661067..f1623c147f47 100644 --- a/gcc/ipa-cp.cc +++ b/gcc/ipa-cp.cc @@ -3289,14 +3289,17 @@ ipa_get_indirect_edge_target (struct cgraph_edge *ie, } /* Calculate devirtualization time bonus for NODE, assuming we know information - about arguments stored in AVALS. */ + about arguments stored in AVALS. -static int + FIXME: This function will also consider devirtualization of calls that are + known to be dead in the clone. */ + +static sreal devirtualization_time_bonus (struct cgraph_node *node, ipa_auto_call_arg_values *avals) { struct cgraph_edge *ie; - int res = 0; + sreal res = 0; for (ie = node->indirect_calls; ie; ie = ie->next_callee) { @@ -3314,7 +3317,7 @@ devirtualization_time_bonus (struct cgraph_node *node, continue; /* Only bare minimum benefit for clearly un-inlineable targets. 
*/ - res += 1; + int savings = 1; callee = cgraph_node::get (target); if (!callee || !callee->definition) continue; @@ -3331,12 +3334,13 @@ devirtualization_time_bonus (struct cgraph_node *node, int max_inline_insns_auto = opt_for_fn (callee->decl, param_max_inline_insns_auto); if (size <= max_inline_insns_auto / 4) - res += 31 / ((int)speculative + 1); + savings += 31 / ((int)speculative + 1); else if (size <= max_inline_insns_auto / 2) - res += 15 / ((int)speculative + 1); + savings += 15 / ((int)speculative + 1); else if (size <= max_inline_insns_auto || DECL_DECLARED_INLINE_P (callee->decl)) - res += 7 / ((int)speculative + 1); + savings += 7 / ((int)speculative + 1); + res = res + ie->combined_sreal_frequency () * (sreal) savings; } return res; @@ -3624,8 +3628,8 @@ estimate_local_effects (struct cgraph_node *node) ipa_auto_call_arg_values avals; always_const = gather_context_independent_values (info, &avals, true, &removable_params_cost); - int devirt_bonus = devirtualization_time_bonus (node, &avals); - if (always_const || devirt_bonus + sreal devirt_bonus = devirtualization_time_bonus (node, &avals); + if (always_const || devirt_bonus > 0 || (removable_params_cost && clone_for_param_removal_p (node))) { struct caller_statistics stats;
