Hi, this patch turns edge growth cache into fast summary and fixes some fallout. I have noticed that we still use get_create on many places to access inline edge summary and turning them into gets uncovered furhter problems.
I have added sanity check to estimate_calls_size_and_time that inlined edges have no summaries which turned out to fail becuase symbols-summaries for some reason have flag m_initialize_when_cloning which is set to true for call summaries (and is non-existent for function summaries). When this flag is set at the time of edge duplication we create empty summary for source edge (if non-existent) and then copy it via hook to new edge. Martin, do you know why this flag was introduced? I have disabled it and noticed that cgraph_node::create_version_clone calls node duplication hooks for no good reason: newly produced function will get body and will eventually be registered as new function to callgraph. Additionally inliner sometimes attempt to update overall summary of already inlined functions and also functions with -O0. Both is deemed to fail since we do not have any analysis data on them. lto-bootstrapped/regtested x86_64-linux, comitted. * cgraphclones.c (cgraph_node::create_version_clone): Do not duplicate summaries. * ipa-fnsummary.c (ipa_fn_summary_alloc): Allocate size summary first. (ipa_fn_summary_t::duplicate): Use get instead of get_create to access call summaries. (dump_ipa_call_summary): Be ready for missing edge summaries. (analyze_function_body): Use get instead of get_create to access edge summary. (estimate_calls_size_and_time): Do not access summaries of inlined edges; sanity check they are missing. (ipa_call_context::estimate_size_and_time): Use get instead of get_create to access node summary. (inline_update_callee_summaries): Do not update depth of inlined edge. (ipa_merge_fn_summary_after_inlining): Remove inline edge from growth caches. (ipa_merge_fn_summary_after_inlining): Use get instead of get_create. * ipa-fnsummary.h (ipa_remove_from_growth_caches): Declare. * ipa-inline-analyssi.c (edge_growth_cache): Turn to fast summary. (initialize_growth_caches): Update. (do_estimate_edge_time): Remove redundant copy of context. (ipa_remove_from_growth_caches): New function. * ipa-inline.c (flatten_function): Update overall summary only when optimizing. (inline_to_all_callers): Update overall summary of function inlined to. * ipa-inline.h (edge_growth_cache): Turn to fast summary. * symbol-summary.h (call_summary_base): Set m_initialize_when_cloning to false. Index: cgraphclones.c =================================================================== --- cgraphclones.c (revision 277758) +++ cgraphclones.c (working copy) @@ -892,8 +892,6 @@ cgraph_node::create_version_clone (tree e->redirect_callee (new_version); } - symtab->call_cgraph_duplication_hooks (this, new_version); - dump_callgraph_transformation (this, new_version, suffix); return new_version; Index: ipa-fnsummary.c =================================================================== --- ipa-fnsummary.c (revision 277759) +++ ipa-fnsummary.c (working copy) @@ -553,9 +553,9 @@ static void ipa_fn_summary_alloc (void) { gcc_checking_assert (!ipa_fn_summaries); - ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab); ipa_size_summaries = new fast_function_summary <ipa_size_summary *, va_heap> (symtab); + ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab); ipa_call_summaries = new ipa_call_summary_t (symtab); } @@ -688,7 +688,7 @@ ipa_fn_summary_t::duplicate (cgraph_node for (edge = dst->callees; edge; edge = next) { predicate new_predicate; - class ipa_call_summary *es = ipa_call_summaries->get_create (edge); + class ipa_call_summary *es = ipa_call_summaries->get (edge); next = edge->next_callee; if (!edge->inline_failed) @@ -707,7 +707,7 @@ ipa_fn_summary_t::duplicate (cgraph_node for (edge = dst->indirect_calls; edge; edge = next) { predicate new_predicate; - class ipa_call_summary *es = ipa_call_summaries->get_create (edge); + class ipa_call_summary *es = ipa_call_summaries->get (edge); next = edge->next_callee; gcc_checking_assert (edge->inline_failed); @@ -787,12 +787,15 @@ dump_ipa_call_summary (FILE *f, int inde int i; fprintf (f, - "%*s%s/%i %s\n%*s loop depth:%2i freq:%4.2f size:%2i time: %2i", + "%*s%s/%i %s\n%*s freq:%4.2f", indent, "", callee->name (), callee->order, !edge->inline_failed ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed), - indent, "", es->loop_depth, edge->sreal_frequency ().to_double (), - es->call_stmt_size, es->call_stmt_time); + indent, "", edge->sreal_frequency ().to_double ()); + + if (es) + fprintf (f, " loop depth:%2i size:%2i time: %2i", + es->loop_depth, es->call_stmt_size, es->call_stmt_time); ipa_fn_summary *s = ipa_fn_summaries->get (callee); ipa_size_summary *ss = ipa_size_summaries->get (callee); @@ -801,14 +804,14 @@ dump_ipa_call_summary (FILE *f, int inde (int) (ss->size / ipa_fn_summary::size_scale), (int) s->estimated_stack_size); - if (es->predicate) + if (es && es->predicate) { fprintf (f, " predicate: "); es->predicate->dump (f, info->conds); } else fprintf (f, "\n"); - if (es->param.exists ()) + if (es && es->param.exists ()) for (i = 0; i < (int) es->param.length (); i++) { int prob = es->param[i].change_prob; @@ -2480,7 +2483,7 @@ analyze_function_body (struct cgraph_nod edge->speculative_call_info (direct, indirect, ref); gcc_assert (direct == edge); ipa_call_summary *es2 - = ipa_call_summaries->get_create (indirect); + = ipa_call_summaries->get (indirect); ipa_call_summaries->duplicate (edge, indirect, es, es2); } @@ -2924,10 +2927,20 @@ estimate_calls_size_and_time (struct cgr struct cgraph_edge *e; for (e = node->callees; e; e = e->next_callee) { - class ipa_call_summary *es = ipa_call_summaries->get_create (e); + if (!e->inline_failed) + { + gcc_checking_assert (!ipa_call_summaries->get (e)); + estimate_calls_size_and_time (e->callee, size, min_size, time, + hints, + possible_truths, + known_vals, known_contexts, + known_aggs); + continue; + } + class ipa_call_summary *es = ipa_call_summaries->get (e); /* Do not care about zero sized builtins. */ - if (e->inline_failed && !es->call_stmt_size) + if (!es->call_stmt_size) { gcc_checking_assert (!es->call_stmt_time); continue; @@ -2935,27 +2948,18 @@ estimate_calls_size_and_time (struct cgr if (!es->predicate || es->predicate->evaluate (possible_truths)) { - if (e->inline_failed) - { - /* Predicates of calls shall not use NOT_CHANGED codes, - sowe do not need to compute probabilities. */ - estimate_edge_size_and_time (e, size, - es->predicate ? NULL : min_size, - time, REG_BR_PROB_BASE, - known_vals, known_contexts, - known_aggs, hints); - } - else - estimate_calls_size_and_time (e->callee, size, min_size, time, - hints, - possible_truths, - known_vals, known_contexts, - known_aggs); + /* Predicates of calls shall not use NOT_CHANGED codes, + sowe do not need to compute probabilities. */ + estimate_edge_size_and_time (e, size, + es->predicate ? NULL : min_size, + time, REG_BR_PROB_BASE, + known_vals, known_contexts, + known_aggs, hints); } } for (e = node->indirect_calls; e; e = e->next_callee) { - class ipa_call_summary *es = ipa_call_summaries->get_create (e); + class ipa_call_summary *es = ipa_call_summaries->get (e); if (!es->predicate || es->predicate->evaluate (possible_truths)) estimate_edge_size_and_time (e, size, @@ -3204,7 +3208,7 @@ ipa_call_context::estimate_size_and_time sreal *ret_nonspecialized_time, ipa_hints *ret_hints) { - class ipa_fn_summary *info = ipa_fn_summaries->get_create (m_node); + class ipa_fn_summary *info = ipa_fn_summaries->get (m_node); size_time_entry *e; int size = 0; sreal time = 0; @@ -3382,7 +3386,8 @@ inline_update_callee_summaries (struct c { if (!e->inline_failed) inline_update_callee_summaries (e->callee, depth); - ipa_call_summaries->get (e)->loop_depth += depth; + else + ipa_call_summaries->get (e)->loop_depth += depth; } for (e = node->indirect_calls; e; e = e->next_callee) ipa_call_summaries->get (e)->loop_depth += depth; @@ -3649,6 +3654,7 @@ ipa_merge_fn_summary_after_inlining (str /* Free summaries that are not maintained for inline clones/edges. */ ipa_call_summaries->remove (edge); ipa_fn_summaries->remove (edge->callee); + ipa_remove_from_growth_caches (edge); } /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating @@ -3657,8 +3663,8 @@ ipa_merge_fn_summary_after_inlining (str void ipa_update_overall_fn_summary (struct cgraph_node *node) { - class ipa_fn_summary *info = ipa_fn_summaries->get_create (node); - class ipa_size_summary *size_info = ipa_size_summaries->get_create (node); + class ipa_fn_summary *info = ipa_fn_summaries->get (node); + class ipa_size_summary *size_info = ipa_size_summaries->get (node); size_time_entry *e; int i; Index: ipa-fnsummary.h =================================================================== --- ipa-fnsummary.h (revision 277758) +++ ipa-fnsummary.h (working copy) @@ -364,5 +364,6 @@ void evaluate_properties_for_edge (struc void ipa_fnsummary_c_finalize (void); HOST_WIDE_INT ipa_get_stack_frame_offset (struct cgraph_node *node); +void ipa_remove_from_growth_caches (struct cgraph_edge *edge); #endif /* GCC_IPA_FNSUMMARY_H */ Index: ipa-inline-analysis.c =================================================================== --- ipa-inline-analysis.c (revision 277758) +++ ipa-inline-analysis.c (working copy) @@ -51,7 +51,7 @@ along with GCC; see the file COPYING3. #include "gimplify.h" /* Cached node/edge growths. */ -call_summary<edge_growth_cache_entry *> *edge_growth_cache = NULL; +fast_call_summary<edge_growth_cache_entry *, va_heap> *edge_growth_cache = NULL; /* The context cache remembers estimated time/size and hints for given ipa_call_context of a call. */ @@ -125,7 +125,7 @@ void initialize_growth_caches () { edge_growth_cache - = new call_summary<edge_growth_cache_entry *> (symtab, false); + = new fast_call_summary<edge_growth_cache_entry *, va_heap> (symtab); node_context_cache = new fast_function_summary<node_context_summary *, va_heap> (symtab); } @@ -219,7 +219,6 @@ do_estimate_edge_time (struct cgraph_edg else node_context_cache_clear++; e->entry.ctx.release (true); - e->entry.ctx = ctx; ctx.estimate_size_and_time (&size, &min_size, &time, &nonspec_time, &hints); e->entry.size = size; @@ -275,6 +274,16 @@ reset_node_cache (struct cgraph_node *no node_context_cache->remove (node); } +/* Remove EDGE from caches once it was inlined. */ +void +ipa_remove_from_growth_caches (struct cgraph_edge *edge) +{ + if (node_context_cache) + node_context_cache->remove (edge->callee); + if (edge_growth_cache) + edge_growth_cache->remove (edge); +} + /* Return estimated callee growth after inlining EDGE. Only to be called via estimate_edge_size. */ Index: ipa-inline.c =================================================================== --- ipa-inline.c (revision 277758) +++ ipa-inline.c (working copy) @@ -2290,9 +2290,9 @@ flatten_function (struct cgraph_node *no } node->aux = NULL; - if (update) - ipa_update_overall_fn_summary (node->inlined_to - ? node->inlined_to : node); + cgraph_node *where = node->inlined_to ? node->inlined_to : node; + if (update && opt_for_fn (where->decl, optimize)) + ipa_update_overall_fn_summary (where); } /* Inline NODE to all callers. Worker for cgraph_for_node_and_aliases. @@ -2367,7 +2367,7 @@ inline_to_all_callers (struct cgraph_nod we have a lot of calls to the same function. */ for (hash_set<cgraph_node *>::iterator i = callers.begin (); i != callers.end (); ++i) - ipa_update_overall_fn_summary (*i); + ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i); return res; } Index: ipa-inline.h =================================================================== --- ipa-inline.h (revision 277758) +++ ipa-inline.h (working copy) @@ -39,7 +39,7 @@ public: hints (hints) {} }; -extern call_summary<edge_growth_cache_entry *> *edge_growth_cache; +extern fast_call_summary<edge_growth_cache_entry *, va_heap> *edge_growth_cache; /* In ipa-inline-analysis.c */ int estimate_size_after_inlining (struct cgraph_node *, struct cgraph_edge *); Index: symbol-summary.h =================================================================== --- symbol-summary.h (revision 277758) +++ symbol-summary.h (working copy) @@ -532,7 +532,7 @@ class call_summary_base public: /* Default construction takes SYMTAB as an argument. */ call_summary_base (symbol_table *symtab): m_symtab (symtab), - m_initialize_when_cloning (true) + m_initialize_when_cloning (false) {} /* Basic implementation of removal operation. */