Re: [PATCH 3/3] add hash_map class
On 06/20/2014 12:52 PM, tsaund...@mozilla.com wrote: From: Trevor Saunders tsaund...@mozilla.com Hi, This patch adds a hash_map class so we can consolidate the boiler plate around using hash_table as a map, it also allows us to get rid of pointer_map which I do in this patch by converting its users to hash_map. Hello Trev, I like your changes! One small question about pointer_set, which is unable of deletion of items. Do you plan to migrate and simplify hash_map to be a replacement for pointer_set? Thanks, Martin bootstrapped + regtested without regression on x86_64-unknown-linux-gnu, ok? Trev gcc/ * alloc-pool.c (alloc_pool_hash): Use hash_map instead of hash_table. * dominance.c (iterate_fix_dominators): Use hash_map instead of pointer_map. * hash-map.h: New file. * ipa-comdats.c: Use hash_map instead of pointer_map. * lto-section-out.c: Adjust. * lto-streamer.h: Replace pointer_map with hash_map. * symtab.c (verify_symtab): Likewise. * tree-ssa-strlen.c (decl_to_stridxlist_htab): Likewise. * tree-ssa-uncprop.c (val_ssa_equiv): Likewise. * tree-streamer.h: Likewise. * tree-streamer.c: Adjust. * pointer-set.h: Remove pointer_map. lto/ * lto.c (canonical_type_hash_cache): Use hash_map instead of pointer_map. diff --git a/gcc/alloc-pool.c b/gcc/alloc-pool.c index 49209ee..0d31835 100644 --- a/gcc/alloc-pool.c +++ b/gcc/alloc-pool.c @@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see #include system.h #include alloc-pool.h #include hash-table.h +#include hash-map.h #define align_eight(x) (((x+7) 3) 3) @@ -69,7 +70,6 @@ static ALLOC_POOL_ID_TYPE last_id; size for that pool. */ struct alloc_pool_descriptor { - const char *name; /* Number of pools allocated. */ unsigned long created; /* Gross allocated storage. */ @@ -82,48 +82,17 @@ struct alloc_pool_descriptor int elt_size; }; -/* Hashtable helpers. 
*/ -struct alloc_pool_hasher : typed_noop_remove alloc_pool_descriptor -{ - typedef alloc_pool_descriptor value_type; - typedef char compare_type; - static inline hashval_t hash (const alloc_pool_descriptor *); - static inline bool equal (const value_type *, const compare_type *); -}; - -inline hashval_t -alloc_pool_hasher::hash (const value_type *d) -{ - return htab_hash_pointer (d-name); -} - -inline bool -alloc_pool_hasher::equal (const value_type *d, - const compare_type *p2) -{ - return d-name == p2; -} - /* Hashtable mapping alloc_pool names to descriptors. */ -static hash_tablealloc_pool_hasher *alloc_pool_hash; +static hash_mapconst char *, alloc_pool_descriptor *alloc_pool_hash; /* For given name, return descriptor, create new if needed. */ static struct alloc_pool_descriptor * allocate_pool_descriptor (const char *name) { - struct alloc_pool_descriptor **slot; - if (!alloc_pool_hash) -alloc_pool_hash = new hash_tablealloc_pool_hasher (10); - - slot = alloc_pool_hash-find_slot_with_hash (name, - htab_hash_pointer (name), - INSERT); - if (*slot) -return *slot; - *slot = XCNEW (struct alloc_pool_descriptor); - (*slot)-name = name; - return *slot; +alloc_pool_hash = new hash_mapconst char *, alloc_pool_descriptor (10); + + return alloc_pool_hash-get_or_insert (name); } /* Create a pool of things of size SIZE, with NUM in each block we @@ -375,23 +344,22 @@ struct output_info unsigned long total_allocated; }; -/* Called via hash_table.traverse. Output alloc_pool descriptor pointed out by +/* Called via hash_map.traverse. Output alloc_pool descriptor pointed out by SLOT and update statistics. 
*/ -int -print_alloc_pool_statistics (alloc_pool_descriptor **slot, +bool +print_alloc_pool_statistics (const char *const name, +const alloc_pool_descriptor d, struct output_info *i) { - struct alloc_pool_descriptor *d = *slot; - - if (d-allocated) + if (d.allocated) { fprintf (stderr, %-22s %6d %10lu %10lu(%10lu) %10lu(%10lu) %10lu(%10lu)\n, - d-name, d-elt_size, d-created, d-allocated, - d-allocated / d-elt_size, d-peak, d-peak / d-elt_size, - d-current, d-current / d-elt_size); - i-total_allocated += d-allocated; - i-total_created += d-created; + name, d.elt_size, d.created, d.allocated, + d.allocated / d.elt_size, d.peak, d.peak / d.elt_size, + d.current, d.current / d.elt_size); + i-total_allocated += d.allocated; + i-total_created += d.created; } return 1; } diff --git a/gcc/dominance.c b/gcc/dominance.c index 7adec4f..be0a439 100644 --- a/gcc/dominance.c +++ b/gcc/dominance.c @@ -43,6 +43,7 @@ #include diagnostic-core.h
Re: [PATCH 3/3] add hash_map class
On 06/24/2014 02:40 PM, Trevor Saunders wrote: On Tue, Jun 24, 2014 at 02:29:53PM +0200, Martin Liška wrote: On 06/20/2014 12:52 PM, tsaund...@mozilla.com wrote: From: Trevor Saunders tsaund...@mozilla.com Hi, This patch adds a hash_map class so we can consolidate the boiler plate around using hash_table as a map, it also allows us to get rid of pointer_map which I do in this patch by converting its users to hash_map. Hello Trev, I like your changes! One small question about pointer_set, which is unable of deletion of items. Do you plan to migrate and simplify hash_map to be a replacement for pointer_set? I'm not sure I follow the question. I imagine that hash_map will largely stay as it is, other than perhaps some const correctness stuff, and supporting element removal at some point. Supporting element removal should be trivial since I'm just wrapping hash_table which already supports it, but I didn't want to add it until there was code testing it. As you see in the patch I removed pointer_map so its already a replacement for that functionality. As for pointer_set since its a set not a map hash_table would seem closer to me. Understand, yeah, I was asking if we plan to add element removal also for (pointer_)set? I consider such functionality useful, but it looks not related to your patch. If I understand correctly, you are not planning to use hash_* as wrapping data structure for set. Martin Trev Thanks, Martin bootstrapped + regtested without regression on x86_64-unknown-linux-gnu, ok? Trev gcc/ * alloc-pool.c (alloc_pool_hash): Use hash_map instead of hash_table. * dominance.c (iterate_fix_dominators): Use hash_map instead of pointer_map. * hash-map.h: New file. * ipa-comdats.c: Use hash_map instead of pointer_map. * lto-section-out.c: Adjust. * lto-streamer.h: Replace pointer_map with hash_map. * symtab.c (verify_symtab): Likewise. * tree-ssa-strlen.c (decl_to_stridxlist_htab): Likewise. * tree-ssa-uncprop.c (val_ssa_equiv): Likewise. 
* tree-streamer.h: Likewise. * tree-streamer.c: Adjust. * pointer-set.h: Remove pointer_map. lto/ * lto.c (canonical_type_hash_cache): Use hash_map instead of pointer_map. diff --git a/gcc/alloc-pool.c b/gcc/alloc-pool.c index 49209ee..0d31835 100644 --- a/gcc/alloc-pool.c +++ b/gcc/alloc-pool.c @@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see #include system.h #include alloc-pool.h #include hash-table.h +#include hash-map.h #define align_eight(x) (((x+7) 3) 3) @@ -69,7 +70,6 @@ static ALLOC_POOL_ID_TYPE last_id; size for that pool. */ struct alloc_pool_descriptor { - const char *name; /* Number of pools allocated. */ unsigned long created; /* Gross allocated storage. */ @@ -82,48 +82,17 @@ struct alloc_pool_descriptor int elt_size; }; -/* Hashtable helpers. */ -struct alloc_pool_hasher : typed_noop_remove alloc_pool_descriptor -{ - typedef alloc_pool_descriptor value_type; - typedef char compare_type; - static inline hashval_t hash (const alloc_pool_descriptor *); - static inline bool equal (const value_type *, const compare_type *); -}; - -inline hashval_t -alloc_pool_hasher::hash (const value_type *d) -{ - return htab_hash_pointer (d-name); -} - -inline bool -alloc_pool_hasher::equal (const value_type *d, - const compare_type *p2) -{ - return d-name == p2; -} - /* Hashtable mapping alloc_pool names to descriptors. */ -static hash_tablealloc_pool_hasher *alloc_pool_hash; +static hash_mapconst char *, alloc_pool_descriptor *alloc_pool_hash; /* For given name, return descriptor, create new if needed. 
*/ static struct alloc_pool_descriptor * allocate_pool_descriptor (const char *name) { - struct alloc_pool_descriptor **slot; - if (!alloc_pool_hash) -alloc_pool_hash = new hash_tablealloc_pool_hasher (10); - - slot = alloc_pool_hash-find_slot_with_hash (name, - htab_hash_pointer (name), - INSERT); - if (*slot) -return *slot; - *slot = XCNEW (struct alloc_pool_descriptor); - (*slot)-name = name; - return *slot; +alloc_pool_hash = new hash_mapconst char *, alloc_pool_descriptor (10); + + return alloc_pool_hash-get_or_insert (name); } /* Create a pool of things of size SIZE, with NUM in each block we @@ -375,23 +344,22 @@ struct output_info unsigned long total_allocated; }; -/* Called via hash_table.traverse. Output alloc_pool descriptor pointed out by +/* Called via hash_map.traverse. Output alloc_pool descriptor pointed out by SLOT and update statistics. */ -int -print_alloc_pool_statistics (alloc_pool_descriptor **slot, +bool +print_alloc_pool_statistics (const char *const name, +const alloc_pool_descriptor d, struct output_info
Re: [PATCH 3/3] add hash_map class
On 06/24/2014 09:31 PM, Richard Biener wrote: On June 24, 2014 9:16:34 PM CEST, Trevor Saunders tsaund...@mozilla.com wrote: On Tue, Jun 24, 2014 at 08:23:49PM +0200, Jan Hubicka wrote: On 06/20/2014 12:52 PM, tsaund...@mozilla.com wrote: From: Trevor Saunders tsaund...@mozilla.com Hi, This patch adds a hash_map class so we can consolidate the boiler plate around using hash_table as a map, it also allows us to get rid of pointer_map which I do in this patch by converting its users to hash_map. Hello Trev, I like your changes! One small question about pointer_set, which is unable of deletion of items. Do you plan to migrate and simplify hash_map to be a replacement for pointer_set? Note that pointer-map use in LTO is quite performance critical. It would be good to double check that the new use of hash does not produce slower code. I believe the compiled code should be very similar, but I'll do some measuring to check. More important is memory use. Richard. Hi, here is a memory usage graph comparing current trunk with the state before Trevor's patchset. It looks like there's no memory footprint regression. https://drive.google.com/file/d/0B0pisUJ80pO1OG5uY28yNFRnWTA/edit?usp=sharing Martin Trev Honza
Re: [PATCH] IPA REF: refactoring
On 06/24/2014 08:21 PM, Jan Hubicka wrote: Hello, this patch changes IPA REF API to c++ style. Changes were suggested and consulted with Honza. Patch has been pre approved, will be committed if no comments. Bootstrapped on x86_64-pc-linux-gnu, no regressions. Thanks, Martin ChangeLog: 2014-06-22 Martin Liska mli...@suse.cz * Makefile.in: Removed header file (ipa-ref-inline.h). * cgraph.c (cgraph_turn_edge_to_speculative): New IPA REF function called. (cgraph_speculative_call_info): Likewise. (cgraph_for_node_thunks_and_aliases): Likewise. (cgraph_for_node_and_aliases): Likewise. (verify_cgraph_node): Likewise. * cgraph.h: Batch of IPA REF functions become member functions of symtab_node: add_reference, maybe_add_reference, clone_references, clone_referring, clone_reference, find_reference, remove_stmt_references, remove_all_references, remove_all_referring, dump_references, dump_referring, has_alias_p, iterate_reference, iterate_referring. * cgraphbuild.c (record_reference): New IPA REF function used. (record_type_list): Likewise. (record_eh_tables): Likewise. (mark_address): Likewise. (mark_load): Likewise. (mark_store): Likewise. (pass_build_cgraph_edges): Likewise. (rebuild_cgraph_edge): Likewise. (cgraph_rebuild_references): Likewise. (pass_remove_cgraph_callee_edges): Likewise. * cgraphclones.c (cgraph_clone_node): Likewise. (cgraph_create_virtual_clone): Likewise. (cgraph_materialize_clone): Likewise. (cgraph_materialize_all_clones): Likewise. * cgraphunit.c (cgraph_reset_node): Likewise. (cgraph_reset_node): Likewise. (analyze_function): Likewise. (assemble_thunks_and_aliases): Likewise. (expand_function): Likewise. * ipa-comdats.c (propagate_comdat_group): Likewise. (enqueue_references): Likewise. * ipa-cp.c (ipcp_discover_new_direct_edges): Likewise. (create_specialized_node): Likewise. * ipa-devirt.c (referenced_from_vtable_p): Likewise. * ipa-inline-transform.c (can_remove_node_now_p_1): Likewise. * ipa-inline.c (reset_edge_caches): Likewise. 
(update_caller_keys): Likewise. (execute): Likewise. * ipa-prop.c (remove_described_reference): Likewise. (propagate_controlled_uses): Likewise. (ipa_edge_duplication_hook): Likewise. (ipa_modify_call_arguments): Likewise. * ipa-pure-const.c (propagate_pure_const): Likewise. * ipa-ref-inline.h: Header file removed, functions moved to symtab_node class. * ipa-ref.c (remove_reference): New class member function. (cannot_lead_to_return): New class member function. (referring_ref_list): Likewise. (referred_ref_list): Likewise. Rest of functions moved to symtab_node class. * ipa-ref.h: New member functions remove_reference, cannot_lead_to_return, referring_ref_list, referred_ref_list added to ipa_ref class. ipa_ref_list class has new member functions: first_reference, first_referring, clear, nreferences. * ipa-reference.c (analyze_function): New IPA REF function used. (write_node_summary_p): Likewise. (ipa_reference_write_optimization_summary): Likewise. * ipa-split.c (split_function): Likewise. * ipa-utils.c (ipa_reverse_postorder): Likewise. * ipa-visibility.c (cgraph_non_local_node_p_1): Likewise. (function_and_variable_visibility): Likewise. * ipa.c (has_addr_references_p): Likewise. (process_references): Argument type changed. (symtab_remove_unreachable_nodes): New IPA REF function used. (process_references): Likewise. (set_writeonly_bit): Likewise. * lto-cgraph.c: Implementation of new symtab_node member functions that uses new IPA REF functions. * lto-streamer-in.c (fixup_call_stmt_edges_1): New IPA REF function used. * lto-streamer-out.c (output_symbol_p): Likewise. * lto-streamer.h (referenced_from_this_partition_p): Argument type changed. * lto/lto-partition.c (add_references_to_partition): New IPA REF function used. (add_symbol_to_partition_1): Likewise. (lto_balanced_map): Likewise. * lto/lto-symtab.c (lto_cgraph_replace_node): Likewise. * symtab.c: Implementation of new IPA REF API. * trans-mem.c (ipa_tm_create_version_alias): New IPA REF function used. 
(ipa_tm_create_version): Likewise. (ipa_tm_execute): Likewise. * tree-emutls.c (gen_emutls_addr): Likewise. * tree-inline.c (copy_bb): Likewise. (delete_unreachable_blocks_update_callgraph): Likewise. * varpool.c (varpool_remove_unreferenced_decls): Likewise. (varpool_for_node_and_aliases): Likewise. Patch is OK. Thanks a lot for working on it. Note that I added the single_use pass that walks refs, so you need to update it too before committing. Thank you for the notice; the patch has just been committed. Martin
[PATCH] Devirtualization dump functions fix
Hello, I encountered similar issue to PR ipa/61462 where location_t locus = gimple_location (e-call_stmt) is called for e-call_stmt == NULL (Firefox with -flto -fdump-ipa-devirt). So that, I decided to introduce new function that is called for all potentially unsafe locations. I am wondering if a newly added function can be added in more seamless way (without playing with va_list and ATTRIBUTE_PRINTF stuff)? Bootstrapped and regtested on x86_64-unknown-linux-gnu. Thanks, Martin ChangeLog: 2014-06-26 Martin Liska mli...@suse.cz * include/ansidecl.h: New collection of ATTRIBUTE_NULL_PRINTF_X_0 defined. gcc/ChangeLog: 2014-06-26 Martin Liska mli...@suse.cz * dumpfile.h: New function dump_printf_loc_for_stmt. * dumpfile.c: Implementation added. (dump_vprintf): New function.i * cgraphunit.c: dump_printf_loc_for_stmt usage replaces dump_printf_loc. * gimple-fold.c: Likewise. * ipa-devirt.c: Likewise. * ipa-prop.c: Likewise. * ipa.c: Likewise. * tree-ssa-pre.c: Likewise. diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index 76b2fda1..3b01718 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -905,12 +905,9 @@ walk_polymorphic_call_targets (pointer_set_t *reachable_call_targets, TDF_SLIM); } if (dump_enabled_p ()) -{ - location_t locus = gimple_location (edge-call_stmt); - dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, locus, - devirtualizing call in %s to %s\n, - edge-caller-name (), target-name ()); - } + dump_printf_loc_for_stmt (MSG_OPTIMIZED_LOCATIONS, edge-call_stmt, + devirtualizing call in %s to %s\n, + edge-caller-name (), target-name ()); cgraph_make_edge_direct (edge, target); cgraph_redirect_edge_call_stmt_to_callee (edge); diff --git a/gcc/dumpfile.c b/gcc/dumpfile.c index fd630a6..b7a791c 100644 --- a/gcc/dumpfile.c +++ b/gcc/dumpfile.c @@ -23,6 +23,12 @@ along with GCC; see the file COPYING3. 
If not see #include diagnostic-core.h #include dumpfile.h #include tree.h +#include basic-block.h +#include tree-ssa-alias.h +#include internal-fn.h +#include gimple-expr.h +#include is-a.h +#include gimple.h #include gimple-pretty-print.h #include context.h @@ -343,52 +349,80 @@ dump_generic_expr_loc (int dump_kind, source_location loc, } } -/* Output a formatted message using FORMAT on appropriate dump streams. */ +/* Output a formatted message using FORMAT on appropriate dump streams. + Accepts va_list AP as the last argument. */ -void -dump_printf (int dump_kind, const char *format, ...) +ATTRIBUTE_NULL_PRINTF_2_0 +static void +dump_vprintf (int dump_kind, const char *format, va_list ap) { if (dump_file (dump_kind pflags)) -{ - va_list ap; - va_start (ap, format); vfprintf (dump_file, format, ap); - va_end (ap); -} if (alt_dump_file (dump_kind alt_flags)) -{ - va_list ap; - va_start (ap, format); vfprintf (alt_dump_file, format, ap); - va_end (ap); -} } -/* Similar to dump_printf, except source location is also printed. */ +/* Output a formatted message using FORMAT on appropriate dump streams. */ void -dump_printf_loc (int dump_kind, source_location loc, const char *format, ...) +dump_printf (int dump_kind, const char *format, ...) +{ + va_list ap; + va_start (ap, format); + dump_vprintf (dump_kind, format, ap); + va_end (ap); +} + +/* Similar to dump_printf, except source location is also printed. + Accepts va_list AP as the last argument. */ + +void +dump_vprintf_loc (int dump_kind, source_location loc, const char *format, + va_list ap) { if (dump_file (dump_kind pflags)) { - va_list ap; dump_loc (dump_kind, dump_file, loc); - va_start (ap, format); vfprintf (dump_file, format, ap); - va_end (ap); } if (alt_dump_file (dump_kind alt_flags)) { - va_list ap; dump_loc (dump_kind, alt_dump_file, loc); - va_start (ap, format); vfprintf (alt_dump_file, format, ap); - va_end (ap); } } +/* Similar to dump_printf, except source location is also printed. 
*/ + +void +dump_printf_loc (int dump_kind, source_location loc, const char *format, ...) +{ + va_list ap; + va_start (ap, format); + dump_vprintf_loc (dump_kind, loc, format, ap); + va_end (ap); +} + +/* Similar to dump_printf, except source location is also printed if STMT + is not null. Otherwise, fallback to dump_fprintf is called. */ + +void +dump_printf_loc_for_stmt (int dump_kind, const_gimple stmt, const char *format, + ...) +{ + va_list ap; + va_start (ap, format); + + if (stmt) +dump_vprintf_loc (dump_kind, gimple_location (stmt), format, ap); + else +dump_vprintf (dump_kind, format, ap); + + va_end (ap); +} + /* Start a dump for PHASE. Store user-supplied dump flags in *FLAG_PTR. Return
Re: [PATCH] Devirtualization dump functions fix
On 06/26/2014 03:20 PM, Richard Biener wrote: On Thu, Jun 26, 2014 at 3:01 PM, Martin Liška mli...@suse.cz wrote: Hello, I encountered similar issue to PR ipa/61462 where location_t locus = gimple_location (e-call_stmt) is called for e-call_stmt == NULL (Firefox with -flto -fdump-ipa-devirt). So that, I decided to introduce new function that is called for all potentially unsafe locations. I am wondering if a newly added function can be added in more seamless way (without playing with va_list and ATTRIBUTE_PRINTF stuff)? Bootstrapped and regtested on x86_64-unknown-linux-gnu. Hmm, I don't like that very much - dump_printf_loc_for_stmt still implies stmt is not NULL. So you could have fixed gimple_location as well. I suppose dump_printf_loc already does sth sane with UNKNOWN_LOCATION. Richard. Hi, you are right that it is quite a complex change. Do you mean that this one-line change would be sufficient? diff --git a/gcc/gimple.h b/gcc/gimple.h index ceefbc0..954195e 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -1498,7 +1498,7 @@ gimple_set_block (gimple g, tree block) static inline location_t gimple_location (const_gimple g) { - return g->location; + return g ? g->location : UNKNOWN_LOCATION; } /* Return pointer to location information for statement G. */ I will double-check if it solves the problem ;) Martin Thanks, Martin ChangeLog: 2014-06-26 Martin Liska mli...@suse.cz * include/ansidecl.h: New collection of ATTRIBUTE_NULL_PRINTF_X_0 defined. gcc/ChangeLog: 2014-06-26 Martin Liska mli...@suse.cz * dumpfile.h: New function dump_printf_loc_for_stmt. * dumpfile.c: Implementation added. (dump_vprintf): New function.i * cgraphunit.c: dump_printf_loc_for_stmt usage replaces dump_printf_loc. * gimple-fold.c: Likewise. * ipa-devirt.c: Likewise. * ipa-prop.c: Likewise. * ipa.c: Likewise. * tree-ssa-pre.c: Likewise.
Re: [PATCH] Devirtualization dump functions fix
On 06/26/2014 04:18 PM, Jakub Jelinek wrote: On Thu, Jun 26, 2014 at 04:10:03PM +0200, Richard Biener wrote: On Thu, Jun 26, 2014 at 3:43 PM, Martin Liška mli...@suse.cz wrote: On 06/26/2014 03:20 PM, Richard Biener wrote: On Thu, Jun 26, 2014 at 3:01 PM, Martin Liška mli...@suse.cz wrote: Hello, I encountered similar issue to PR ipa/61462 where location_t locus = gimple_location (e-call_stmt) is called for e-call_stmt == NULL (Firefox with -flto -fdump-ipa-devirt). So that, I decided to introduce new function that is called for all potentially unsafe locations. I am wondering if a newly added function can be added in more seamless way (without playing with va_list and ATTRIBUTE_PRINTF stuff)? Bootstrapped and regtested on x86_64-unknown-linux-gnu. Hmm, I don't like that very much - dump_printf_loc_for_stmt still implies stmt is not NULL. So you could have fixed gimple_location as well. I suppose dump_printf_loc already does sth sane with UNKNOWN_LOCATION. Richard. Hi, you are right that it is quite complex change. Do you mean this one line change can be sufficient ? diff --git a/gcc/gimple.h b/gcc/gimple.h index ceefbc0..954195e 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -1498,7 +1498,7 @@ gimple_set_block (gimple g, tree block) static inline location_t gimple_location (const_gimple g) { - return g-location; + return g ? g-location : UNKNOWN_LOCATION; } /* Return pointer to location information for statement G. */ I will double-check if it solves the problem ;) Well yes - it is of course similar broken in spirit but at least a lot simpler ;) I'd put a comment there why we do check g for NULL. But it increases overhead, there are hundreds of gimple_location calls and most of them will never pass NULL. Can't you simply do what you do in the inline here in the couple of spots where the stmt might be NULL? Sure, do you have any suggestion how should be called such function? Suggestion: gimple_location_or_unknown ? Thanks, Martin Jakub
Re: [PATCH] Devirtualization dump functions fix
On 06/26/2014 04:29 PM, Jakub Jelinek wrote: On Thu, Jun 26, 2014 at 04:27:49PM +0200, Martin Liška wrote: Well yes - it is of course similar broken in spirit but at least a lot simpler ;) I'd put a comment there why we do check g for NULL. But it increases overhead, there are hundreds of gimple_location calls and most of them will never pass NULL. Can't you simply do what you do in the inline here in the couple of spots where the stmt might be NULL? Sure, do you have any suggestion how should be called such function? Suggestion: gimple_location_or_unknown ? gimple_location_safe or gimple_safe_location? Jakub Thanks, there's new patch. Patch has been tested for Firefox with -flto -fdump-ipa-devirt. Bootstrap and regression tests have been running. Ready for trunk after regression tests? ChangeLog: 2014-06-26 Martin Liska mli...@suse.cz * gimple.h (gimple_safe_location): New function introduced. * cgraphunit.c (walk_polymorphic_call_targets): Usage of gimple_safe_location replaces gimple_location. (gimple_fold_call): Likewise. * ipa-devirt.c (ipa_devirt): Likewise. * ipa-prop.c (ipa_make_edge_direct_to_target): Likewise. * ipa.c (walk_polymorphic_call_targets): Likewise. * tree-ssa-pre.c (eliminate_dom_walker::before_dom_children): Likewise. 
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index 76b2fda1..2bf5216 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -906,7 +906,7 @@ walk_polymorphic_call_targets (pointer_set_t *reachable_call_targets, } if (dump_enabled_p ()) { - location_t locus = gimple_location (edge-call_stmt); + location_t locus = gimple_safe_location (edge-call_stmt); dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, locus, devirtualizing call in %s to %s\n, edge-caller-name (), target-name ()); diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c index 403dee7..ad230be 100644 --- a/gcc/gimple-fold.c +++ b/gcc/gimple-fold.c @@ -387,7 +387,7 @@ fold_gimple_assign (gimple_stmt_iterator *si) fndecl = builtin_decl_implicit (BUILT_IN_UNREACHABLE); if (dump_enabled_p ()) { - location_t loc = gimple_location (stmt); + location_t loc = gimple_safe_location (stmt); dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, resolving virtual function address reference to function %s\n, @@ -1131,7 +1131,7 @@ gimple_fold_call (gimple_stmt_iterator *gsi, bool inplace) tree lhs = gimple_call_lhs (stmt); if (dump_enabled_p ()) { - location_t loc = gimple_location (stmt); + location_t loc = gimple_safe_location (stmt); dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, folding virtual function call to %s\n, targets.length () == 1 diff --git a/gcc/gimple.h b/gcc/gimple.h index ceefbc0..d401d47 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -1501,6 +1501,15 @@ gimple_location (const_gimple g) return g-location; } +/* Return location information for statement G if g is not NULL. + Otherwise, UNKNOWN_LOCATION is returned. */ + +static inline location_t +gimple_safe_location (const_gimple g) +{ + return g ? gimple_location (g) : UNKNOWN_LOCATION; +} + /* Return pointer to location information for statement G. 
*/ static inline const location_t * diff --git a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c index 21f4f11..4e5dae8 100644 --- a/gcc/ipa-devirt.c +++ b/gcc/ipa-devirt.c @@ -2080,7 +2080,7 @@ ipa_devirt (void) { if (dump_enabled_p ()) { -location_t locus = gimple_location (e-call_stmt); +location_t locus = gimple_safe_location (e-call_stmt); dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, locus, speculatively devirtualizing call in %s/%i to %s/%i\n, n-name (), n-order, diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c index 1e10b53..c6967be 100644 --- a/gcc/ipa-prop.c +++ b/gcc/ipa-prop.c @@ -2673,17 +2673,11 @@ ipa_make_edge_direct_to_target (struct cgraph_edge *ie, tree target) if (dump_enabled_p ()) { - const char *fmt = discovered direct call to non-function in %s/%i, -making it __builtin_unreachable\n; - - if (ie-call_stmt) - { - location_t loc = gimple_location (ie-call_stmt); - dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, fmt, - ie-caller-name (), ie-caller-order); - } - else if (dump_file) - fprintf (dump_file, fmt, ie-caller-name (), ie-caller-order); + location_t loc = gimple_safe_location (ie-call_stmt); + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, + discovered direct call to non-function in %s/%i, + making it __builtin_unreachable\n, + ie-caller-name (), ie-caller-order); } target = builtin_decl_implicit (BUILT_IN_UNREACHABLE); @@ -2745,18 +2739,11 @@ ipa_make_edge_direct_to_target (struct cgraph_edge *ie, tree target
Re: [PATCH 3/5] IPA ICF pass
On 06/24/2014 10:31 PM, Jeff Law wrote: On 06/13/14 04:44, mliska wrote: Hello, this is core of IPA ICF patchset. It adds new pass and registers all needed stuff related to a newly introduced interprocedural optimization. Algorithm description: In LGEN, we visit all read-only variables and functions. For each symbol, a hash value based on e.g. number of arguments, number of BB, GIMPLE CODES is computed (similar hash is computed for read-only variables). This kind of information is streamed for LTO. In WPA, we build congruence classes for all symbols having a same hash value. For functions, these classes are subdivided in WPA by argument type comparison. Each reference (a call or a variable reference) to another semantic item candidate is marked and stored for further congruence class reduction (similar algorithm as Value Numbering: www.cs.ucr.edu/~gupta/teaching/553-07/Papers/value.pdf). For every congruence class of functions with more than one semantic function, we load function body. Having this information, we can process complete semantic function equality and subdivide such congruence class. Read-only variable class members are also deeply compared. After that, we process Value numbering algorithm to do a final subdivision. Finally, all items belonging to a congruence class with more than one item are merged. Martin Changelog: 2014-06-13 Martin Liska mli...@suse.cz Jan Hubicka hubi...@ucw.cz * Makefile.in: New pass object file added. * common.opt: New -fipa-icf flag introduced. * doc/invoke.texi: Documentation enhanced for the pass. * lto-section-in.c: New LTO section for a summary created by IPA-ICF. * lto-streamer.h: New section name introduced. * opts.c: Optimization is added to -O2. * passes.def: New pass added. * timevar.def: New time var for IPA-ICF. * tree-pass.h: Pass construction function. * ipa-icf.h: New pass header file added. * ipa-icf.c: New pass source file added. Hi Jeff, I must agree that the implementation of the patch is quite big. 
Suggested split makes sense, I'll do it. You'll note many of my comments are do you need to You may in fact be handling that stuff correctly, they're just things I'd like you to verify are properly handled. If they're properly handled just say so :-) At a high level, I think this needs to be broken down a bit more. We've got two high level concepts in ipa-icf. One is all the equivalence testing the other is using that information for the icf optimization. Splitting out the equivalence testing seems like a good thing to do as there's other contexts where it would be useful. Overall I think you're on the right path and we just need to iterate a bit on this part of the patchset. @@ -7862,6 +7863,14 @@ it may significantly increase code size (see @option{--param ipcp-unit-growth=@var{value}}). This flag is enabled by default at @option{-O3}. +@item -fipa-icf +@opindex fipa-icf +Perform Identical Code Folding for functions and read-only variables. +Behavior is similar to Gold Linker ICF optimization. Symbols proved +as semantically equivalent are redirected to corresponding symbol. The pass +sensitively decides for usage of alias, thunk or local redirection. +This flag is enabled by default at @option{-O2}. So you've added this at -O2, what is the general compile-time impact? Would it make more sense to instead have it be part of -O3, particularly since ICF is rarely going to improve performance (sans icache issues). This was Honza's idea to put the optimization for -O2, I'll measure compile-time impact. + +/* Interprocedural Identical Code Folding for functions and + read-only variables. + + The goal of this transformation is to discover functions and read-only + variables which do have exactly the same semantics. + + In case of functions, + we could either create a virtual clone or do a simple function wrapper + that will call equivalent function. If the function is just locally visible, + all function calls can be redirected. 
For read-only variables, we create + aliases if possible. + + Optimization pass arranges as follows: + 1) All functions and read-only variables are visited and internal + data structure, either sem_function or sem_variables is created. + 2) For every symbol from the previoues step, VAR_DECL and FUNCTION_DECL are + saved and matched to corresponding sem_items. s/previoues/previous/ + 3) These declaration are ignored for equality check and are solved + by Value Numbering algorithm published by Alpert, Zadeck in 1992. + 4) We compute hash value for each symbol. + 5) Congruence classes are created based on hash value. If hash value are + equal, equals function is called and symbols are deeply compared. + We must prove that all SSA names, declarations and other items + correspond. + 6) Value Numbering is executed for these classes.
Re: [PATCH] Devirtualization dump functions fix
On 06/27/2014 10:38 AM, Richard Biener wrote: On Thu, Jun 26, 2014 at 5:58 PM, Martin Liška mli...@suse.cz wrote: On 06/26/2014 04:29 PM, Jakub Jelinek wrote: On Thu, Jun 26, 2014 at 04:27:49PM +0200, Martin Liška wrote: Well yes - it is of course similar broken in spirit but at least a lot simpler ;) I'd put a comment there why we do check g for NULL. But it increases overhead, there are hundreds of gimple_location calls and most of them will never pass NULL. Can't you simply do what you do in the inline here in the couple of spots where the stmt might be NULL? Sure, do you have any suggestion how should be called such function? Suggestion: gimple_location_or_unknown ? gimple_location_safe or gimple_safe_location? Jakub Thanks, there's new patch. Patch has been tested for Firefox with -flto -fdump-ipa-devirt. Bootstrap and regression tests have been running. Ready for trunk after regression tests? Ok with s/gimple_safe_location/gimple_location_safe/ (I think that's the more canonical naming - what's a safe location after all?) Thanks, Richard. You are right, gimple_location_safe sounds better. Patch has been just committed with your change. Thanks, Martin ChangeLog: 2014-06-26 Martin Liska mli...@suse.cz * gimple.h (gimple_safe_location): New function introduced. * cgraphunit.c (walk_polymorphic_call_targets): Usage of gimple_safe_location replaces gimple_location. (gimple_fold_call): Likewise. * ipa-devirt.c (ipa_devirt): Likewise. * ipa-prop.c (ipa_make_edge_direct_to_target): Likewise. * ipa.c (walk_polymorphic_call_targets): Likewise. * tree-ssa-pre.c (eliminate_dom_walker::before_dom_children): Likewise.
[PATCH] IPA REF: alias refactoring
Hi, this patch enhances alias manipulation for symtab_node. Honza suggested following changes. Patch is pre approved, will be committed if no comments and regressions. Bootstrapped on x86_64-pc-linux-gnu, regression tests have been running. Thanks, Martin gcc/ChangeLog: * cgraph.h (iterate_direct_aliases): New function. (FOR_EACH_ALIAS): New macro iterates all direct aliases for a node. * cgraph.c (cgraph_for_node_thunks_and_aliases): Usage of FOR_EACH_ALIAS added. (cgraph_for_node_and_aliases): Likewise. * cgraphunit.c (assemble_thunks_and_aliases): Likewise. * ipa-inline.c (reset_edge_caches): Likewise. (update_caller_keys): Likewise. * trans-mem.c (ipa_tm_execute): Likewise. *varpool.c (varpool_analyze_node): Likewise. (varpool_for_node_and_aliases): Likewise. * ipa-ref.h (first_referring_alias): New function. (last_referring_alias): Likewise. * ipa-ref.c (ipa_ref::remove_reference): Removal function is sensitive to IPA_REF_ALIASes. * symtab.c (symtab_node::add_reference): Node of IPA_REF_ALIAS type are put at the beginning of the list. (symtab_node::iterate_direct_aliases): New function. gcc/lto/ChangeLog: * lto-partition.c (add_symbol_to_partition_1): Usage of FOR_EACH_ALIAS added. 
diff --git a/gcc/cgraph.c b/gcc/cgraph.c index 7360f77..568eb45 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -2194,8 +2194,7 @@ cgraph_for_node_thunks_and_aliases (struct cgraph_node *node, bool include_overwritable) { struct cgraph_edge *e; - int i; - struct ipa_ref *ref = NULL; + struct ipa_ref *ref; if (callback (node, data)) return true; @@ -2206,16 +2205,16 @@ cgraph_for_node_thunks_and_aliases (struct cgraph_node *node, if (cgraph_for_node_thunks_and_aliases (e-caller, callback, data, include_overwritable)) return true; - for (i = 0; node-iterate_referring (i, ref); i++) -if (ref-use == IPA_REF_ALIAS) - { - struct cgraph_node *alias = dyn_cast cgraph_node * (ref-referring); - if (include_overwritable - || cgraph_function_body_availability (alias) AVAIL_OVERWRITABLE) - if (cgraph_for_node_thunks_and_aliases (alias, callback, data, - include_overwritable)) - return true; - } + + FOR_EACH_ALIAS (node, ref) +{ + struct cgraph_node *alias = dyn_cast cgraph_node * (ref-referring); + if (include_overwritable + || cgraph_function_body_availability (alias) AVAIL_OVERWRITABLE) + if (cgraph_for_node_thunks_and_aliases (alias, callback, data, + include_overwritable)) + return true; +} return false; } @@ -2229,21 +2228,20 @@ cgraph_for_node_and_aliases (struct cgraph_node *node, void *data, bool include_overwritable) { - int i; - struct ipa_ref *ref = NULL; + struct ipa_ref *ref; if (callback (node, data)) return true; - for (i = 0; node-iterate_referring (i, ref); i++) -if (ref-use == IPA_REF_ALIAS) - { - struct cgraph_node *alias = dyn_cast cgraph_node * (ref-referring); - if (include_overwritable - || cgraph_function_body_availability (alias) AVAIL_OVERWRITABLE) - if (cgraph_for_node_and_aliases (alias, callback, data, - include_overwritable)) - return true; - } + + FOR_EACH_ALIAS (node, ref) +{ + struct cgraph_node *alias = dyn_cast cgraph_node * (ref-referring); + if (include_overwritable + || cgraph_function_body_availability (alias) AVAIL_OVERWRITABLE) + if 
(cgraph_for_node_and_aliases (alias, callback, data, + include_overwritable)) + return true; +} return false; } diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 0761e26..3ab0516 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -254,6 +254,9 @@ public: /* Iterates I-th referring item in the list, REF is also set. */ struct ipa_ref *iterate_referring (unsigned i, struct ipa_ref *ref); + /* Iterates I-th referring alias item in the list, REF is also set. */ + struct ipa_ref *iterate_direct_aliases (unsigned i, struct ipa_ref *ref); + /* Vectors of referring and referenced entities. */ struct ipa_ref_list ref_list; @@ -281,6 +284,10 @@ public: priority_type get_init_priority (); }; +/* Walk all aliases for NODE. */ +#define FOR_EACH_ALIAS(node, alias) \ + for (unsigned x_i = 0; node-iterate_direct_aliases (x_i, alias); x_i++) + enum availability { /* Not yet set by cgraph_function_body_availability. */ diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index 76b2fda1..b0478cb 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -1711,8 +1711,7 @@ static void assemble_thunks_and_aliases (struct cgraph_node *node) { struct cgraph_edge *e; - int i; - struct ipa_ref *ref = NULL; + struct ipa_ref *ref; for (e = node-callers; e;) if (e-caller-thunk.thunk_p) @@ -1725,20 +1724,20 @@ assemble_thunks_and_aliases (struct cgraph_node *node) } else e = e-next_caller; - for (i = 0; node-iterate_referring
Re: [PATCH 2/5] Existing call graph infrastructure enhancement
On 06/17/2014 10:00 PM, Jeff Law wrote: On 06/13/14 04:26, mliska wrote: Hi, this small patch prepares remaining needed infrastructure for the new pass. Changelog: 2014-06-13 Martin Liska mli...@suse.cz Honza Hubicka hubi...@ucw.cz * ipa-utils.h (polymorphic_type_binfo_p): Function marked external instead of static. * ipa-devirt.c (polymorphic_type_binfo_p): Likewise. * ipa-prop.h (count_formal_params): Likewise. * ipa-prop.c (count_formal_params): Likewise. * ipa-utils.c (ipa_merge_profiles): Be more tolerant if we merge profiles for semantically equivalent functions. * passes.c (do_per_function): If we load body of a function during WPA, this condition should behave same. * varpool.c (ctor_for_folding): More tolerant assert for variable aliases created during WPA. Presumably we don't have any useful way to merge the cases where we have provides for SRC DST in ipa_merge_profiles or even to guess which is more useful when presented with both? Does it make sense to log this into a debugging file when we drop one? Hello, this merge function was written by Honza, what do you think Honza about this note? I think this patch is fine. If adding logging makes sense, then feel free to do so and consider that trivial change pre-approved. I made a small change to this patch, where I moved 'gsi_next_nonvirtual_phi' from the pass to gimple-iterator.h. Ready for trunk with this change? Thanks, Martin gcc/ChangeLog 2014-06-30 Martin Liska mli...@suse.cz Honza Hubicka hubi...@ucw.cz * gimple-iterator.h (gsi_next_nonvirtual_phi): New function. * ipa-utils.h (polymorphic_type_binfo_p): Function marked external instead of static. * ipa-devirt.c (polymorphic_type_binfo_p): Likewise. * ipa-prop.h (count_formal_params): Likewise. * ipa-prop.c (count_formal_params): Likewise. * ipa-utils.c (ipa_merge_profiles): Be more tolerant if we merge profiles for semantically equivalent functions. 
* passes.c (do_per_function): If we load body of a function during WPA, this condition should behave same. * varpool.c (ctor_for_folding): More tolerant assert for variable aliases created during WPA. Jeff diff --git a/gcc/gimple-iterator.h b/gcc/gimple-iterator.h index 909d58b..47168b9 100644 --- a/gcc/gimple-iterator.h +++ b/gcc/gimple-iterator.h @@ -281,6 +281,30 @@ gsi_last_nondebug_bb (basic_block bb) return i; } +/* Iterates I statement iterator to the next non-virtual statement. */ + +static inline void +gsi_next_nonvirtual_phi (gimple_stmt_iterator *i) +{ + gimple phi; + + if (gsi_end_p (*i)) +return; + + phi = gsi_stmt (*i); + gcc_assert (phi != NULL); + + while (virtual_operand_p (gimple_phi_result (phi))) +{ + gsi_next (i); + + if (gsi_end_p (*i)) + return; + + phi = gsi_stmt (*i); +} +} + /* Return the basic block associated with this iterator. */ static inline basic_block diff --git a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c index d6b85bf..2f62323 100644 --- a/gcc/ipa-devirt.c +++ b/gcc/ipa-devirt.c @@ -176,7 +176,7 @@ struct GTY(()) odr_type_d inheritance (because vtables are shared). Look up the BINFO of type and check presence of its vtable. */ -static inline bool +bool polymorphic_type_binfo_p (tree binfo) { /* See if BINFO's type has an virtual table associtated with it. */ diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c index 68efc77..bb2fbf3 100644 --- a/gcc/ipa-prop.c +++ b/gcc/ipa-prop.c @@ -210,7 +210,7 @@ ipa_populate_param_decls (struct cgraph_node *node, /* Return how many formal parameters FNDECL has. 
*/ -static inline int +int count_formal_params (tree fndecl) { tree parm; diff --git a/gcc/ipa-prop.h b/gcc/ipa-prop.h index 8886e93..bc6249e 100644 --- a/gcc/ipa-prop.h +++ b/gcc/ipa-prop.h @@ -529,6 +529,7 @@ void ipa_free_all_edge_args (void); void ipa_free_all_structures_after_ipa_cp (void); void ipa_free_all_structures_after_iinln (void); void ipa_register_cgraph_hooks (void); +int count_formal_params (tree fndecl); /* This function ensures the array of node param infos is big enough to accommodate a structure for all nodes and reallocates it if not. */ diff --git a/gcc/ipa-utils.c b/gcc/ipa-utils.c index c191210..d58b170 100644 --- a/gcc/ipa-utils.c +++ b/gcc/ipa-utils.c @@ -660,13 +660,8 @@ ipa_merge_profiles (struct cgraph_node *dst, if (dst-tp_first_run src-tp_first_run src-tp_first_run) dst-tp_first_run = src-tp_first_run; - if (src-profile_id) -{ - if (!dst-profile_id) - dst-profile_id = src-profile_id; - else - gcc_assert (src-profile_id == dst-profile_id); -} + if (src-profile_id !dst-profile_id) +dst-profile_id = src-profile_id; if (!dst-count) return; diff --git a/gcc/ipa-utils.h b/gcc/ipa-utils.h index a2c985a..996249a 100644 --- a/gcc/ipa-utils.h +++ b/gcc/ipa-utils.h @@ -72,6 +72,8 @@ struct odr_type_d; typedef odr_type_d *odr_type;
Re: [PATCH 4/5] Existing tests fix
On 06/17/2014 09:52 PM, Jeff Law wrote: On 06/13/14 04:48, mliska wrote: Hi, many tests rely on a precise number of scanned functions in a dump file. If IPA ICF decides to merge some function and(or) read-only variables, counts do not match. Martin Changelog: 2014-06-13 Martin Liska mli...@suse.cz Honza Hubicka hubi...@ucw.cz * c-c++-common/rotate-1.c: Text * c-c++-common/rotate-2.c: New test. * c-c++-common/rotate-3.c: Likewise. * c-c++-common/rotate-4.c: Likewise. * g++.dg/cpp0x/rv-return.C: Likewise. * g++.dg/cpp0x/rv1n.C: Likewise. * g++.dg/cpp0x/rv1p.C: Likewise. * g++.dg/cpp0x/rv2n.C: Likewise. * g++.dg/cpp0x/rv3n.C: Likewise. * g++.dg/cpp0x/rv4n.C: Likewise. * g++.dg/cpp0x/rv5n.C: Likewise. * g++.dg/cpp0x/rv6n.C: Likewise. * g++.dg/cpp0x/rv7n.C: Likewise. * gcc.dg/ipa/ipacost-1.c: Likewise. * gcc.dg/ipa/ipacost-2.c: Likewise. * gcc.dg/ipa/ipcp-agg-6.c: Likewise. * gcc.dg/ipa/remref-2a.c: Likewise. * gcc.dg/ipa/remref-2b.c: Likewise. * gcc.dg/pr46309-2.c: Likewise. * gcc.dg/torture/ipa-pta-1.c: Likewise. * gcc.dg/tree-ssa/andor-3.c: Likewise. * gcc.dg/tree-ssa/andor-4.c: Likewise. * gcc.dg/tree-ssa/andor-5.c: Likewise. * gcc.dg/vect/no-vfa-pr29145.c: Likewise. * gcc.dg/vect/vect-cond-10.c: Likewise. * gcc.dg/vect/vect-cond-9.c: Likewise. * gcc.dg/vect/vect-widen-mult-const-s16.c: Likewise. * gcc.dg/vect/vect-widen-mult-const-u16.c: Likewise. * gcc.dg/vect/vect-widen-mult-half-u8.c: Likewise. * gcc.target/i386/bmi-1.c: Likewise. * gcc.target/i386/bmi-2.c: Likewise. * gcc.target/i386/pr56564-2.c: Likewise. * g++.dg/opt/pr30965.C: Likewise. * g++.dg/tree-ssa/pr19637.C: Likewise. * gcc.dg/guality/csttest.c: Likewise. * gcc.dg/ipa/iinline-4.c: Likewise. * gcc.dg/ipa/iinline-7.c: Likewise. * gcc.dg/ipa/ipa-pta-13.c: Likewise. I know this is the least interesting part of your changes, but it's also simple and mechanical and thus trivial to review. Approved, but obviously don't install until the rest of your patch has been approved. 
Similar changes for recently added tests or cases where you might improve ICF requiring similar tweaks to existing tests are pre-approved as well. jeff Hello, I fixed few more tests and added correct ChangeLog message. gcc/testsuite/ChangeLog 2014-06-30 Martin Liska mli...@suse.cz Honza Hubicka hubi...@ucw.cz * c-c++-common/rotate-1.c: Test fixed. * c-c++-common/rotate-2.c: Likewise. * c-c++-common/rotate-3.c: Likewise. * c-c++-common/rotate-4.c: Likewise. * g++.dg/cpp0x/rv-return.C: Likewise. * g++.dg/cpp0x/rv1n.C: Likewise. * g++.dg/cpp0x/rv1p.C: Likewise. * g++.dg/cpp0x/rv2n.C: Likewise. * g++.dg/cpp0x/rv3n.C: Likewise. * g++.dg/cpp0x/rv4n.C: Likewise. * g++.dg/cpp0x/rv5n.C: Likewise. * g++.dg/cpp0x/rv6n.C: Likewise. * g++.dg/cpp0x/rv7n.C: Likewise. * g++.dg/ipa/devirt-g-1.C: Likewise. * g++.dg/ipa/inline-1.C: Likewise. * g++.dg/ipa/inline-2.C: Likewise. * g++.dg/ipa/inline-3.C: Likewise. * g++.dg/opt/pr30965.C: Likewise. * g++.dg/tree-ssa/pr19637.C: Likewise. * gcc.dg/guality/csttest.c: Likewise. * gcc.dg/ipa/iinline-4.c: Likewise. * gcc.dg/ipa/iinline-7.c: Likewise. * gcc.dg/ipa/ipa-pta-13.c: Likewise. * gcc.dg/ipa/ipacost-1.c: Likewise. * gcc.dg/ipa/ipacost-2.c: Likewise. * gcc.dg/ipa/ipcp-agg-6.c: Likewise. * gcc.dg/ipa/remref-2a.c: Likewise. * gcc.dg/ipa/remref-2b.c: Likewise. * gcc.dg/pr46309-2.c: Likewise. * gcc.dg/torture/ipa-pta-1.c: Likewise. * gcc.dg/tree-ssa/andor-3.c: Likewise. * gcc.dg/tree-ssa/andor-4.c: Likewise. * gcc.dg/tree-ssa/andor-5.c: Likewise. * gcc.dg/vect/no-vfa-pr29145.c: Likewise. * gcc.dg/vect/vect-cond-10.c: Likewise. * gcc.dg/vect/vect-cond-9.c: Likewise. * gcc.dg/vect/vect-widen-mult-const-s16.c: Likewise. * gcc.dg/vect/vect-widen-mult-const-u16.c: Likewise. * gcc.dg/vect/vect-widen-mult-half-u8.c: Likewise. * gcc.target/i386/bmi-1.c: Likewise. * gcc.target/i386/bmi-2.c: Likewise. * gcc.target/i386/pr56564-2.c: Likewise. 
Thank you, Martin diff --git a/gcc/testsuite/c-c++-common/rotate-1.c b/gcc/testsuite/c-c++-common/rotate-1.c index afdaa28..bca9dd8 100644 --- a/gcc/testsuite/c-c++-common/rotate-1.c +++ b/gcc/testsuite/c-c++-common/rotate-1.c @@ -1,6 +1,6 @@ /* Check rotate pattern detection. */ /* { dg-do compile } */ -/* { dg-options -O2 -fdump-tree-optimized } */ +/* { dg-options -O2 -fno-ipa-icf -fdump-tree-optimized } */ /* { dg-final { scan-tree-dump-times r\[]\[] 96 optimized } } */ /* { dg-final { cleanup-tree-dump optimized } } */ diff --git a/gcc/testsuite/c-c++-common/rotate-2.c b/gcc/testsuite/c-c++-common/rotate-2.c index 109fd32..4ffa218 100644 --- a/gcc/testsuite/c-c++-common/rotate-2.c +++ b/gcc/testsuite/c-c++-common/rotate-2.c @@ -1,6 +1,6 @@ /* Check
Re: [PATCH 5/5] New tests introduction
On 06/17/2014 09:53 PM, Jeff Law wrote: On 06/13/14 05:16, mliska wrote: Hi, this is a new collection of tests for IPA ICF pass. Martin Changelog: 2014-06-13 Martin Liska mli...@suse.cz Honza Hubicka hubi...@ucw.cz * gcc/testsuite/g++.dg/ipa/ipa-se-1.C: New test. * gcc/testsuite/g++.dg/ipa/ipa-se-2.C: Likewise. * gcc/testsuite/g++.dg/ipa/ipa-se-3.C: Likewise. * gcc/testsuite/g++.dg/ipa/ipa-se-4.C: Likewise. * gcc/testsuite/g++.dg/ipa/ipa-se-5.C: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-1.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-10.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-11.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-12.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-13.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-14.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-15.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-16.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-17.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-18.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-19.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-2.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-20.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-21.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-22.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-23.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-24.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-25.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-26.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-27.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-28.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-3.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-4.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-5.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-6.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-7.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-8.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-se-9.c: Likewise. Also approved, but please don't install until the entire kit is approved. I'd like to applaud you and Jan for including a nice baseline of tests. 
jeff Hi, there's an updated baseline of tests. Martin gcc/testsuite/ChangeLog: 2014-06-30 Martin Liska mli...@suse.cz Honza Hubicka hubi...@ucw.cz * gcc/testsuite/g++.dg/ipa/ipa-icf-1.C: New test. * gcc/testsuite/g++.dg/ipa/ipa-icf-2.C: Likewise. * gcc/testsuite/g++.dg/ipa/ipa-icf-3.C: Likewise. * gcc/testsuite/g++.dg/ipa/ipa-icf-4.C: Likewise. * gcc/testsuite/g++.dg/ipa/ipa-icf-5.C: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-1.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-10.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-11.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-12.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-13.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-14.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-15.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-16.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-17.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-18.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-19.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-2.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-20.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-21.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-22.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-23.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-24.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-25.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-26.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-27.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-28.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-3.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-4.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-5.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-6.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-7.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-8.c: Likewise. * gcc/testsuite/gcc.dg/ipa/ipa-icf-9.c: Likewise. 
Martin diff --git a/gcc/testsuite/g++.dg/ipa/ipa-icf-1.C b/gcc/testsuite/g++.dg/ipa/ipa-icf-1.C new file mode 100644 index 000..d27abf4 --- /dev/null +++ b/gcc/testsuite/g++.dg/ipa/ipa-icf-1.C @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options -O2 -fdump-ipa-icf } */ + +class A +{ +public: + __attribute__ ((noinline)) + virtual int Foo2() + { +return v; + } + + float f; + int v; +}; + +class B +{ +public: + __attribute__ ((noinline)) + int Bar2() + { +return v; + } + + float f, aaa; + int v; +}; + +int main() +{ + A a; + B b; + + a.Foo2(); + b.Bar2(); + + return 12345; +} + +/* { dg-final { scan-ipa-dump-not Semantic equality hit: icf } } */ +/* { dg-final { scan-ipa-dump Equal symbols: 0 icf } } */ +/* { dg-final { cleanup-ipa-dump icf } } */ diff --git a/gcc/testsuite/g++.dg/ipa/ipa-icf-2.C
Re: [PATCH] IPA REF: alias refactoring
On 06/28/2014 08:49 AM, Jan Hubicka wrote: Hi, this patch enhances alias manipulation for symtab_node. Honza suggested following changes. Patch is pre approved, will be committed if no comments and regressions. Bootstrapped on x86_64-pc-linux-gnu, regression tests have been running. Thanks, Martin gcc/ChangeLog: * cgraph.h (iterate_direct_aliases): New function. (FOR_EACH_ALIAS): New macro iterates all direct aliases for a node. * cgraph.c (cgraph_for_node_thunks_and_aliases): Usage of FOR_EACH_ALIAS added. (cgraph_for_node_and_aliases): Likewise. * cgraphunit.c (assemble_thunks_and_aliases): Likewise. * ipa-inline.c (reset_edge_caches): Likewise. (update_caller_keys): Likewise. * trans-mem.c (ipa_tm_execute): Likewise. *varpool.c (varpool_analyze_node): Likewise. (varpool_for_node_and_aliases): Likewise. * ipa-ref.h (first_referring_alias): New function. (last_referring_alias): Likewise. I missed it last time around, I think first_alias/last_alias are better names. first_alias is unused. If you added it I guess FOR_EACH_ALIAS should use it. Hello, I renamed these functions as you suggested and has_aliases_p predication was also added. Previous patch has an error in ipa_ref::remove_refence, this patch has been regtested and the problem is removed. We probably also can bring has_aliases_p inline and implement it using first_referring_alias. + /* If deleted item is IPA_REF_ALIAS, we have to move last + item of IPA_REF_LIST type to the deleted position. After that + we replace last node with deletion slot. */ + struct ipa_ref *last_alias = list-last_referring_alias (); You can avoid walking to last alias when the removed item is not IPA_REF_ALIAS. + + /* IPA_REF_ALIAS is always put at the beginning of the list. */ inserted? Type fixed. If no other comments will come, I consider the patch as preapproved. Thanks, Martin gcc/ChangeLog: * cgraph.h (iterate_direct_aliases): New function. (FOR_EACH_ALIAS): New macro iterates all direct aliases for a node. 
* cgraph.c (cgraph_for_node_thunks_and_aliases): Usage of FOR_EACH_ALIAS added. (cgraph_for_node_and_aliases): Likewise. * cgraphunit.c (assemble_thunks_and_aliases): Likewise. * ipa-inline.c (reset_edge_caches): Likewise. (update_caller_keys): Likewise. * trans-mem.c (ipa_tm_execute): Likewise. *varpool.c (varpool_analyze_node): Likewise. (varpool_for_node_and_aliases): Likewise. * ipa-ref.h (first_alias): New function. (last_alias): Likewise. (has_aliases_p): Likewise. * ipa-ref.c (ipa_ref::remove_reference): Removal function is sensitive to IPA_REF_ALIASes. * symtab.c (symtab_node::add_reference): Node of IPA_REF_ALIAS type are put at the beginning of the list. (symtab_node::iterate_direct_aliases): New function. gcc/lto/ChangeLog: * lto-partition.c (add_symbol_to_partition_1): Usage of FOR_EACH_ALIAS added. OK with these changes (or if you already comitted, just do them incrementally) Honza diff --git a/gcc/cgraph.c b/gcc/cgraph.c index 43428be..41dcaf9 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -2198,8 +2198,7 @@ cgraph_for_node_thunks_and_aliases (struct cgraph_node *node, bool include_overwritable) { struct cgraph_edge *e; - int i; - struct ipa_ref *ref = NULL; + struct ipa_ref *ref; if (callback (node, data)) return true; @@ -2210,16 +2209,16 @@ cgraph_for_node_thunks_and_aliases (struct cgraph_node *node, if (cgraph_for_node_thunks_and_aliases (e-caller, callback, data, include_overwritable)) return true; - for (i = 0; node-iterate_referring (i, ref); i++) -if (ref-use == IPA_REF_ALIAS) - { - struct cgraph_node *alias = dyn_cast cgraph_node * (ref-referring); - if (include_overwritable - || cgraph_function_body_availability (alias) AVAIL_OVERWRITABLE) - if (cgraph_for_node_thunks_and_aliases (alias, callback, data, - include_overwritable)) - return true; - } + + FOR_EACH_ALIAS (node, ref) +{ + struct cgraph_node *alias = dyn_cast cgraph_node * (ref-referring); + if (include_overwritable + || cgraph_function_body_availability (alias) 
AVAIL_OVERWRITABLE) + if (cgraph_for_node_thunks_and_aliases (alias, callback, data, + include_overwritable)) + return true; +} return false; } @@ -2233,21 +2232,20 @@ cgraph_for_node_and_aliases (struct cgraph_node *node, void *data, bool include_overwritable) { - int i; - struct ipa_ref *ref = NULL; + struct ipa_ref *ref; if (callback (node, data)) return true; - for (i = 0; node-iterate_referring (i, ref); i++) -if (ref-use == IPA_REF_ALIAS) - { - struct cgraph_node *alias = dyn_cast cgraph_node * (ref-referring); - if (include_overwritable - || cgraph_function_body_availability (alias) AVAIL_OVERWRITABLE) - if
Re: [PATCH] IPA REF: alias refactoring
On 07/01/2014 12:21 AM, Jan Hubicka wrote: gcc/ChangeLog: * cgraph.h (iterate_direct_aliases): New function. (FOR_EACH_ALIAS): New macro iterates all direct aliases for a node. * cgraph.c (cgraph_for_node_thunks_and_aliases): Usage of FOR_EACH_ALIAS added. (cgraph_for_node_and_aliases): Likewise. * cgraphunit.c (assemble_thunks_and_aliases): Likewise. * ipa-inline.c (reset_edge_caches): Likewise. (update_caller_keys): Likewise. * trans-mem.c (ipa_tm_execute): Likewise. * varpool.c (varpool_analyze_node): Likewise. (varpool_for_node_and_aliases): Likewise. * ipa-ref.h (first_alias): New function. (last_alias): Likewise. (has_aliases_p): Likewise. * ipa-ref.c (ipa_ref::remove_reference): Removal function is sensitive to IPA_REF_ALIASes. * symtab.c (symtab_node::add_reference): Node of IPA_REF_ALIAS type are put at the beginning of the list. (symtab_node::iterate_direct_aliases): New function. gcc/lto/ChangeLog: * lto-partition.c (add_symbol_to_partition_1): Usage of FOR_EACH_ALIAS added. OK, thanks! Honza Thanks, the patch has just been committed. Martin
[PATCH, DOC]: Fix for Options That Control Optimization section
Hello, I fixed Options That Control Optimization section according to 'gcc -Q --help=optimizers' and after consultation with Jakub, I added missing -foptimize-strlen option. Ready for trunk? Martin ChangeLog: 2014-07-11 Martin Liska mli...@suse.cz * doc/invoke.texi: Added missing options to options that control optimization. Missing -foptimize-strlen option introduced. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index a83f6c6..8fa63ff 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -6921,25 +6921,31 @@ compilation time. @option{-O} turns on the following optimization flags: @gccoptlist{ -fauto-inc-dec @gol +-fbranch-count-reg @gol +-fcombine-stack-adjustments @gol -fcompare-elim @gol -fcprop-registers @gol -fdce @gol -fdefer-pop @gol -fdelayed-branch @gol -fdse @gol +-fforward-propagate @gol -fguess-branch-probability @gol -fif-conversion2 @gol -fif-conversion @gol +-finline-functions-called-once @gol -fipa-pure-const @gol -fipa-profile @gol -fipa-reference @gol --fmerge-constants +-fmerge-constants @gol +-fmove-loop-invariants @gol +-fshrink-wrap @gol -fsplit-wide-types @gol -ftree-bit-ccp @gol --ftree-builtin-call-dce @gol -ftree-ccp @gol -fssa-phiopt @gol -ftree-ch @gol +-ftree-copy-prop @gol -ftree-copyrename @gol -ftree-dce @gol -ftree-dominator-opts @gol @@ -6947,6 +6953,7 @@ compilation time. 
-ftree-forwprop @gol -ftree-fre @gol -ftree-phiprop @gol +-ftree-sink @gol -ftree-slsr @gol -ftree-sra @gol -ftree-pta @gol @@ -6978,19 +6985,23 @@ also turns on the following optimization flags: -fhoist-adjacent-loads @gol -finline-small-functions @gol -findirect-inlining @gol +-fipa-cp @gol -fipa-sra @gol -fisolate-erroneous-paths-dereference @gol -foptimize-sibling-calls @gol +-foptimize-strlen @gol -fpartial-inlining @gol -fpeephole2 @gol --freorder-blocks -freorder-functions @gol +-freorder-blocks -freorder-blocks-and-partition -freorder-functions @gol -frerun-cse-after-loop @gol -fsched-interblock -fsched-spec @gol -fschedule-insns -fschedule-insns2 @gol -fstrict-aliasing -fstrict-overflow @gol +-ftree-builtin-call-dce @gol -ftree-switch-conversion -ftree-tail-merge @gol -ftree-pre @gol --ftree-vrp} +-ftree-vrp @gol +-fuse-caller-save} Please note the warning under @option{-fgcse} about invoking @option{-O2} on programs that use computed gotos. @@ -7000,9 +7011,10 @@ invoking @option{-O2} on programs that use computed gotos. Optimize yet more. @option{-O3} turns on all optimizations specified by @option{-O2} and also turns on the @option{-finline-functions}, @option{-funswitch-loops}, @option{-fpredictive-commoning}, -@option{-fgcse-after-reload}, @option{-ftree-loop-vectorize}, -@option{-ftree-slp-vectorize}, @option{-fvect-cost-model}, -@option{-ftree-partial-pre} and @option{-fipa-cp-clone} options. +@option{-fgcse-after-reload}, @option{-ftree-loop-distribute-patterns}, +@option{-ftree-loop-vectorize}, @option{-ftree-slp-vectorize}, +@option{-fvect-cost-model}, @option{-ftree-partial-pre} +and @option{-fipa-cp-clone} options. @item -O0 @opindex O0 @@ -7113,6 +7125,14 @@ Optimize sibling and tail recursive calls. Enabled at levels @option{-O2}, @option{-O3}, @option{-Os}. +@item -foptimize-strlen +@opindex foptimize-strlen +Optimize various standard C string functions (e.g. 
@code{strlen}, +@code{strchr} or @code{strcpy}) and +their _FORTIFY_SOURCE counterparts into faster alternatives. + +Enabled at levels @option{-O2}, @option{-O3}. + @item -fno-inline @opindex fno-inline Do not expand any functions inline apart from those marked with @@ -7278,6 +7298,8 @@ register, compare it against zero, then branch based upon the result. This option is only meaningful on architectures that support such instructions, which include x86, PowerPC, IA-64 and S/390. +Enabled by default at -O1 and higher. + The default is @option{-fbranch-count-reg}. @item -fno-function-cse
Re: [RFC, PATCH 1/n] IPA C++ refactoring
Hello On 07/11/2014 12:07 PM, Jan Hubicka wrote: Hi, this first patch continues with rafactoring of IPA infrastructure so that we will have C++ API. In the patch, I transformed many global functions to members of symtab_node and cgraph_node. Example: cgraph_remove_node (struct cgraph_node *node) - cgraph_node::remove (void) symtab_unregister_node (symtab_node *node) - symtab_node::unregister (void) The patch is being consulted with Honza and will iterate. We want to inform folk that we plan to do following changes. After the patch is applied, I would like to transform varpool_node and cgraph_edge in the following patch. Thank you for your comments, Martin /* Remove the node from cgraph. */ Perhaps Remove function from symbol table. (similarly for varpool, perhaps few other block comments needs revisiting. We may do that incrementally.) + /* Add node into symbol table. This function is not used directly, but via + cgraph/varpool node creation routines. */ + void register_symbol (void); + + /* Remove symtab node from the symbol table. */ + void remove (void); + + /* Dump symtab node to F. */ + void dump (FILE *f); + + /* Dump symtab node to stderr. */ + void DEBUG_FUNCTION debug (void); + + /* Verify consistency of node. */ + void DEBUG_FUNCTION verify (void); + + /* Return ipa reference from this symtab_node to + REFERED_NODE or REFERED_VARPOOL_NODE. USE_TYPE specify type + of the use and STMT the statement (if it exists). */ + struct ipa_ref *add_reference (symtab_node *referred_node, + enum ipa_ref_use use_type); + + /* Return ipa reference from this symtab_node to + REFERED_NODE or REFERED_VARPOOL_NODE. USE_TYPE specify type + of the use and STMT the statement (if it exists). */ + struct ipa_ref *add_reference (symtab_node *referred_node, +enum ipa_ref_use use_type, gimple stmt); + + /* If VAL is a reference to a function or a variable, add a reference from + this symtab_node to the corresponding symbol table node. 
USE_TYPE specify + type of the use and STMT the statement (if it exists). Return the new + reference or NULL if none was created. */ + struct ipa_ref *maybe_add_reference (tree val, enum ipa_ref_use use_type, + gimple stmt); + + /* Clone all references from symtab NODE to this symtab_node. */ + void clone_references (symtab_node *node); + + /* Remove all stmt references in non-speculative references. + Those are not maintained during inlining clonning. + The exception are speculative references that are updated along + with callgraph edges associated with them. */ + void clone_referring (symtab_node *node); + + /* Clone reference REF to this symtab_node and set its stmt to STMT. */ + struct ipa_ref *clone_reference (struct ipa_ref *ref, gimple stmt); + + /* Find the structure describing a reference to REFERRED_NODE + and associated with statement STMT. */ + struct ipa_ref *find_reference (symtab_node *, gimple, unsigned int); + + /* Remove all references that are associated with statement STMT. */ + void remove_stmt_references (gimple stmt); + + /* Remove all stmt references in non-speculative references. + Those are not maintained during inlining clonning. + The exception are speculative references that are updated along + with callgraph edges associated with them. */ + void clear_stmts_in_references (void); + + /* Remove all references in ref list. */ + void remove_all_references (void); + + /* Remove all referring items in ref list. */ + void remove_all_referring (void); + + /* Dump references in ref list to FILE. */ + void dump_references (FILE *file); + + /* Dump referring in list to FILE. */ + void dump_referring (FILE *); + + /* Return true if symtab node and TARGET represents + semantically equivalent symbols. */ + bool semantically_equivalent_p (symtab_node *target); + + /* Classify symbol symtab node for partitioning. */ + enum symbol_partitioning_class get_partitioning_class (void); + + /* Return comdat group. 
*/ + tree get_comdat_group () +{ + return x_comdat_group; +} + + /* Return comdat group as identifier_node. */ + tree get_comdat_group_id () +{ + if (x_comdat_group TREE_CODE (x_comdat_group) != IDENTIFIER_NODE) + x_comdat_group = DECL_ASSEMBLER_NAME (x_comdat_group); + return x_comdat_group; +} + + /* Set comdat group. */ + void set_comdat_group (tree group) +{ + gcc_checking_assert (!group || TREE_CODE (group) == IDENTIFIER_NODE + || DECL_P (group)); + x_comdat_group = group; +} + + /* Return section as string. */ + const char * get_section () +{ + if (!x_section) + return NULL; + return x_section-name; +} + + /* Remove node from same comdat group. */ + void remove_from_same_comdat_group (void); + + /*
Re: [PATCH 2/5] Existing call graph infrastructure enhancement
On 06/30/2014 08:54 PM, Jeff Law wrote: On 06/30/14 05:49, Martin Liška wrote: On 06/17/2014 10:00 PM, Jeff Law wrote: On 06/13/14 04:26, mliska wrote: Hi, this small patch prepares remaining needed infrastructure for the new pass. Changelog: 2014-06-13 Martin Liska mli...@suse.cz Honza Hubicka hubi...@ucw.cz * ipa-utils.h (polymorphic_type_binfo_p): Function marked external instead of static. * ipa-devirt.c (polymorphic_type_binfo_p): Likewise. * ipa-prop.h (count_formal_params): Likewise. * ipa-prop.c (count_formal_params): Likewise. * ipa-utils.c (ipa_merge_profiles): Be more tolerant if we merge profiles for semantically equivalent functions. * passes.c (do_per_function): If we load body of a function during WPA, this condition should behave same. * varpool.c (ctor_for_folding): More tolerant assert for variable aliases created during WPA. Presumably we don't have any useful way to merge the cases where we have provides for SRC DST in ipa_merge_profiles or even to guess which is more useful when presented with both? Does it make sense to log this into a debugging file when we drop one? Hello, this merge function was written by Honza, what do you think Honza about this note? I think this patch is fine. If adding logging makes sense, then feel free to do so and consider that trivial change pre-approved. I made a small change to this patch, where I moved 'gsi_next_nonvirtual_phi' from the pass to gimple-iterator.h. Ready for trunk with this change? Yes. I think with the exception of patch #3/5 everything looks good. I'll try to get another pass over #3 this week. What I looked at last week was pretty good; I'm pretty confident this will be wrapped up shortly. If #1/#2 make sense to install independent of #3, go ahead. #4/#5 are obviously dependent on #3. Jeff Hello, thank you for approval, this final version removes few hunks that are not needed any more. Changes are just cosmetic and I will commit the patch at the beginning of next week. 
Thanks, Martin diff --git a/gcc/gimple-iterator.h b/gcc/gimple-iterator.h index 909d58b..47168b9 100644 --- a/gcc/gimple-iterator.h +++ b/gcc/gimple-iterator.h @@ -281,6 +281,30 @@ gsi_last_nondebug_bb (basic_block bb) return i; } +/* Iterates I statement iterator to the next non-virtual statement. */ + +static inline void +gsi_next_nonvirtual_phi (gimple_stmt_iterator *i) +{ + gimple phi; + + if (gsi_end_p (*i)) +return; + + phi = gsi_stmt (*i); + gcc_assert (phi != NULL); + + while (virtual_operand_p (gimple_phi_result (phi))) +{ + gsi_next (i); + + if (gsi_end_p (*i)) + return; + + phi = gsi_stmt (*i); +} +} + /* Return the basic block associated with this iterator. */ static inline basic_block diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c index 40f696b..aecba07 100644 --- a/gcc/ipa-prop.c +++ b/gcc/ipa-prop.c @@ -211,7 +211,7 @@ ipa_populate_param_decls (struct cgraph_node *node, /* Return how many formal parameters FNDECL has. */ -static inline int +int count_formal_params (tree fndecl) { tree parm; diff --git a/gcc/ipa-prop.h b/gcc/ipa-prop.h index 8886e93..bc6249e 100644 --- a/gcc/ipa-prop.h +++ b/gcc/ipa-prop.h @@ -529,6 +529,7 @@ void ipa_free_all_edge_args (void); void ipa_free_all_structures_after_ipa_cp (void); void ipa_free_all_structures_after_iinln (void); void ipa_register_cgraph_hooks (void); +int count_formal_params (tree fndecl); /* This function ensures the array of node param infos is big enough to accommodate a structure for all nodes and reallocates it if not. 
*/ diff --git a/gcc/ipa-utils.c b/gcc/ipa-utils.c index c191210..d58b170 100644 --- a/gcc/ipa-utils.c +++ b/gcc/ipa-utils.c @@ -660,13 +660,8 @@ ipa_merge_profiles (struct cgraph_node *dst, if (dst-tp_first_run src-tp_first_run src-tp_first_run) dst-tp_first_run = src-tp_first_run; - if (src-profile_id) -{ - if (!dst-profile_id) - dst-profile_id = src-profile_id; - else - gcc_assert (src-profile_id == dst-profile_id); -} + if (src-profile_id !dst-profile_id) +dst-profile_id = src-profile_id; if (!dst-count) return; diff --git a/gcc/passes.c b/gcc/passes.c index 61b4c12..bae302b 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -1478,7 +1478,7 @@ do_per_function (void (*callback) (function *, void *data), void *data) { struct cgraph_node *node; FOR_EACH_DEFINED_FUNCTION (node) - if (node-analyzed gimple_has_body_p (node-decl) + if (node-analyzed (gimple_has_body_p (node-decl) !in_lto_p) (!node-clone_of || node-decl != node-clone_of-decl)) callback (DECL_STRUCT_FUNCTION (node-decl), data); } diff --git a/gcc/varpool.c b/gcc/varpool.c index 04ce714..5662985 100644 --- a/gcc/varpool.c +++ b/gcc/varpool.c @@ -397,6 +397,7 @@ ctor_for_folding (tree decl) if (decl != real_decl) { gcc_assert (!DECL_INITIAL (decl) + || (node-alias
Re: [PATCH, go]: Restore bootstrap
On 07/24/2014 07:57 PM, Uros Bizjak wrote: Hello! 2014-07-24 Uros Bizjak ubiz...@gmail.com * go/go-gcc.cc (Gcc_backend::global_variable_set_init): Rename symtab_get_node to symtab_node::get. Tested on x86_64-linux-gnu and committed to mainline SVN. Uros. Hello, thank you for your fix. As I see in my configure output: $ ../configure --disable-bootstrap --enable-checking=release --enable-languages=all --disable-multilib The following languages will be built: c,c++,fortran,java,lto,objc *** This configuration is not supported in the following subdirectories: gnattools target-libada target-libgo target-libbacktrace (Any other directories should still work fine.) I thought with 'all' I cover all front-ends, is it really intended behavior that Go is disabled by 'all'? Thank you, Martin
Re: [PATCH, go]: Restore bootstrap
On 07/25/2014 03:51 AM, Ian Lance Taylor wrote: On Thu, Jul 24, 2014 at 12:21 PM, Martin Liška mli...@suse.cz wrote: On 07/24/2014 07:57 PM, Uros Bizjak wrote: Hello! 2014-07-24 Uros Bizjak ubiz...@gmail.com * go/go-gcc.cc (Gcc_backend::global_variable_set_init): Rename symtab_get_node to symtab_node::get. Tested on x86_64-linux-gnu and committed to mainline SVN. Uros. Hello, thank you for your fix. As I see in my configure output: $ ../configure --disable-bootstrap --enable-checking=release --enable-languages=all --disable-multilib The following languages will be built: c,c++,fortran,java,lto,objc *** This configuration is not supported in the following subdirectories: gnattools target-libada target-libgo target-libbacktrace (Any other directories should still work fine.) I thought with 'all' I cover all front-ends, is it really intended behavior that Go is disabled by 'all'? Yes, --enable-languages=all is the default, so it only builds the frontends that are enabled by default, so it does not build the Go or Ada frontends. I know it doesn't make much sense. Ian I would suggest replacing the currently used 'all' with 'default'. And 'all' can be really used for all possible frontends we have :) I know it changes the behavior, but I hope it makes sense? Thank you, Martin
Re: [PATCH, go]: Restore bootstrap
On 07/28/2014 05:24 PM, Mike Stump wrote: On Jul 28, 2014, at 3:29 AM, Gerald Pfeifer ger...@pfeifer.com wrote: On Fri, 25 Jul 2014, Martin Liška wrote: Yes, --enable-languages=all is the default, so it only builds the frontends that are enabled by default, so it does not build the Go or Ada frontends. I know it doesn't make much sense. I would suggest replacing the currently used 'all' with 'default'. And 'all' can be really used for all possible frontends we have :) I know it changes the behavior, but I hope it makes sense? I'd be in favor of that (assuming you mean all that we have and that are supported for the host/target combination in question). When I want to do all, I actually do want to do all. I think as long as we retain a spelling for the trimmed list, (aka default or most or some spelling), I think it would be an improvement. A second option would be a noimeanreallyall spelling to get all and leave all alone. I don't favor that. Hello, I am not a configure script guru, but what do you think about the suggested patch? 
Thanks, MArtin diff --git a/configure b/configure index 353730b..e5eb557 100755 --- a/configure +++ b/configure @@ -6298,7 +6298,7 @@ if test -d ${srcdir}/gcc; then enable_languages=${LANGUAGES} echo configure.in: warning: setting LANGUAGES is deprecated, use --enable-languages instead 12 else - enable_languages=all + enable_languages=default fi else if test x${enable_languages} = x || @@ -6350,6 +6350,7 @@ if test -d ${srcdir}/gcc; then for other in ${lang_requires} ${lang_requires_boot_languages}; do case ,${enable_languages}, in *,$other,*) ;; + *,default,*) ;; *,all,*) ;; *,$language,*) echo \`$other' language required by \`$language'; enabling 12 @@ -6361,6 +6362,7 @@ if test -d ${srcdir}/gcc; then if test $other != c; then case ,${enable_stage1_languages}, in *,$other,*) ;; + *,default,*) ;; *,all,*) ;; *) case ,${enable_languages}, in @@ -6393,7 +6395,7 @@ if test -d ${srcdir}/gcc; then fi - missing_languages=`echo ,$enable_languages, | sed -e s/,all,/,/ -e s/,c,/,/ ` + missing_languages=`echo ,$enable_languages, | sed -e s/,all,/,/ -e s/,default,/,/ -e s/,c,/,/ ` potential_languages=,c, enabled_target_libs= @@ -6433,12 +6435,18 @@ if test -d ${srcdir}/gcc; then add_this_lang=yes fi ;; - *,all,*) -# 'all' was selected, select it if it is a default language + *,default,*) +# 'default' was selected, select it if it is a default language if test $language != c; then add_this_lang=${build_by_default} fi ;; + *,all,*) +# 'all' was selected, add the language + if test $language != c; then + add_this_lang=yes + fi +;; esac # Disable languages that need other directories if these aren't available. 
diff --git a/configure.ac b/configure.ac index d0f7471..2d99dc5 100644 --- a/configure.ac +++ b/configure.ac @@ -1759,7 +1759,7 @@ if test -d ${srcdir}/gcc; then enable_languages=${LANGUAGES} echo configure.in: warning: setting LANGUAGES is deprecated, use --enable-languages instead 12 else - enable_languages=all + enable_languages=default fi else if test x${enable_languages} = x || @@ -1811,6 +1811,7 @@ if test -d ${srcdir}/gcc; then for other in ${lang_requires} ${lang_requires_boot_languages}; do case ,${enable_languages}, in *,$other,*) ;; + *,default,*) ;; *,all,*) ;; *,$language,*) echo \`$other' language required by \`$language'; enabling 12 @@ -1822,6 +1823,7 @@ if test -d ${srcdir}/gcc; then if test $other != c; then case ,${enable_stage1_languages}, in *,$other,*) ;; + *,default,*) ;; *,all,*) ;; *) case ,${enable_languages}, in @@ -1854,7 +1856,7 @@ if test -d ${srcdir}/gcc; then fi AC_SUBST(extra_host_libiberty_configure_flags) - missing_languages=`echo ,$enable_languages, | sed -e s/,all,/,/ -e s/,c,/,/ ` + missing_languages=`echo ,$enable_languages, | sed -e s/,all,/,/ -e s/,default,/,/ -e s/,c,/,/ ` potential_languages=,c, enabled_target_libs= @@ -1894,12 +1896,18 @@ if test -d ${srcdir}/gcc; then add_this_lang=yes fi ;; - *,all,*) -# 'all' was selected, select it if it is a default language + *,default,*) +# 'default' was selected, select it if it is a default language if test $language != c; then add_this_lang=${build_by_default} fi ;; + *,all,*) +# 'all' was selected, add the language + if test $language != c; then + add_this_lang=yes + fi +;; esac # Disable languages that need other directories if these aren't available.
Re: [PATCH, go]: Restore bootstrap
On 07/29/2014 07:01 PM, Mike Stump wrote: On Jul 29, 2014, at 2:10 AM, Martin Liška mli...@suse.cz wrote: I am not a configure script guru, but what do you think about the suggested patch? Looks exactly like what I would do. All right, I'll send the patch, as well as documentation enhancement, to a separate mailing list thread. Thanks, Martin
Re: [PATCH] LTO streamer reorg - try to reduce WPA memory use
On 07/30/2014 11:41 AM, Richard Biener wrote: On Wed, 30 Jul 2014, Richard Biener wrote: On Wed, Jul 30, 2014 at 7:51 AM, Markus Trippelsdorf mar...@trippelsdorf.de wrote: On 2014.07.29 at 15:10 +0200, Richard Biener wrote: On Tue, 29 Jul 2014, Richard Biener wrote: This re-organizes the LTO streamer to do compression transparently in the data-streamer routines (and disables section compression by defaulting to -flto-compression-level=0). This avoids keeping the whole uncompressed sections in memory, only retaining the compressed ones. The downside is that we lose compression of at least the string parts (they are abusing the streaming interface quite awkwardly and doing random-accesses with offsets into the uncompressed section). With a little bit of surgery we can get that back I think (but we'd have to keep the uncompressed piece in memory somewhere which means losing the memory use advantage). Very lightly tested sofar (running lto.exp). I'll try a LTO bootstrap now. I wonder what the change is on WPA memory use for larger projects and what the effect on object file size is. Updated patch passing LTO bootstrap (one warning fix) and with a memory leak fixed. Testing with Firefox is impossible at the moment because of PR61885. One thing I've noticed (before the ICE) is that virtual memory usage is very high: AddressKbytes RSSDirty Mode Mapping 004016344 90840 r-x-- lto1 013f6000 36 36 28 rw--- lto1 013ff000 1072 276 276 rw--- [ anon ] 034aa000 10154940 1540384 1540384 rw--- [ anon ] 2acf04af2000 136 1360 r-x-- ld-2.19.90.so 2acf04b14000 88 88 88 rw--- [ anon ] ... --- --- --- total kB 12022060 3388396 3377708 Maybe there is still a memleak (just checked that LTOing int main() {} doesn't leak). 
Found it: Index: gcc/lto-section-in.c === --- gcc/lto-section-in.c.orig 2014-07-30 12:40:27.950225826 +0200 +++ gcc/lto-section-in.c2014-07-30 12:37:44.179237102 +0200 @@ -249,7 +249,7 @@ lto_destroy_simple_input_block (struct l struct lto_input_block *ib, const char *data, size_t len) { - free (ib); + delete ib; lto_free_section_data (file_data, section_type, NULL, data, len); } Richard. Hello, there's memory/CPU usage for the patch. for both, I used sync and drop_caches. Url: https://drive.google.com/file/d/0B0pisUJ80pO1andOX19JMHV3LVE/edit?usp=sharing Martin
[PATCH] Fix for ipa/63795, ipa/63622
Hello. Following patch adds checking for aliasing support. Patch can bootstrap on x86_64-apple-darwin1 and is part of patches needed for bootstrap restory on the target. I plan to introduce additional patch that will cover testsuite failures for the target. Ready for trunk? Thanks, Martin gcc/ChangeLog: 2014-11-11 Martin Liska mli...@suse.cz * ipa-icf.c (sem_function::merge): Add new target aliasing support guide. (sem_variable::merge): Likewise. * ipa-icf.h (target_supports_aliasing_p): New function. gcc/testsuite/ChangeLog: 2014-11-11 Martin Liska mli...@suse.cz * g++.dg/ipa/ipa-icf-4.C: Add more precise dump scan. * g++.dg/ipa/ipa-icf-5.C: Add condition for targets with aliasing support. diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c index 84cc0ca..f19c3c1 100644 --- a/gcc/ipa-icf.c +++ b/gcc/ipa-icf.c @@ -191,6 +191,18 @@ sem_item::dump (void) } } +/* Return true if target supports aliasing. */ + +bool +sem_item::target_supports_aliasing_p (void) +{ +#if !defined (ASM_OUTPUT_DEF) || (!defined(ASM_OUTPUT_WEAK_ALIAS) !defined (ASM_WEAKEN_DECL)) + return false; +#else + return true; +#endif +} + /* Semantic function constructor that uses STACK as bitmap memory stack. 
*/ sem_function::sem_function (bitmap_obstack *stack): sem_item (FUNC, stack), @@ -589,7 +601,8 @@ sem_function::merge (sem_item *alias_item) redirect_callers = false; } - if (create_alias DECL_COMDAT_GROUP (alias-decl)) + if (create_alias (DECL_COMDAT_GROUP (alias-decl) + || !sem_item::target_supports_aliasing_p ())) { create_alias = false; create_thunk = true; @@ -605,6 +618,14 @@ sem_function::merge (sem_item *alias_item) local_original = dyn_cast cgraph_node * (original-noninterposable_alias ()); +if (!local_original) + { + if (dump_file) + fprintf (dump_file, Noninterposable alias cannot be created.\n\n); + + return false; + } + if (redirect_callers) { /* If alias is non-overwritable then @@ -649,7 +670,7 @@ sem_function::merge (sem_item *alias_item) alias-resolve_alias (original); /* Workaround for PR63566 that forces equal calling convention - to be used. */ + to be used. */ alias-local.local = false; original-local.local = false; @@ -1155,6 +1176,13 @@ sem_variable::merge (sem_item *alias_item) { gcc_assert (alias_item-type == VAR); + if (!sem_item::target_supports_aliasing_p ()) +{ + if (dump_file) + fprintf (dump_file, Aliasing is not supported by target\n\n); + return false; +} + sem_variable *alias_var = static_castsem_variable * (alias_item); varpool_node *original = get_node (); diff --git a/gcc/ipa-icf.h b/gcc/ipa-icf.h index d8e7b16..6e15166 100644 --- a/gcc/ipa-icf.h +++ b/gcc/ipa-icf.h @@ -138,9 +138,11 @@ public: /* Return base tree that can be used for compatible_types_p and contains_polymorphic_type_p comparison. */ - static bool get_base_types (tree *t1, tree *t2); + /* Return true if target supports aliasing. */ + static bool target_supports_aliasing_p (void); + /* Item type. 
*/ sem_item_type type; diff --git a/gcc/testsuite/g++.dg/ipa/ipa-icf-4.C b/gcc/testsuite/g++.dg/ipa/ipa-icf-4.C index 9434289..67f2744 100644 --- a/gcc/testsuite/g++.dg/ipa/ipa-icf-4.C +++ b/gcc/testsuite/g++.dg/ipa/ipa-icf-4.C @@ -43,6 +43,6 @@ int main() return 123; } -/* { dg-final { scan-ipa-dump Varpool alias has been created icf } } */ +/* { dg-final { scan-ipa-dump \(Varpool alias has been created\)|\(Aliasing is not supported by target\) icf } } */ /* { dg-final { scan-ipa-dump Equal symbols: 6 icf } } */ /* { dg-final { cleanup-ipa-dump icf } } */ diff --git a/gcc/testsuite/g++.dg/ipa/ipa-icf-5.C b/gcc/testsuite/g++.dg/ipa/ipa-icf-5.C index f835814..57dcb78 100644 --- a/gcc/testsuite/g++.dg/ipa/ipa-icf-5.C +++ b/gcc/testsuite/g++.dg/ipa/ipa-icf-5.C @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-require-visibility } */ +/* { dg-require-alias } */ /* { dg-options -O2 -fdump-ipa-icf } */ struct test
Re: [PATCH] Fix some ICF gimple_call handling issues
On 11/11/2014 12:11 AM, Jakub Jelinek wrote: On Mon, Nov 10, 2014 at 10:08:54PM +0100, Richard Biener wrote: @@ -662,9 +662,49 @@ func_checker::compare_gimple_call (gimpl t1 = gimple_call_fndecl (s1); t2 = gimple_call_fndecl (s2); Just drop these and compare gimple_call_fn only. + tree chain1 = gimple_call_chain (s1); + tree chain2 = gimple_call_chain (s2); + + if ((chain1 !chain2) || (!chain1 chain2)) +return return_false_with_msg (Tree call chains are different); I miss a compare_operands for the call chain. Otherwise OK. Here is what I've committed after another bootstrap/regtest. Note, I've tried: __attribute__ ((noinline, noclone)) int f1 (int x) { int y = 3, z = 4; __attribute__ ((noinline, noclone)) int f2 (int a) { return a + x + y + z; } return f2 (5); } __attribute__ ((noinline, noclone)) int f3 (int x) { int y = 3, z = 4; __attribute__ ((noinline, noclone)) int f4 (int a) { return a + x + y + z; } return f4 (5); } int main () { if (f1 (9) != 21 || f3 (9) != 21) __builtin_abort (); return 0; } but ICF doesn't optimize this with or without the patch, as the structs aren't the same type (supposedly different alias set?), even when they have the same members laid out the same. Hello Jakub. You are right, more precisely types_compatible_p return false for these two structures. I'll write this situation to my TODO list. Thank you for sending the patch. Martin 2014-11-11 Jakub Jelinek ja...@redhat.com Martin Liska mli...@suse.cz * ipa-icf-gimple.c (func_checker::compare_bb): Fix comment typo. (func_checker::compare_gimple_call): Compare gimple_call_fn, gimple_call_chain, gimple_call_fntype and call flags. testsuite/ * gcc.dg/ubsan/ipa-icf-1.c: New test. * gcc.dg/ipa/ipa-icf-31.c: New test. 
--- gcc/ipa-icf-gimple.c.jj 2014-10-30 14:42:20.0 +0100 +++ gcc/ipa-icf-gimple.c2014-11-10 19:08:38.339986360 +0100 @@ -554,7 +554,7 @@ func_checker::parse_labels (sem_bb *bb) In general, a collection of equivalence dictionaries is built for types like SSA names, declarations (VAR_DECL, PARM_DECL, ..). This infrastructure - is utilized by every statement-by-stament comparison function. */ + is utilized by every statement-by-statement comparison function. */ bool func_checker::compare_bb (sem_bb *bb1, sem_bb *bb2) @@ -659,12 +659,39 @@ func_checker::compare_gimple_call (gimpl if (gimple_call_num_args (s1) != gimple_call_num_args (s2)) return false; - t1 = gimple_call_fndecl (s1); - t2 = gimple_call_fndecl (s2); - - /* Function pointer variables are not supported yet. */ + t1 = gimple_call_fn (s1); + t2 = gimple_call_fn (s2); if (!compare_operand (t1, t2)) -return return_false(); +return return_false (); + + /* Compare flags. */ + if (gimple_call_internal_p (s1) != gimple_call_internal_p (s2) + || gimple_call_ctrl_altering_p (s1) != gimple_call_ctrl_altering_p (s2) + || gimple_call_tail_p (s1) != gimple_call_tail_p (s2) + || gimple_call_return_slot_opt_p (s1) != gimple_call_return_slot_opt_p (s2) + || gimple_call_from_thunk_p (s1) != gimple_call_from_thunk_p (s2) + || gimple_call_va_arg_pack_p (s1) != gimple_call_va_arg_pack_p (s2) + || gimple_call_alloca_for_var_p (s1) != gimple_call_alloca_for_var_p (s2) + || gimple_call_with_bounds_p (s1) != gimple_call_with_bounds_p (s2)) +return false; + + if (gimple_call_internal_p (s1) + gimple_call_internal_fn (s1) != gimple_call_internal_fn (s2)) +return false; + + tree fntype1 = gimple_call_fntype (s1); + tree fntype2 = gimple_call_fntype (s2); + if ((fntype1 !fntype2) + || (!fntype1 fntype2) + || (fntype1 !types_compatible_p (fntype1, fntype2))) +return return_false_with_msg (call function types are not compatible); + + tree chain1 = gimple_call_chain (s1); + tree chain2 = gimple_call_chain (s2); + if ((chain1 !chain2) + 
|| (!chain1 chain2) + || !compare_operand (chain1, chain2)) +return return_false_with_msg (static call chains are different); /* Checking of argument. */ for (i = 0; i gimple_call_num_args (s1); ++i) --- gcc/testsuite/gcc.dg/ubsan/ipa-icf-1.c.jj 2014-11-10 19:00:53.509525071 +0100 +++ gcc/testsuite/gcc.dg/ubsan/ipa-icf-1.c 2014-11-10 19:02:21.836925806 +0100 @@ -0,0 +1,23 @@ +/* { dg-do run } */ +/* { dg-skip-if { *-*-* } { * } { -O2 } } */ +/* { dg-options -fsanitize=undefined -fipa-icf } */ + +__attribute__ ((noinline, noclone)) +int f1 (int x, int y) +{ + return x + y; +} + +__attribute__ ((noinline, noclone)) +int f2 (int x, int y) +{ + return x - y; +} + +int +main () +{ + if (f1 (5, 6) != 11 || f2 (5, 6) != -1) +__builtin_abort (); + return 0; +} --- gcc/testsuite/gcc.dg/ipa/ipa-icf-31.c.jj2014-11-10 18:59:16.604294652 +0100 +++ gcc/testsuite/gcc.dg/ipa/ipa-icf-31.c 2014-11-10 18:59:59.690519616 +0100 @@ -0,0 +1,41 @@ +/* {
Re: [PATCH 1/4] cgraph_summary data structure introduction.
On 11/13/2014 03:33 PM, Richard Biener wrote: On Thu, Nov 13, 2014 at 3:10 PM, mliska mli...@suse.cz wrote: Following patchset introduces cgraph_summary template class that should replace custom implementation of cgraph related summaries. Idea behind the patch is to provide a generic interface one can use to register custom summary related to a cgraph_node. As you know, symbol_table provides hooks for creation, deletion and duplication of a cgraph_node. If you have a pass, you need to handle all these hooks and store custom data in your data structure. Patchset can bootstrap on x86_64-linux-pc and no regression was observed. Ready for trunk? Thank you, Martin --- gcc/ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2eb6f35..ad3c2bf 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,4 @@ + 2014-11-12 Alan Lawrence alan.lawre...@arm.com * config/aarch64/aarch64.c (aarch64_simd_lane_bounds): Display indices. huh? Please ignore this modification, I forgot to remove it from the email patchset. I've just introduced the change to be able to introduce git commit. Thank you for understanding, Martin -- 2.1.2
Re: [PATCH 2/4] New data structure for cgraph_summary introduced.
On 11/13/2014 03:48 PM, Markus Trippelsdorf wrote: On 2014.11.13 at 15:11 +0100, mliska wrote: Just two remarks: +template class T +class GTY((user)) cgraph_summary T * +{ +public: + /* Default construction takes SYMTAB as an argument. */ + cgraph_summary (symbol_table *symtab, bool ggc = false): m_ggc (ggc), +m_insertion_enabled (true), m_symtab (symtab) + { +cgraph_node *node; + +FOR_EACH_FUNCTION (node) +{ + gcc_assert (node-summary_uid 0); +} + +m_map = new hash_mapint, T*, summary_hashmap_traits(13, m_ggc); + +m_symtab_insertion_hook = + symtab-add_cgraph_insertion_hook + (cgraph_summary::symtab_insertion, this); + +m_symtab_removal_hook = + symtab-add_cgraph_removal_hook + (cgraph_summary::symtab_removal, this); +m_symtab_duplication_hook = + symtab-add_cgraph_duplication_hook + (cgraph_summary::symtab_duplication, this); + } + + /* Destructor. */ + virtual ~cgraph_summary () + { +destroy (); + } From https://gcc.gnu.org/wiki/CppConventions: Constructors and destructors are often much larger than programmers expect. Prefer non-inline versions unless you have evidence that the inline version is needed. ... + inline T* operator[] (int uid) + { +T **v = m_map-get (uid); The inline keyword is redundant for members inside a class definition. Please drop it. Hi. Thank you for remarks. There's new version of the patch. Thanks, Martin diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 3d671c2..bf11277 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1175,6 +1175,7 @@ OBJS = \ cfgrtl.o \ symtab.o \ cgraph.o \ + cgraph_summary.o \ cgraphbuild.o \ cgraphunit.o \ cgraphclones.o \ diff --git a/gcc/cgraph.h b/gcc/cgraph.h index e2becb9..588b6d5 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -1225,6 +1225,8 @@ public: int count_materialization_scale; /* Unique id of the node. */ int uid; + /* Summary unique id of the node. */ + int summary_uid; /* ID assigned by the profiling. */ unsigned int profile_id; /* Time profiler: first run of function. 
*/ @@ -1786,6 +1788,10 @@ public: friend class cgraph_node; friend class cgraph_edge; + symbol_table (): cgraph_max_summary_uid (1) + { + } + /* Initialize callgraph dump file. */ void initialize (void); @@ -1982,6 +1988,7 @@ public: int cgraph_count; int cgraph_max_uid; + int cgraph_max_summary_uid; int edges_count; int edges_max_uid; @@ -2310,6 +2317,7 @@ symbol_table::allocate_cgraph_symbol (void) node-uid = cgraph_max_uid++; } + node-summary_uid = cgraph_max_summary_uid++; return node; } diff --git a/gcc/cgraph_summary.c b/gcc/cgraph_summary.c new file mode 100644 index 000..9af1d7e --- /dev/null +++ b/gcc/cgraph_summary.c @@ -0,0 +1,34 @@ +#include config.h +#include system.h +#include coretypes.h +#include tm.h +#include tree.h +#include predict.h +#include vec.h +#include hashtab.h +#include hash-set.h +#include machmode.h +#include hard-reg-set.h +#include input.h +#include function.h +#include dominance.h +#include cfg.h +#include basic-block.h +#include tree-ssa-alias.h +#include internal-fn.h +#include gimple-expr.h +#include is-a.h +#include gimple.h +#include tree-inline.h +#include dumpfile.h +#include langhooks.h +#include splay-tree.h +#include hash-map.h +#include plugin-api.h +#include ipa-ref.h +#include cgraph.h +#include ipa-utils.h +#include alloc-pool.h +#include cgraph_summary.h +#include ipa-prop.h +#include hash-map.h diff --git a/gcc/cgraph_summary.h b/gcc/cgraph_summary.h new file mode 100644 index 000..d89b679 --- /dev/null +++ b/gcc/cgraph_summary.h @@ -0,0 +1,301 @@ +/* Callgraph summary data structure. + Copyright (C) 2014 Free Software Foundation, Inc. + Contributed by Martin Liska + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. 
+ +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +http://www.gnu.org/licenses/. */ + +#ifndef GCC_CGRAPH_SUMMARY_H +#define GCC_CGRAPH_SUMMARY_H + +#define CGRAPH_SUMMARY_DELETED_VALUE -1 +#define CGRAPH_SUMMARY_EMPTY_VALUE 0 + +template class T +class cgraph_summary +{ + private: +cgraph_summary(); +}; + +template class T +class GTY((user)) cgraph_summary T * +{ +public: + /* Default construction takes SYMTAB as an argument. */ + cgraph_summary (symbol_table *symtab, bool ggc = false): m_ggc (ggc), +m_insertion_enabled (true), m_symtab (symtab) + { +cgraph_node *node; + +
Re: [PATCH 2/4] New data structure for cgraph_summary introduced.
On 11/13/2014 04:50 PM, Jan Hubicka wrote: gcc/ChangeLog: 2014-11-12 Martin Liska mli...@suse.cz * Makefile.in: New object file is added. * cgraph.h (symbol_table::allocate_cgraph_symbol): Summary UID is filled up. * cgraph_summary.c: New file. * cgraph_summary.h: New file. Since I am trying to get rid of the cgraph prefixes for symbols (keep it for the graph only) and the summaries can be annotated to variables too. Even if it not necessarily supported by your current implementation, lets keep API prepared for it. So I would call it symtab-summary.* for source files and symtab_summary for base type (probably function_summary for annotating functions/cgraph_edge_summary for annotating edges?) Hello. I followed your remarks, new class is called function_summary and is located in symbol-summary.h. diff --git a/gcc/cgraph.h b/gcc/cgraph.h index e2becb9..588b6d5 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -1225,6 +1225,8 @@ public: int count_materialization_scale; /* Unique id of the node. */ int uid; + /* Summary unique id of the node. */ + int summary_uid; What makes summary_uid better than uid? Because cgraph_node::uid is not a unique ID, it's recycled. As I can see, there are two remaining usages of the fact that cgraph::uid are quite consecutive: a) node_growth_cache vector is resized according to cgraph_max_uid b) lto-partition.c: lto_balanced_map If we change ipa-related stuff to annotations and lto_balanced_map with be rewritten, we can finally unify uid and summary_uid. As Martin correctly pointed out, we should unify cgraph_node dumps, we combine uid and order. diff --git a/gcc/cgraph_summary.c b/gcc/cgraph_summary.c new file mode 100644 index 000..9af1d7e --- /dev/null +++ b/gcc/cgraph_summary.c And why do we need this file? It will need license header if really needed. Sure, the file can be removed. Martin The implementation seems sane - I will check the actual uses :) Please send the updated patch though. 
Honza From d7c149edea20850e95fde2e2e332895f5b5a8594 Mon Sep 17 00:00:00 2001 From: mliska mli...@suse.cz Date: Thu, 13 Nov 2014 15:11:05 +0100 Subject: [PATCH 1/3] New data structure for function_summary introduced. gcc/ChangeLog: 2014-11-12 Martin Liska mli...@suse.cz * cgraph.h (symbol_table::allocate_cgraph_symbol): Summary UID is filled up. * symbol-summary.h: New file. * gengtype.c (open_base_files): Add symbol-summary.h. * toplev.c (general_init): Call constructor of symbol_table. --- gcc/cgraph.h | 8 ++ gcc/gengtype.c | 4 +- gcc/symbol-summary.h | 313 +++ gcc/toplev.c | 3 +- 4 files changed, 325 insertions(+), 3 deletions(-) create mode 100644 gcc/symbol-summary.h diff --git a/gcc/cgraph.h b/gcc/cgraph.h index e2becb9..588b6d5 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -1225,6 +1225,8 @@ public: int count_materialization_scale; /* Unique id of the node. */ int uid; + /* Summary unique id of the node. */ + int summary_uid; /* ID assigned by the profiling. */ unsigned int profile_id; /* Time profiler: first run of function. */ @@ -1786,6 +1788,10 @@ public: friend class cgraph_node; friend class cgraph_edge; + symbol_table (): cgraph_max_summary_uid (1) + { + } + /* Initialize callgraph dump file. 
*/ void initialize (void); @@ -1982,6 +1988,7 @@ public: int cgraph_count; int cgraph_max_uid; + int cgraph_max_summary_uid; int edges_count; int edges_max_uid; @@ -2310,6 +2317,7 @@ symbol_table::allocate_cgraph_symbol (void) node-uid = cgraph_max_uid++; } + node-summary_uid = cgraph_max_summary_uid++; return node; } diff --git a/gcc/gengtype.c b/gcc/gengtype.c index fac83ee..1e2db27 100644 --- a/gcc/gengtype.c +++ b/gcc/gengtype.c @@ -1842,8 +1842,8 @@ open_base_files (void) tree-ssa-loop-niter.h, tree-into-ssa.h, tree-dfa.h, tree-ssa.h, reload.h, cpp-id-data.h, tree-chrec.h, except.h, output.h, cfgloop.h, target.h, lto-streamer.h, - target-globals.h, ipa-ref.h, cgraph.h, ipa-prop.h, - ipa-inline.h, dwarf2out.h, NULL + target-globals.h, ipa-ref.h, cgraph.h, function-summary.h, + ipa-prop.h, ipa-inline.h, dwarf2out.h, NULL }; const char *const *ifp; outf_p gtype_desc_c; diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h new file mode 100644 index 000..893f065 --- /dev/null +++ b/gcc/symbol-summary.h @@ -0,0 +1,313 @@ +/* Callgraph summary data structure. + Copyright (C) 2014 Free Software Foundation, Inc. + Contributed by Martin Liska + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but
Re: [PATCH 4/4] Data structure is used for inline_summary struct.
On 11/13/2014 05:04 PM, Jan Hubicka wrote: + if (!inline_summary_summary) +inline_summary_summary = (inline_summary_cgraph_summary *) inline_summary_cgraph_summary::create_ggc (symtab); Hehe, this is a funny naming scheme. Perhaps inline_summary_d and inline_summary_t for the data and type? Hello. I adopted the suggested naming scheme. - -static void -inline_node_duplication_hook (struct cgraph_node *src, - struct cgraph_node *dst, - ATTRIBUTE_UNUSED void *data) +void +inline_summary_cgraph_summary::duplication_hook (cgraph_node *src, + cgraph_node *dst, + inline_summary *, + inline_summary *info) Because those are no longer hooks but virtual functions, I guess we could call them simply duplicate/insert/remove. Agree with the change. In a way I would like to see these to be methods of the underlying type rather than virtual methods of the summary, because these are operations on the data themselves. I was thinking to model these by special constructor and copy constructor (taking the extra node pointer parameters) and standard destructor. I am not sure this would be more understandable this way? Motivation for this implementation is: a) it's useful to have an access to cgraph_node that is associated with a summary b) with GTY, we cannot call destructors -/* Need a typedef for inline_summary because of inline function - 'inline_summary' below. 
*/ -typedef struct inline_summary inline_summary_t; -extern GTY(()) vecinline_summary_t, va_gc *inline_summary_vec; +class GTY((user)) inline_summary_cgraph_summary: public cgraph_summary inline_summary * +{ +public: + inline_summary_cgraph_summary (symbol_table *symtab, bool ggc): +cgraph_summary inline_summary * (symtab, ggc) {} + + static inline_summary_cgraph_summary *create_ggc (symbol_table *symtab) + { +inline_summary_cgraph_summary *summary = new (ggc_cleared_alloc inline_summary_cgraph_summary ()) inline_summary_cgraph_summary(symtab, true); +summary-disable_insertion_hook (); +return summary; + } + + + virtual void insertion_hook (cgraph_node *, inline_summary *); + virtual void removal_hook (cgraph_node *node, inline_summary *); + virtual void duplication_hook (cgraph_node *src, cgraph_node *dst, inline_summary *src_data, inline_summary *dst_data); +}; + +extern GTY(()) cgraph_summary inline_summary * *inline_summary_summary; All in all it looks better than original code. If we moved insert/ /* Information kept about parameter of call site. */ struct inline_param_summary @@ -249,10 +265,10 @@ void clone_inlined_nodes (struct cgraph_edge *e, bool, bool, int *, extern int ncalls_inlined; extern int nfunctions_inlined; -static inline struct inline_summary * -inline_summary (struct cgraph_node *node) +static inline inline_summary * +get_inline_summary (const struct cgraph_node *node) { - return (*inline_summary_vec)[node-uid]; + return (*inline_summary_summary)[node-summary_uid]; Hmm, i guess there is no way to avoid the (*...)? Otherwise it would be cleaner to use inline_summary[...] instead of get_inline_summary IMO. I added function_summary::get method, where the usage looks cleaner: inline_summary_d-get (node). Thanks, Martin Thanks for working on this! 
Honza From 6e8531d8d3659524e337c7c1d96596952c3ff0e8 Mon Sep 17 00:00:00 2001 From: mliska mli...@suse.cz Date: Fri, 14 Nov 2014 14:54:12 +0100 Subject: [PATCH 3/3] Data structure is used for inline_summary struct. gcc/ChangeLog: 2014-11-12 Martin Liska mli...@suse.cz * cgraphunit.c (symbol_table::process_new_functions): inline_summary_vec is replaced with inline_summary_t. * ipa-cp.c (ipcp_cloning_candidate_p): Usage of inline_summary_d::get. (devirtualization_time_bonus): Likewise. (estimate_local_effects): Likewise. (ipcp_propagate_stage): Likewise. * ipa-inline-analysis.c (evaluate_conditions_for_known_args): Likewise. (evaluate_properties_for_edge): Likewise. (inline_summary_alloc): Deletion of old hook holders. (reset_inline_summary): inline_summary is added as argument. (inline_summary_cgraph_summary::removal_hook): New function. (inline_summary_cgraph_summary::duplication_hook): Likewise. (dump_inline_edge_summary): Struct keyword removed. (dump_inline_summary): Likewise. (estimate_function_body_sizes): Usage of inline_summary_d::get. (compute_inline_parameters): Likewise. (estimate_edge_devirt_benefit): Struct keyword removed. (estimate_node_size_and_time): Likewise. (inline_update_callee_summaries): Likewise. (inline_merge_summary): Usage of inline_summary_d::get. (inline_update_overall_summary): Likewise. (simple_edge_hints): Likewise. (do_estimate_edge_time): Likewise. (estimate_time_after_inlining): Likewise. (estimate_size_after_inlining): Likewise. (do_estimate_growth): Likewise. (growth_likely_positive): Likewise. (inline_generate_summary):
Re: [PATCH 2/4] New data structure for cgraph_summary introduced.
On 11/14/2014 03:04 PM, Martin Liška wrote: On 11/13/2014 04:50 PM, Jan Hubicka wrote: gcc/ChangeLog: 2014-11-12 Martin Liska mli...@suse.cz * Makefile.in: New object file is added. * cgraph.h (symbol_table::allocate_cgraph_symbol): Summary UID is filled up. * cgraph_summary.c: New file. * cgraph_summary.h: New file. Since I am trying to get rid of the cgraph prefixes for symbols (keep it for the graph only) and the summaries can be annotated to variables too. Even if it not necessarily supported by your current implementation, lets keep API prepared for it. So I would call it symtab-summary.* for source files and symtab_summary for base type (probably function_summary for annotating functions/cgraph_edge_summary for annotating edges?) Hello. I followed your remarks, new class is called function_summary and is located in symbol-summary.h. diff --git a/gcc/cgraph.h b/gcc/cgraph.h index e2becb9..588b6d5 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -1225,6 +1225,8 @@ public: int count_materialization_scale; /* Unique id of the node. */ int uid; + /* Summary unique id of the node. */ + int summary_uid; What makes summary_uid better than uid? Because cgraph_node::uid is not a unique ID, it's recycled. As I can see, there are two remaining usages of the fact that cgraph::uid are quite consecutive: a) node_growth_cache vector is resized according to cgraph_max_uid b) lto-partition.c: lto_balanced_map If we change ipa-related stuff to annotations and lto_balanced_map with be rewritten, we can finally unify uid and summary_uid. As Martin correctly pointed out, we should unify cgraph_node dumps, we combine uid and order. diff --git a/gcc/cgraph_summary.c b/gcc/cgraph_summary.c new file mode 100644 index 000..9af1d7e --- /dev/null +++ b/gcc/cgraph_summary.c And why do we need this file? It will need license header if really needed. Sure, the file can be removed. Martin The implementation seems sane - I will check the actual uses :) Please send the updated patch though. 
Honza Hello. There's v3 of the patch. Martin From 89b6fb0f599944b564726947b33a7be214dd0f74 Mon Sep 17 00:00:00 2001 From: mliska mli...@suse.cz Date: Thu, 13 Nov 2014 15:11:05 +0100 Subject: [PATCH 1/3] New data structure for function_summary introduced. gcc/ChangeLog: 2014-11-12 Martin Liska mli...@suse.cz * cgraph.h (symbol_table::allocate_cgraph_symbol): Summary UID is filled up. * symbol-summary.h: New file. * gengtype.c (open_base_files): Add symbol-summary.h. * toplev.c (general_init): Call constructor of symbol_table. --- gcc/cgraph.h | 8 ++ gcc/gengtype.c | 4 +- gcc/symbol-summary.h | 317 +++ gcc/toplev.c | 3 +- 4 files changed, 329 insertions(+), 3 deletions(-) create mode 100644 gcc/symbol-summary.h diff --git a/gcc/cgraph.h b/gcc/cgraph.h index e2becb9..588b6d5 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -1225,6 +1225,8 @@ public: int count_materialization_scale; /* Unique id of the node. */ int uid; + /* Summary unique id of the node. */ + int summary_uid; /* ID assigned by the profiling. */ unsigned int profile_id; /* Time profiler: first run of function. */ @@ -1786,6 +1788,10 @@ public: friend class cgraph_node; friend class cgraph_edge; + symbol_table (): cgraph_max_summary_uid (1) + { + } + /* Initialize callgraph dump file. 
*/ void initialize (void); @@ -1982,6 +1988,7 @@ public: int cgraph_count; int cgraph_max_uid; + int cgraph_max_summary_uid; int edges_count; int edges_max_uid; @@ -2310,6 +2317,7 @@ symbol_table::allocate_cgraph_symbol (void) node-uid = cgraph_max_uid++; } + node-summary_uid = cgraph_max_summary_uid++; return node; } diff --git a/gcc/gengtype.c b/gcc/gengtype.c index fac83ee..0161004 100644 --- a/gcc/gengtype.c +++ b/gcc/gengtype.c @@ -1842,8 +1842,8 @@ open_base_files (void) tree-ssa-loop-niter.h, tree-into-ssa.h, tree-dfa.h, tree-ssa.h, reload.h, cpp-id-data.h, tree-chrec.h, except.h, output.h, cfgloop.h, target.h, lto-streamer.h, - target-globals.h, ipa-ref.h, cgraph.h, ipa-prop.h, - ipa-inline.h, dwarf2out.h, NULL + target-globals.h, ipa-ref.h, cgraph.h, symbol-summary.h, + ipa-prop.h, ipa-inline.h, dwarf2out.h, NULL }; const char *const *ifp; outf_p gtype_desc_c; diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h new file mode 100644 index 000..039d052 --- /dev/null +++ b/gcc/symbol-summary.h @@ -0,0 +1,317 @@ +/* Callgraph summary data structure. + Copyright (C) 2014 Free Software Foundation, Inc. + Contributed by Martin Liska + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version
Re: [PATCH 4/4] Data structure is used for inline_summary struct.
On 11/14/2014 03:09 PM, Martin Liška wrote: On 11/13/2014 05:04 PM, Jan Hubicka wrote: + if (!inline_summary_summary) +inline_summary_summary = (inline_summary_cgraph_summary *) inline_summary_cgraph_summary::create_ggc (symtab); Hehe, this is a funny naming scheme. Perhaps inline_summary_d and inline_summary_t for the data and type? Hello. I adopted the suggested naming scheme. - -static void -inline_node_duplication_hook (struct cgraph_node *src, - struct cgraph_node *dst, - ATTRIBUTE_UNUSED void *data) +void +inline_summary_cgraph_summary::duplication_hook (cgraph_node *src, + cgraph_node *dst, + inline_summary *, + inline_summary *info) Because those are no longer hooks but virtual functions, I guess we could call them simply duplicate/insert/remove. Agree with the change. In a way I would like to see these to be methods of the underlying type rather than virtual methods of the summary, because these are operations on the data themselves. I was thinking to model these by special constructor and copy constructor (taking the extra node pointer parameters) and standard destructor. I am not sure this would be more understandable this way? Motivation for this implementation is: a) it's useful to have an access to cgraph_node that is associated with a summary b) with GTY, we cannot call destructors -/* Need a typedef for inline_summary because of inline function - 'inline_summary' below. 
*/ -typedef struct inline_summary inline_summary_t; -extern GTY(()) vecinline_summary_t, va_gc *inline_summary_vec; +class GTY((user)) inline_summary_cgraph_summary: public cgraph_summary inline_summary * +{ +public: + inline_summary_cgraph_summary (symbol_table *symtab, bool ggc): +cgraph_summary inline_summary * (symtab, ggc) {} + + static inline_summary_cgraph_summary *create_ggc (symbol_table *symtab) + { +inline_summary_cgraph_summary *summary = new (ggc_cleared_alloc inline_summary_cgraph_summary ()) inline_summary_cgraph_summary(symtab, true); +summary-disable_insertion_hook (); +return summary; + } + + + virtual void insertion_hook (cgraph_node *, inline_summary *); + virtual void removal_hook (cgraph_node *node, inline_summary *); + virtual void duplication_hook (cgraph_node *src, cgraph_node *dst, inline_summary *src_data, inline_summary *dst_data); +}; + +extern GTY(()) cgraph_summary inline_summary * *inline_summary_summary; All in all it looks better than original code. If we moved insert/ /* Information kept about parameter of call site. */ struct inline_param_summary @@ -249,10 +265,10 @@ void clone_inlined_nodes (struct cgraph_edge *e, bool, bool, int *, extern int ncalls_inlined; extern int nfunctions_inlined; -static inline struct inline_summary * -inline_summary (struct cgraph_node *node) +static inline inline_summary * +get_inline_summary (const struct cgraph_node *node) { - return (*inline_summary_vec)[node-uid]; + return (*inline_summary_summary)[node-summary_uid]; Hmm, i guess there is no way to avoid the (*...)? Otherwise it would be cleaner to use inline_summary[...] instead of get_inline_summary IMO. I added function_summary::get method, where the usage looks cleaner: inline_summary_d-get (node). Thanks, Martin Thanks for working on this! Honza Patch v3. 
Martin From 7f57a3a762fecea9a20e307f06e868a73da98000 Mon Sep 17 00:00:00 2001 From: mliska mli...@suse.cz Date: Fri, 14 Nov 2014 14:54:12 +0100 Subject: [PATCH 3/3] Data structure is used for inline_summary struct. gcc/ChangeLog: 2014-11-12 Martin Liska mli...@suse.cz * cgraphunit.c (symbol_table::process_new_functions): inline_summary_vec is replaced with inline_summary_t. * ipa-cp.c (ipcp_cloning_candidate_p): Usage of inline_summary_t::get. (devirtualization_time_bonus): Likewise. (estimate_local_effects): Likewise. (ipcp_propagate_stage): Likewise. * ipa-inline-analysis.c (evaluate_conditions_for_known_args): Likewise. (evaluate_properties_for_edge): Likewise. (inline_summary_alloc): Deletion of old hook holders. (reset_inline_summary): inline_summary is added as argument. (inline_summary_cgraph_summary::removal_hook): New function. (inline_summary_cgraph_summary::duplication_hook): Likewise. (dump_inline_edge_summary): Struct keyword removed. (dump_inline_summary): Likewise. (estimate_function_body_sizes): Usage of inline_summary_t::get. (compute_inline_parameters): Likewise. (estimate_edge_devirt_benefit): Struct keyword removed. (estimate_node_size_and_time): Likewise. (inline_update_callee_summaries): Likewise. (inline_merge_summary): Usage of inline_summary_t::get. (inline_update_overall_summary): Likewise. (simple_edge_hints): Likewise. (do_estimate_edge_time): Likewise. (estimate_time_after_inlining): Likewise. (estimate_size_after_inlining): Likewise. (do_estimate_growth): Likewise. (growth_likely_positive): Likewise
Re: [PATCH 4/4] Data structure is used for inline_summary struct.
On 11/14/2014 05:06 PM, Jan Hubicka wrote: In a way I would like to see these to be methods of the underlying type rather than virtual methods of the summary, because these are operations on the data themselves. I was thinking to model these by special constructor and copy constructor (taking the extra node pointer parameters) and standard destructor. I am not sure this would be more understandable this way? Motivation for this implementation is: a) it's useful to have an access to cgraph_node that is associated with a summary Yep, one would have node addition ctor (symtab_node *); (or cgraph/varpool nodes for cgraph/varpool annotations) that would default to ctor for implementations that do not care about node. And node duplication ctor ctor (summary &, symtab_node *, symtab_node *) that would default to copy constructor for data that do not need to be copied. Hello. I have no problem with such construction and destruction, we can also provide base implementation. I would say that main advantage (in addition to having a way to provide reasonable defaults) is to make ctors/dtors of the embedded classes working well, so one can for example embed pointer_map and not care about its construction/destruction. b) with GTY, we cannot call destructor Everything in symbol table is explicitly memory managed (i.e. never left to be freed by garbage collector). It resides in GTY only to allow linking garbage collected object from them and to get PCH working. However GTY types need to be allocated by ggc_alloc and one can't call dtor. This was main motivation for providing hooks instead of ctor/dtor API. Maybe I miss something? Thanks, Martin This is however quite cosmetic issue I would prefer our C++ guys to comment on. We can tweak this incrementally. 
+void +inline_summary_t::duplicate (cgraph_node *src, +cgraph_node *dst, +inline_summary *, +inline_summary *info) Also we should have a way to say that the annotation do not need to be duplicated (for example when we do not want to annotate inline clones). Probably by adding duplicate_p predicate that is called before the actual duplication happens? The updated patch is OK, I will take a look on the main patch. Honza { - struct inline_summary *info; inline_summary_alloc (); - info = inline_summary (dst); - memcpy (info, inline_summary (src), sizeof (struct inline_summary)); + memcpy (info, inline_summary_d-get (src), sizeof (inline_summary)); /* TODO: as an optimization, we may avoid copying conditions that are known to be false or true. */ info-conds = vec_safe_copy (info-conds); @@ -1328,7 +1309,7 @@ free_growth_caches (void) static void dump_inline_edge_summary (FILE *f, int indent, struct cgraph_node *node, - struct inline_summary *info) + inline_summary *info) { struct cgraph_edge *edge; for (edge = node-callees; edge; edge = edge-next_callee) @@ -1345,8 +1326,8 @@ dump_inline_edge_summary (FILE *f, int indent, struct cgraph_node *node, ? 
inlined : cgraph_inline_failed_string (edge- inline_failed), indent, , es-loop_depth, edge-frequency, es-call_stmt_size, es-call_stmt_time, - (int) inline_summary (callee)-size / INLINE_SIZE_SCALE, - (int) inline_summary (callee)-estimated_stack_size); + (int) inline_summary_d-get (callee)-size / INLINE_SIZE_SCALE, + (int) inline_summary_d-get (callee)-estimated_stack_size); if (es-predicate) { @@ -1372,9 +1353,9 @@ dump_inline_edge_summary (FILE *f, int indent, struct cgraph_node *node, fprintf (f, %*sStack frame offset %i, callee self size %i, callee size %i\n, indent + 2, , - (int) inline_summary (callee)-stack_frame_offset, - (int) inline_summary (callee)-estimated_self_stack_size, - (int) inline_summary (callee)-estimated_stack_size); + (int) inline_summary_d-get (callee)-stack_frame_offset, + (int) inline_summary_d-get (callee)-estimated_self_stack_size, + (int) inline_summary_d-get (callee)-estimated_stack_size); dump_inline_edge_summary (f, indent + 2, callee, info); } } @@ -1402,7 +1383,7 @@ dump_inline_summary (FILE *f, struct cgraph_node *node) { if (node-definition) { - struct inline_summary *s = inline_summary (node); + inline_summary *s = inline_summary_d-get (node); size_time_entry *e; int i; fprintf (f, Inline summary for %s/%i, node-name (), @@ -1725,7 +1706,7 @@ eliminated_by_inlining_prob (gimple stmt) static void set_cond_stmt_execution_predicate (struct ipa_node_params *info, - struct inline_summary *summary, +
Re: [PATCH 4/4] Data structure is used for inline_summary struct.
On 11/18/2014 11:25 PM, Martin Jambor wrote: On Tue, Nov 18, 2014 at 07:59:26PM +0100, Jan Hubicka wrote: Hi, On Tue, Nov 18, 2014 at 04:39:00PM +0100, Jan Hubicka wrote: On Fri, Nov 14, 2014 at 08:59:10PM +0100, Jan Hubicka wrote: b) with GTY, we cannot call destructor Everything in symbol table is explicitly memory managed (i.e. never left to be freed by garbage collector). It resides in GTY only to allow linking garbage collected object from them and to get PCH working. Well, if I understand the intent correctly, summaries are for stuff that is not in the symbol table. For example jump functions are a Correct. vector of structures possibly containing trees, so everything has to be in garbage collected memory. When an edge is removed, it is necessary to be notified about it immediately, for example to decrement rdesc_refcount (you might argue that that should be done in a separate hook and not from within a summary class but then you start to rely on hook invocation ordering so I think it is better to eventually use the summaries for it too). I do not see why ctors/dtors can not do the reference counting. In fact this is how refcounting is done usually anyway? Well, when there is no garbage collection involved then yes, that is how you normally do it but in the GC case, there is the question of what is the appropriate time to call destructor on garbage collected data (like jump functions)? I still fail to see problem here. Summaries are explicitly managed - they are constructed at summary construction time or when new callgraph node is introduced/duplicated. They are destroyed when callgraph node is destroyed or whole summary is destroyed. It is job of the summary datastructure to call proper ctors/dtors, not job of garbage collector that provides the underlying memory management. I do not think that all summaries (in the meaning of a description of one particular symbol table node or call graph edge) are explicitly managed. 
For example ipa_edge_args or ipa_agg_replacement_value (which my alignment patch changes to ipcp_transformation_summary) are allocated in GC memory because they contain trees. If you have datastructure that points to something that is not explicitly managed (i.e. tree expression), you just can not have non-trivial constructor on that datastructure, because that is freed transparently by gty that don't do destruction... I admit to not being particularly bright today but that seems to be exactly my point. Well, in your case you have datastructure jump_function that contain a pointer to tree (EXPR). What I am trying to explain is that I see no reson why jump_function needs to be POD. I never said that the summary object needs to be a POD, I only said I liked the possibility of storing very simple objects (without wrapping them in classes with constructors and destructors). That is of course nothing more than my personal preference. The tree pointed to by EXPR pointer can not have a dtor by itself because GGC will not call it upon freeing. It is true that jump_function lives in GGC memory (to make pointer to expr work) but it never gets removed by ggc_collect because it is always pointed to by the summary datastructure. There are two ways to free the jump_function datastructure. 1) removing the symbol node it is attached to. Here the symtab code will call removal hook that was registered by container template. The container will call destructor of jump_function and the ggc_free its memory 2) removing the summary. In this case I would again expect the container template to walk all summaries and free them. So even if your structure lives in GGC memory it is not really garbage collected and thus the lack of machinery to call dtors at a time ggc decides to free something is not a problem? In fact looking at struct default_hashmap_traits, I see: /* Called to dispose of the key and value before marking the entry as deleted. 
*/ templatetypename T static void remove (T v) { v.~T (); } Now I see, I should have read your previous email more carefully, by explicitely managed you mean that destructors will be called explicitely by the summary infrastructure. I was wondering how you wanted to rip the summaries out of GGC memory. Well, I suppose that would work, and since explicit calls to destructors are basically the counterpart of placement new that we already plan to use, it might be actually be the proper C++ thing to do. (I am not sure I like it though, for all other purposes the summary objects will look like managed by the garbage collector and only we who read this thread will know that the lifetime of the object would be decoupled from the allocation-span of its memory). Thanks for the clarification, Martin Hello. I tried to come up with ctor/dtor solution for types passes to symbol_summary template class. Example: struct inline_summary { inline_summary (cgraph_node *node);
[PATCH] PR lto/63968: 175.vpr from cpu2000 fails to build with LTO
Hello. When I reimplemented fibheap as a C++ template, Honza told me that the replace_key method actually supports only the decrement operation. The old implementation suppresses any feedback if we try to increase the key: fibheap.c: ... /* If we wanted to, we could actually do a real increase by redeleting and inserting. However, this would require O (log n) time. So just bail out for now. */ if (fibheap_comp_data (heap, key, data, node) > 0) return NULL; ... My reimplementation added an assert for this kind of operation; as this PR shows, we try to do an increment in bb-reorder. Thus, I added a fibonacci_heap::replace_key method that can increment the key (it deletes the node and the new key is associated with the node). The patch can bootstrap on x86_64-linux-pc and no new regression was introduced. I would like to ask someone if the increase operation for bb-reorder is valid or not? Thanks, Martin gcc/ChangeLog: 2014-11-20 Martin Liska mli...@suse.cz * bb-reorder.c (find_traces_1_round): decrease_key is replaced with replace_key method. * fibonacci_heap.h (fibonacci_heap::insert): New argument. (fibonacci_heap::replace_key_data): Likewise. (fibonacci_heap::replace_key): New method that can even increment key, this operation costs O(log N). (fibonacci_heap::extract_min): New argument. (fibonacci_heap::delete_node): Likewise. 
diff --git a/gcc/bb-reorder.c b/gcc/bb-reorder.c index 689d7b6..b568114 100644 --- a/gcc/bb-reorder.c +++ b/gcc/bb-reorder.c @@ -644,7 +644,7 @@ find_traces_1_round (int branch_th, int exec_th, gcov_type count_th, (long) bbd[e-dest-index].node-get_key (), key); } - bbd[e-dest-index].heap-decrease_key + bbd[e-dest-index].heap-replace_key (bbd[e-dest-index].node, key); } } @@ -812,7 +812,7 @@ find_traces_1_round (int branch_th, int exec_th, gcov_type count_th, e-dest-index, (long) bbd[e-dest-index].node-get_key (), key); } - bbd[e-dest-index].heap-decrease_key + bbd[e-dest-index].heap-replace_key (bbd[e-dest-index].node, key); } } diff --git a/gcc/fibonacci_heap.h b/gcc/fibonacci_heap.h index ecb92f8..3fce370 100644 --- a/gcc/fibonacci_heap.h +++ b/gcc/fibonacci_heap.h @@ -183,20 +183,27 @@ public: } /* For given NODE, set new KEY value. */ - K decrease_key (fibonacci_node_t *node, K key) + K replace_key (fibonacci_node_t *node, K key) { K okey = node-m_key; -gcc_assert (key = okey); replace_key_data (node, key, node-m_data); return okey; } + /* For given NODE, decrease value to new KEY. */ + K decrease_key (fibonacci_node_t *node, K key) + { +gcc_assert (key = node-m_key); +return replace_key (node, key); + } + /* For given NODE, set new KEY and DATA value. */ V *replace_key_data (fibonacci_node_t *node, K key, V *data); - /* Extract minimum node in the heap. */ - V *extract_min (); + /* Extract minimum node in the heap. If RELEASE is specified, + memory is released. */ + V *extract_min (bool release = true); /* Return value associated with minimum node in the heap. */ V *min () @@ -214,12 +221,15 @@ public: } /* Delete NODE in the heap. */ - V *delete_node (fibonacci_node_t *node); + V *delete_node (fibonacci_node_t *node, bool release = true); /* Union the heap with HEAPB. */ fibonacci_heap *union_with (fibonacci_heap *heapb); private: + /* Insert new NODE given by KEY and DATA associated with the key. 
*/ + fibonacci_node_t *insert (fibonacci_node_t *node, K key, V *data); + /* Insert it into the root list. */ void insert_root (fibonacci_node_t *node); @@ -322,6 +332,15 @@ fibonacci_heapK,V::insert (K key, V *data) /* Create the new node. */ fibonacci_nodeK,V *node = new fibonacci_node_t (); + return insert (node, key, data); +} + +/* Insert new NODE given by KEY and DATA associated with the key. */ + +templateclass K, class V +fibonacci_nodeK,V* +fibonacci_heapK,V::insert (fibonacci_node_t *node, K key, V *data) +{ /* Set the node's data. */ node-m_data = data; node-m_key = key; @@ -345,17 +364,22 @@ V* fibonacci_heapK,V::replace_key_data (fibonacci_nodeK,V *node, K key, V *data) { - V *odata; K okey; fibonacci_nodeK,V *y; + V *odata = node-m_data; - /* If we wanted to, we could actually do a real increase by redeleting and - inserting. However, this would require O (log n) time. So just bail out - for now. */ + /* If we wanted to, we do a real increase by redeleting and + inserting. */ if (node-compare_data (key) 0) -return NULL; +{ + delete_node (node, false); + + node = new (node) fibonacci_node_t (); + insert (node, key, data); + + return odata; +} - odata = node-m_data; okey = node-m_key; node-m_data = data; node-m_key = key; @@ -385,7 +409,7 @@ fibonacci_heapK,V::replace_key_data
[PATCH] PR ipa/63909 ICE: SIGSEGV in ipa_icf_gimple::func_checker::compare_bb()
Hello. Following patch fixes ICE in IPA ICF. Problem was that number of non-debug statements in a BB can change (for instance by IPA split), so that the number is recomputed. Patch can bootstrap on x86_64-linux-pc and no regression has been seen. Ready for trunk? Thanks, Martin gcc/ChangeLog: 2014-11-20 Martin Liska mli...@suse.cz * gimple-iterator.h (gsi_nondebug_stmt_count): New function. * ipa-icf-gimple.c (func_checker::compare_bb): Number of BB is recomputed because it can be split. gcc/testsuite/ChangeLog: 2014-11-20 Martin Liska mli...@suse.cz * gcc.dg/ipa/pr63909.c: New test. diff --git a/gcc/gimple-iterator.h b/gcc/gimple-iterator.h index fb6cc07..f73b1f6 100644 --- a/gcc/gimple-iterator.h +++ b/gcc/gimple-iterator.h @@ -331,4 +331,18 @@ gsi_seq (gimple_stmt_iterator i) return *i.seq; } +/* Return number of nondebug statements in basic block BB. */ + +static inline unsigned +gsi_nondebug_stmt_count (basic_block bb) +{ + unsigned c = 0; + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); + gsi_next (gsi)) +if (!is_gimple_debug (gsi_stmt (gsi))) + c++; + + return c; +} + #endif /* GCC_GIMPLE_ITERATOR_H */ diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c index 8f2a438..83661ac 100644 --- a/gcc/ipa-icf-gimple.c +++ b/gcc/ipa-icf-gimple.c @@ -563,6 +563,9 @@ func_checker::compare_bb (sem_bb *bb1, sem_bb *bb2) gimple_stmt_iterator gsi1, gsi2; gimple s1, s2; + bb1-nondbg_stmt_count = gsi_nondebug_stmt_count (bb1-bb); + bb2-nondbg_stmt_count = gsi_nondebug_stmt_count (bb2-bb); + if (bb1-nondbg_stmt_count != bb2-nondbg_stmt_count || bb1-edge_count != bb2-edge_count) return return_false (); diff --git a/gcc/testsuite/gcc.dg/ipa/pr63909.c b/gcc/testsuite/gcc.dg/ipa/pr63909.c new file mode 100644 index 000..8538e21 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/pr63909.c @@ -0,0 +1,27 @@ +/* { dg-options -O2 -fno-guess-branch-probability } */ + +int z; + +__attribute__((noinline)) +void g () +{ + if (++z) +__builtin_exit (0); + g (); +} + 
+__attribute__((noinline)) +void f () +{ + if (++z) +__builtin_exit (0); + f (); +} + +int main() +{ + f (); + g (); + + return 0; +}
Re: [PATCH] PR lto/63968: 175.vpr from cpu2000 fails to build with LTO
On 11/20/2014 10:13 PM, Jan Hubicka wrote: Hello. When I reimplemented fibheap as a C++ template, Honza told me that the replace_key method actually supports just the decrement operation. The old implementation suppresses any feedback if we try to increase a key: fibheap.c: ... /* If we wanted to, we could actually do a real increase by redeleting and inserting. However, this would require O (log n) time. So just bail out for now. */ if (fibheap_comp_data (heap, key, data, node) > 0) return NULL; ... My reimplementation added an assert for this kind of operation and, as this PR shows, we try to do an increment in bb-reorder. Thus, I added a fibonacci_heap::replace_key method that can increment a key (it deletes the node and the new key is associated with the node). The patch can bootstrap on x86_64-linux-pc and no new regression was introduced. I would like to ask someone if the increase operation for bb-reorder is valid or not? Can you verify that the implementation is correct? I tend to remember that I introduced the lazy incrementation to the inliner both for performance and correctness reasons. I used to get odd orders when keys were increased. Honza Hello. What kind of correctness do you mean? The old implementation didn't support the increment operation and the fact was hushed up. Martin Thanks, Martin gcc/ChangeLog: 2014-11-20 Martin Liska mli...@suse.cz * bb-reorder.c (find_traces_1_round): decrease_key is replaced with replace_key method. * fibonacci_heap.h (fibonacci_heap::insert): New argument. (fibonacci_heap::replace_key_data): Likewise. (fibonacci_heap::replace_key): New method that can even increment key, this operation costs O(log N). (fibonacci_heap::extract_min): New argument. (fibonacci_heap::delete_node): Likewise. 
diff --git a/gcc/bb-reorder.c b/gcc/bb-reorder.c index 689d7b6..b568114 100644 --- a/gcc/bb-reorder.c +++ b/gcc/bb-reorder.c @@ -644,7 +644,7 @@ find_traces_1_round (int branch_th, int exec_th, gcov_type count_th, (long) bbd[e-dest-index].node-get_key (), key); } - bbd[e-dest-index].heap-decrease_key + bbd[e-dest-index].heap-replace_key (bbd[e-dest-index].node, key); } } @@ -812,7 +812,7 @@ find_traces_1_round (int branch_th, int exec_th, gcov_type count_th, e-dest-index, (long) bbd[e-dest-index].node-get_key (), key); } - bbd[e-dest-index].heap-decrease_key + bbd[e-dest-index].heap-replace_key (bbd[e-dest-index].node, key); } } diff --git a/gcc/fibonacci_heap.h b/gcc/fibonacci_heap.h index ecb92f8..3fce370 100644 --- a/gcc/fibonacci_heap.h +++ b/gcc/fibonacci_heap.h @@ -183,20 +183,27 @@ public: } /* For given NODE, set new KEY value. */ - K decrease_key (fibonacci_node_t *node, K key) + K replace_key (fibonacci_node_t *node, K key) { K okey = node-m_key; -gcc_assert (key = okey); replace_key_data (node, key, node-m_data); return okey; } + /* For given NODE, decrease value to new KEY. */ + K decrease_key (fibonacci_node_t *node, K key) + { +gcc_assert (key = node-m_key); +return replace_key (node, key); + } + /* For given NODE, set new KEY and DATA value. */ V *replace_key_data (fibonacci_node_t *node, K key, V *data); - /* Extract minimum node in the heap. */ - V *extract_min (); + /* Extract minimum node in the heap. If RELEASE is specified, + memory is released. */ + V *extract_min (bool release = true); /* Return value associated with minimum node in the heap. */ V *min () @@ -214,12 +221,15 @@ public: } /* Delete NODE in the heap. */ - V *delete_node (fibonacci_node_t *node); + V *delete_node (fibonacci_node_t *node, bool release = true); /* Union the heap with HEAPB. */ fibonacci_heap *union_with (fibonacci_heap *heapb); private: + /* Insert new NODE given by KEY and DATA associated with the key. 
*/ + fibonacci_node_t *insert (fibonacci_node_t *node, K key, V *data); + /* Insert it into the root list. */ void insert_root (fibonacci_node_t *node); @@ -322,6 +332,15 @@ fibonacci_heapK,V::insert (K key, V *data) /* Create the new node. */ fibonacci_nodeK,V *node = new fibonacci_node_t (); + return insert (node, key, data); +} + +/* Insert new NODE given by KEY and DATA associated with the key. */ + +templateclass K, class V +fibonacci_nodeK,V* +fibonacci_heapK,V::insert (fibonacci_node_t *node, K key, V *data) +{ /* Set the node's data. */ node-m_data = data; node-m_key = key; @@ -345,17 +364,22 @@ V* fibonacci_heapK,V::replace_key_data (fibonacci_nodeK,V *node, K key,
Re: [PATCH 8/9] Negative numbers added for sreal class.
On 11/14/2014 11:48 AM, Richard Biener wrote: On Thu, Nov 13, 2014 at 1:35 PM, mliska mli...@suse.cz wrote: gcc/ChangeLog: 2014-11-13 Martin Liska mli...@suse.cz * predict.c (propagate_freq): More elegant sreal API is used. (estimate_bb_frequencies): New static constants defined by sreal replace precomputed ones. * sreal.c (sreal::normalize): New function. (sreal::to_int): Likewise. (sreal::operator+): Likewise. (sreal::operator-): Likewise. * sreal.h: Definition of new functions added. Please use gcc_checking_assert()s everywhere. sreal is supposed to be fast... (I see it has current uses of gcc_assert - you may want to mass-convert them as a followup). --- gcc/predict.c | 30 +++- gcc/sreal.c | 56 gcc/sreal.h | 75 --- 3 files changed, 126 insertions(+), 35 deletions(-) diff --git a/gcc/predict.c b/gcc/predict.c index 0215e91..0f640f5 100644 --- a/gcc/predict.c +++ b/gcc/predict.c @@ -82,7 +82,7 @@ along with GCC; see the file COPYING3. If not see /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE, 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. 
*/ -static sreal real_zero, real_one, real_almost_one, real_br_prob_base, +static sreal real_almost_one, real_br_prob_base, real_inv_br_prob_base, real_one_half, real_bb_freq_max; static void combine_predictions_for_insn (rtx_insn *, basic_block); @@ -2528,13 +2528,13 @@ propagate_freq (basic_block head, bitmap tovisit) bb-count = bb-frequency = 0; } - BLOCK_INFO (head)-frequency = real_one; + BLOCK_INFO (head)-frequency = sreal::one (); last = head; for (bb = head; bb; bb = nextbb) { edge_iterator ei; - sreal cyclic_probability = real_zero; - sreal frequency = real_zero; + sreal cyclic_probability = sreal::zero (); + sreal frequency = sreal::zero (); nextbb = BLOCK_INFO (bb)-next; BLOCK_INFO (bb)-next = NULL; @@ -2559,13 +2559,13 @@ propagate_freq (basic_block head, bitmap tovisit) * BLOCK_INFO (e-src)-frequency / REG_BR_PROB_BASE); */ - sreal tmp (e-probability, 0); + sreal tmp = e-probability; tmp *= BLOCK_INFO (e-src)-frequency; tmp *= real_inv_br_prob_base; frequency += tmp; } - if (cyclic_probability == real_zero) + if (cyclic_probability == sreal::zero ()) { BLOCK_INFO (bb)-frequency = frequency; } @@ -2577,7 +2577,7 @@ propagate_freq (basic_block head, bitmap tovisit) /* BLOCK_INFO (bb)-frequency = frequency / (1 - cyclic_probability) */ - cyclic_probability = real_one - cyclic_probability; + cyclic_probability = sreal::one () - cyclic_probability; BLOCK_INFO (bb)-frequency = frequency / cyclic_probability; } } @@ -2591,7 +2591,7 @@ propagate_freq (basic_block head, bitmap tovisit) = ((e-probability * BLOCK_INFO (bb)-frequency) / REG_BR_PROB_BASE); */ - sreal tmp (e-probability, 0); + sreal tmp = e-probability; tmp *= BLOCK_INFO (bb)-frequency; EDGE_INFO (e)-back_edge_prob = tmp * real_inv_br_prob_base; } @@ -2873,13 +2873,11 @@ estimate_bb_frequencies (bool force) if (!real_values_initialized) { real_values_initialized = 1; - real_zero = sreal (0, 0); - real_one = sreal (1, 0); - real_br_prob_base = sreal (REG_BR_PROB_BASE, 0); - real_bb_freq_max = sreal 
(BB_FREQ_MAX, 0); + real_br_prob_base = REG_BR_PROB_BASE; + real_bb_freq_max = BB_FREQ_MAX; real_one_half = sreal (1, -1); - real_inv_br_prob_base = real_one / real_br_prob_base; - real_almost_one = real_one - real_inv_br_prob_base; + real_inv_br_prob_base = sreal::one () / real_br_prob_base; + real_almost_one = sreal::one () - real_inv_br_prob_base; } mark_dfs_back_edges (); @@ -2897,7 +2895,7 @@ estimate_bb_frequencies (bool force) FOR_EACH_EDGE (e, ei, bb-succs) { - EDGE_INFO (e)-back_edge_prob = sreal (e-probability, 0); + EDGE_INFO (e)-back_edge_prob = e-probability; EDGE_INFO (e)-back_edge_prob *= real_inv_br_prob_base; } } @@ -2906,7 +2904,7 @@ estimate_bb_frequencies (bool force) to outermost to examine frequencies for back edges. */ estimate_loops (); - freq_max = real_zero; + freq_max = sreal::zero (); FOR_EACH_BB_FN (bb, cfun) if (freq_max BLOCK_INFO (bb)-frequency)
Re: [PATCH] PR ipa/63909 ICE: SIGSEGV in ipa_icf_gimple::func_checker::compare_bb()
On 11/20/2014 05:41 PM, Richard Biener wrote: On Thu, Nov 20, 2014 at 5:30 PM, Martin Liška mli...@suse.cz wrote: Hello. Following patch fixes ICE in IPA ICF. Problem was that number of non-debug statements in a BB can change (for instance by IPA split), so that the number is recomputed. Huh, so can it get different for both candidates? I think the stmt compare loop should be terminated on gsi_end_p of either iterator and return false for any remaining non-debug-stmts on the other. Thus, not walk all stmts twice here. Hello. Sorry for the previous patch, you are right it can be fixed in purer way. Please take a look at attached patch. As IPA split is run early I don't see how it should affect a real IPA pass though? Sorry for non precise information, the problematic BB is changed here: #0 gsi_split_seq_before (i=0x7fffd550, pnew_seq=0x7fffd528) at ../../gcc/gimple-iterator.c:429 #1 0x00b95a2a in gimple_split_block (bb=0x76c41548, stmt=0x0) at ../../gcc/tree-cfg.c:5707 #2 0x007563cf in split_block (bb=0x76c41548, i=i@entry=0x0) at ../../gcc/cfghooks.c:508 #3 0x00756b44 in split_block_after_labels (bb=optimized out) at ../../gcc/cfghooks.c:549 #4 make_forwarder_block (bb=optimized out, redirect_edge_p=redirect_edge_p@entry=0x75d4e0 mfb_keep_just(edge_def*), new_bb_cbk=new_bb_cbk@entry=0x0) at ../../gcc/cfghooks.c:842 #5 0x0076085a in create_preheader (loop=0x76d56948, flags=optimized out) at ../../gcc/cfgloopmanip.c:1563 #6 0x00760aea in create_preheaders (flags=1) at ../../gcc/cfgloopmanip.c:1613 #7 0x009bc6b0 in apply_loop_flags (flags=15) at ../../gcc/loop-init.c:75 #8 0x009bc7d3 in loop_optimizer_init (flags=15) at ../../gcc/loop-init.c:136 #9 0x00957914 in estimate_function_body_sizes (node=0x76c47620, early=false) at ../../gcc/ipa-inline-analysis.c:2480 #10 0x0095948b in compute_inline_parameters (node=0x76c47620, early=false) at ../../gcc/ipa-inline-analysis.c:2907 #11 0x0095bd88 in inline_analyze_function (node=0x76c47620) at 
../../gcc/ipa-inline-analysis.c:3994 #12 0x0095bed3 in inline_generate_summary () at ../../gcc/ipa-inline-analysis.c:4045 #13 0x00a70b71 in execute_ipa_summary_passes (ipa_pass=0x1dcb9e0) at ../../gcc/passes.c:2137 #14 0x00777a15 in ipa_passes () at ../../gcc/cgraphunit.c:2074 #15 symbol_table::compile (this=this@entry=0x76c3a000) at ../../gcc/cgraphunit.c:2187 #16 0x00778bcd in symbol_table::finalize_compilation_unit (this=0x76c3a000) at ../../gcc/cgraphunit.c:2340 #17 0x006580ee in c_write_global_declarations () at ../../gcc/c/c-decl.c:10777 #18 0x00b5bb8b in compile_file () at ../../gcc/toplev.c:584 #19 0x00b5def1 in do_compile () at ../../gcc/toplev.c:2041 #20 0x00b5e0fa in toplev::main (this=0x7fffdc9f, argc=20, argv=0x7fffdd98) at ../../gcc/toplev.c:2138 #21 0x0063f1d9 in main (argc=20, argv=0x7fffdd98) at ../../gcc/main.c:38 Patch can bootstrap on x86_64-linux-pc and no regression has been seen. Ready for trunk? Thanks, Martin Thanks, Richard. Patch can bootstrap on x86_64-linux-pc and no regression has been seen. Ready for trunk? Thanks, Martin From 09b90f6a5ec1e49464f57c333af43574ad8c1375 Mon Sep 17 00:00:00 2001 From: mliska mli...@suse.cz Date: Thu, 20 Nov 2014 16:28:54 +0100 Subject: [PATCH] Fix and new test. gcc/ChangeLog: 2014-11-21 Martin Liska mli...@suse.cz * gimple-iterator.h (gsi_start_bb_nondebug): New function. * ipa-icf-gimple.c (func_checker::compare_bb): Correct iteration replaces loop based on precomputed number of non-debug statements. gcc/testsuite/ChangeLog: 2014-11-21 Martin Liska mli...@suse.cz * gcc.dg/ipa/pr63909.c: New test. 
--- gcc/gimple-iterator.h | 13 + gcc/ipa-icf-gimple.c | 25 ++--- gcc/testsuite/gcc.dg/ipa/pr63909.c | 27 +++ 3 files changed, 50 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/ipa/pr63909.c diff --git a/gcc/gimple-iterator.h b/gcc/gimple-iterator.h index fb6cc07..e9602b3 100644 --- a/gcc/gimple-iterator.h +++ b/gcc/gimple-iterator.h @@ -211,6 +211,19 @@ gsi_stmt (gimple_stmt_iterator i) return i.ptr; } +/* Return a new iterator pointing to the first non-debug statement + in basic block BB. */ + +static inline gimple_stmt_iterator +gsi_start_bb_nondebug (basic_block bb) +{ + gimple_stmt_iterator gsi = gsi_start_bb (bb); + while (!gsi_end_p (gsi) is_gimple_debug (gsi_stmt (gsi))) +gsi_next (gsi); + + return gsi; +} + /* Return a block statement iterator that points to the first non-label statement in block BB. */ diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c index 8f2a438..ec0290a 100644 --- a/gcc/ipa-icf-gimple.c +++ b/gcc/ipa-icf-gimple.c @@ -559,24 +559,16 @@ func_checker::parse_labels (sem_bb *bb
Re: [PATCH 8/9] Negative numbers added for sreal class.
On 11/21/2014 01:03 PM, Richard Biener wrote: On Fri, Nov 21, 2014 at 12:21 PM, Martin Liška mli...@suse.cz wrote: On 11/14/2014 11:48 AM, Richard Biener wrote: On Thu, Nov 13, 2014 at 1:35 PM, mliska mli...@suse.cz wrote: gcc/ChangeLog: 2014-11-13 Martin Liska mli...@suse.cz * predict.c (propagate_freq): More elegant sreal API is used. (estimate_bb_frequencies): New static constants defined by sreal replace precomputed ones. * sreal.c (sreal::normalize): New function. (sreal::to_int): Likewise. (sreal::operator+): Likewise. (sreal::operator-): Likewise. * sreal.h: Definition of new functions added. Please use gcc_checking_assert()s everywhere. sreal is supposed to be fast... (I see it has current uses of gcc_assert - you may want to mass-convert them as a followup). --- gcc/predict.c | 30 +++- gcc/sreal.c | 56 gcc/sreal.h | 75 --- 3 files changed, 126 insertions(+), 35 deletions(-) diff --git a/gcc/predict.c b/gcc/predict.c index 0215e91..0f640f5 100644 --- a/gcc/predict.c +++ b/gcc/predict.c @@ -82,7 +82,7 @@ along with GCC; see the file COPYING3. If not see /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE, 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. 
*/ -static sreal real_zero, real_one, real_almost_one, real_br_prob_base, +static sreal real_almost_one, real_br_prob_base, real_inv_br_prob_base, real_one_half, real_bb_freq_max; static void combine_predictions_for_insn (rtx_insn *, basic_block); @@ -2528,13 +2528,13 @@ propagate_freq (basic_block head, bitmap tovisit) bb-count = bb-frequency = 0; } - BLOCK_INFO (head)-frequency = real_one; + BLOCK_INFO (head)-frequency = sreal::one (); last = head; for (bb = head; bb; bb = nextbb) { edge_iterator ei; - sreal cyclic_probability = real_zero; - sreal frequency = real_zero; + sreal cyclic_probability = sreal::zero (); + sreal frequency = sreal::zero (); nextbb = BLOCK_INFO (bb)-next; BLOCK_INFO (bb)-next = NULL; @@ -2559,13 +2559,13 @@ propagate_freq (basic_block head, bitmap tovisit) * BLOCK_INFO (e-src)-frequency / REG_BR_PROB_BASE); */ - sreal tmp (e-probability, 0); + sreal tmp = e-probability; tmp *= BLOCK_INFO (e-src)-frequency; tmp *= real_inv_br_prob_base; frequency += tmp; } - if (cyclic_probability == real_zero) + if (cyclic_probability == sreal::zero ()) { BLOCK_INFO (bb)-frequency = frequency; } @@ -2577,7 +2577,7 @@ propagate_freq (basic_block head, bitmap tovisit) /* BLOCK_INFO (bb)-frequency = frequency / (1 - cyclic_probability) */ - cyclic_probability = real_one - cyclic_probability; + cyclic_probability = sreal::one () - cyclic_probability; BLOCK_INFO (bb)-frequency = frequency / cyclic_probability; } } @@ -2591,7 +2591,7 @@ propagate_freq (basic_block head, bitmap tovisit) = ((e-probability * BLOCK_INFO (bb)-frequency) / REG_BR_PROB_BASE); */ - sreal tmp (e-probability, 0); + sreal tmp = e-probability; tmp *= BLOCK_INFO (bb)-frequency; EDGE_INFO (e)-back_edge_prob = tmp * real_inv_br_prob_base; } @@ -2873,13 +2873,11 @@ estimate_bb_frequencies (bool force) if (!real_values_initialized) { real_values_initialized = 1; - real_zero = sreal (0, 0); - real_one = sreal (1, 0); - real_br_prob_base = sreal (REG_BR_PROB_BASE, 0); - real_bb_freq_max = sreal 
(BB_FREQ_MAX, 0); + real_br_prob_base = REG_BR_PROB_BASE; + real_bb_freq_max = BB_FREQ_MAX; real_one_half = sreal (1, -1); - real_inv_br_prob_base = real_one / real_br_prob_base; - real_almost_one = real_one - real_inv_br_prob_base; + real_inv_br_prob_base = sreal::one () / real_br_prob_base; + real_almost_one = sreal::one () - real_inv_br_prob_base; } mark_dfs_back_edges (); @@ -2897,7 +2895,7 @@ estimate_bb_frequencies (bool force) FOR_EACH_EDGE (e, ei, bb-succs) { - EDGE_INFO (e)-back_edge_prob = sreal (e-probability, 0); + EDGE_INFO (e)-back_edge_prob = e-probability; EDGE_INFO (e)-back_edge_prob *= real_inv_br_prob_base; } } @@ -2906,7 +2904,7 @@ estimate_bb_frequencies (bool force) to outermost to examine frequencies for back edges
Re: [PATCH 8/9] Negative numbers added for sreal class.
On 11/21/2014 04:02 PM, Richard Biener wrote: On Fri, Nov 21, 2014 at 3:39 PM, Martin Liška mli...@suse.cz wrote: Hello. Ok, this is simplified, one can use sreal a = 12345 and it works ;) that's a new API, right? There is no max () and I think that using LONG_MIN here is asking for trouble (host dependence). The comment in the file says the max should be sreal (SREAL_MAX_SIG, SREAL_MAX_EXP) and the min sreal (-SREAL_MAX_SIG, SREAL_MAX_EXP)? Sure, sreal can store much bigger (smaller) numbers :) Where do you need sreal::to_double? The host shouldn't perform double calculations so it can be only for dumping? In which case the user should have used sreal::dump (), maybe with extra arguments. That new function was requested by Honza, only for debugging purposes. I agree that dump should do this kind of job. If there is no other problem, I will run the tests once more and commit it. Thanks, Martin -#define SREAL_MAX_EXP (INT_MAX / 4) +#define SREAL_MAX_EXP (INT_MAX / 8) this change doesn't look necessary anymore? Btw, it's also odd that... #define SREAL_PART_BITS 32 ... #define SREAL_MIN_SIG ((uint64_t) 1 << (SREAL_PART_BITS - 1)) #define SREAL_MAX_SIG (((uint64_t) 1 << SREAL_PART_BITS) - 1) thus all m_sig values fit in 32bits but we still use a uint64_t m_sig ... (the implementation uses 64bit for internal computations, but still the storage is wasteful?) Of course the way normalize() works requires that storage to be 64bits to store unnormalized values. I'd say ok with the SREAL_MAX_EXP change reverted. Hi. You are right, this change was done because I used one bit for m_negative (bitfield), which is not needed any more. Final version attached. Thank you, Martin Thanks, Richard. Otherwise looks good to me and sorry for not noticing the above earlier. Thanks, Richard. 
Thanks, Martin }; extern void debug (sreal ref); @@ -76,12 +133,12 @@ inline sreal operator+= (sreal a, const sreal b) inline sreal operator-= (sreal a, const sreal b) { -return a = a - b; + return a = a - b; } inline sreal operator/= (sreal a, const sreal b) { -return a = a / b; + return a = a / b; } inline sreal operator*= (sreal a, const sreal b) -- 2.1.2 From b28e4264b5f9965ca5ab4f52ce6f4c9df00d4800 Mon Sep 17 00:00:00 2001 From: mliska mli...@suse.cz Date: Fri, 21 Nov 2014 12:07:40 +0100 Subject: [PATCH 1/2] Negative numbers added for sreal class. gcc/ChangeLog: 2014-11-13 Martin Liska mli...@suse.cz * predict.c (propagate_freq): More elegant sreal API is used. (estimate_bb_frequencies): Precomputed constants replaced by integer constants. * sreal.c (sreal::normalize): New function. (sreal::to_int): Likewise. (sreal::operator+): Likewise. (sreal::operator-): Likewise. * sreal.h: Definition of new functions added. --- gcc/predict.c | 30 gcc/sreal.c | 114 -- gcc/sreal.h | 82 +- 3 files changed, 174 insertions(+), 52 deletions(-) diff --git a/gcc/predict.c b/gcc/predict.c index 779af11..0cfe4a9 100644 --- a/gcc/predict.c +++ b/gcc/predict.c @@ -82,7 +82,7 @@ along with GCC; see the file COPYING3. If not see /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE, 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. 
*/ -static sreal real_zero, real_one, real_almost_one, real_br_prob_base, +static sreal real_almost_one, real_br_prob_base, real_inv_br_prob_base, real_one_half, real_bb_freq_max; static void combine_predictions_for_insn (rtx_insn *, basic_block); @@ -2541,13 +2541,13 @@ propagate_freq (basic_block head, bitmap tovisit) bb-count = bb-frequency = 0; } - BLOCK_INFO (head)-frequency = real_one; + BLOCK_INFO (head)-frequency = 1; last = head; for (bb = head; bb; bb = nextbb) { edge_iterator ei; - sreal cyclic_probability = real_zero; - sreal frequency = real_zero; + sreal cyclic_probability = 0; + sreal frequency = 0; nextbb = BLOCK_INFO (bb)-next; BLOCK_INFO (bb)-next = NULL; @@ -2572,13 +2572,13 @@ propagate_freq (basic_block head, bitmap tovisit) * BLOCK_INFO (e-src)-frequency / REG_BR_PROB_BASE); */ - sreal tmp (e-probability, 0); + sreal tmp = e-probability; tmp *= BLOCK_INFO (e-src)-frequency; tmp *= real_inv_br_prob_base; frequency += tmp; } - if (cyclic_probability == real_zero) + if (cyclic_probability == 0) { BLOCK_INFO (bb)-frequency = frequency; } @@ -2590,7 +2590,7 @@ propagate_freq (basic_block head, bitmap tovisit) /* BLOCK_INFO (bb)-frequency = frequency / (1 - cyclic_probability) */ - cyclic_probability = real_one - cyclic_probability; + cyclic_probability = sreal (1) - cyclic_probability; BLOCK_INFO (bb)-frequency = frequency / cyclic_probability; } } @@ -2604,7 +2604,7 @@ propagate_freq (basic_block head, bitmap tovisit
[PATCH] IPA ICF: memory leak fix
Hello. The following patch removes a memory leak that was introduced by the very first IPA ICF patch. I would like to thank David for hunting the leak. The patch can bootstrap on x86_64-linux-pc and no regression is introduced. Thanks, Martin From f959905e984a84d0353fb1e32ba83db2b6dfe4d2 Mon Sep 17 00:00:00 2001 From: mliska mli...@suse.cz Date: Fri, 21 Nov 2014 16:04:06 +0100 Subject: [PATCH] IPA ICF: memory leak fix gcc/ChangeLog: 2014-11-21 David Malcolm dmalc...@redhat.com Martin Liska mli...@suse.cz * ipa-icf.c (sem_function::equals_private): auto_vec<int> replaces int* allocated with XNEWVEC. (sem_function::bb_dict_test): Likewise. * ipa-icf.h: Likewise. --- gcc/ipa-icf.c | 15 ++- gcc/ipa-icf.h | 2 +- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c index e0633e7..4a0fcfb 100644 --- a/gcc/ipa-icf.c +++ b/gcc/ipa-icf.c @@ -410,7 +410,6 @@ sem_function::equals_private (sem_item *item, basic_block bb1, bb2; edge e1, e2; edge_iterator ei1, ei2; - int *bb_dict = NULL; bool result = true; tree arg1, arg2; @@ -489,8 +488,8 @@ sem_function::equals_private (sem_item *item, /* Basic block edges check. */ for (unsigned i = 0; i bb_sorted.length (); ++i) { - bb_dict = XNEWVEC (int, bb_sorted.length () + 2); - memset (bb_dict, -1, (bb_sorted.length () + 2) * sizeof (int)); + auto_vecint bb_dict; + bb_dict.safe_grow_cleared (bb_sorted.length () + 2); bb1 = bb_sorted[i]-bb; bb2 = m_compared_func-bb_sorted[i]-bb; @@ -957,9 +956,15 @@ sem_function::icf_handled_component_p (tree t) corresponds to TARGET. */ bool -sem_function::bb_dict_test (int* bb_dict, int source, int target) +sem_function::bb_dict_test (auto_vecint bb_dict, int source, int target) { - if (bb_dict[source] == -1) + /* bb_dict is cleared with zeros, so that source and target are + incremented. bb_dist is used to verify that edges in source and + target function correspond. 
*/ + + source++; + target++; + if (bb_dict[source] == 0) { bb_dict[source] = target; return true; diff --git a/gcc/ipa-icf.h b/gcc/ipa-icf.h index 046e858..75db93a 100644 --- a/gcc/ipa-icf.h +++ b/gcc/ipa-icf.h @@ -275,7 +275,7 @@ private: /* Basic blocks dictionary BB_DICT returns true if SOURCE index BB corresponds to TARGET. */ - bool bb_dict_test (int* bb_dict, int source, int target); + bool bb_dict_test (auto_vecint bb_dict, int source, int target); /* Iterates all tree types in T1 and T2 and returns true if all types are compatible. If COMPARE_POLYMORPHIC is set to true, -- 2.1.2
Re: [PATCH] IPA ICF: memory leak fix
On 11/22/2014 10:09 AM, Markus Trippelsdorf wrote: On 2014.11.22 at 09:05 +0100, Martin Liška wrote: Hello. Following patch removes memory leak that was introduced by very first IPA ICF patch. I would like to thank David for hunting the leak. Patch an bootstrap on x86_86-linux-pc and no regression is introduced. I gave the patch a quick spin on gcc112: *** Error in `/home/trippels/gcc_build_dir/./prev-gcc/lto1': free(): invalid next size (fast): 0x01000a5fc160 *** === Backtrace: = /lib64/libc.so.6(+0xa3d9c)[0x3fff7b6b3d9c] /lib64/libc.so.6(+0xaf0b4)[0x3fff7b6bf0b4] /home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN3vecIi7va_heap6vl_ptrE7releaseEv-0x1d4bc00)[0x1025dd88] /home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN7ipa_icf12sem_function14equals_privateEPNS_8sem_itemER8hash_mapIP11symtab_nodeS2_22default_hashmap_traitsE-0x9c083c)[0x116586bc] /home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN7ipa_icf12sem_function6equalsEPNS_8sem_itemER8hash_mapIP11symtab_nodeS2_22default_hashmap_traitsE-0x9c0578)[0x11658998] /home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN7ipa_icf18sem_item_optimizer7executeEv-0x9b8774)[0x11660a84] /home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN7ipa_icf12pass_ipa_icf7executeEP8function-0x9b0314)[0x11668efc] /home/trippels/gcc_build_dir/./prev-gcc/lto1(_Z16execute_one_passP8opt_pass-0x1647588)[0x1098a0a8] /home/trippels/gcc_build_dir/./prev-gcc/lto1(_Z21execute_ipa_pass_listP8opt_pass-0x1644c2c)[0x1098ca7c] /home/trippels/gcc_build_dir/./prev-gcc/lto1(_Z8lto_mainv-0x1df20e4)[0x101b494c] /home/trippels/gcc_build_dir/./prev-gcc/lto1[0x10b599b8] /home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN6toplev4mainEiPPc-0x1e8be70)[0x101507b8] /home/trippels/gcc_build_dir/./prev-gcc/lto1(main-0x1ec8d8c)[0x1015493c] /lib64/libc.so.6(+0x447ac)[0x3fff7b6547ac] /lib64/libc.so.6(__libc_start_main-0x19cbf4)[0x3fff7b6549d4] === Memory map: ... Hello. Thank you for testing, problem is that I should grow the vector by 1, because '0' is used as NULL value. 
Please try my fixed patch. Thanks, Martin From 7280e2c8de246c72d2608b5c58590f4fabaf6234 Mon Sep 17 00:00:00 2001 From: mliska mli...@suse.cz Date: Fri, 21 Nov 2014 16:04:06 +0100 Subject: [PATCH] IPA ICF: memory leak fix gcc/ChangeLog: 2014-11-21 David Malcolm dmalc...@redhat.com Martin Liska mli...@suse.cz * ipa-icf.c (sem_function::equals_private): auto_vecint replaces int* allocated with XNEWVEC. (sem_function::bb_dict_test): Likewise. * ipa-icf.h: Likewise. --- gcc/ipa-icf.c | 18 +- gcc/ipa-icf.h | 2 +- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c index e0633e7..c8060bf 100644 --- a/gcc/ipa-icf.c +++ b/gcc/ipa-icf.c @@ -410,7 +410,6 @@ sem_function::equals_private (sem_item *item, basic_block bb1, bb2; edge e1, e2; edge_iterator ei1, ei2; - int *bb_dict = NULL; bool result = true; tree arg1, arg2; @@ -489,8 +488,11 @@ sem_function::equals_private (sem_item *item, /* Basic block edges check. */ for (unsigned i = 0; i bb_sorted.length (); ++i) { - bb_dict = XNEWVEC (int, bb_sorted.length () + 2); - memset (bb_dict, -1, (bb_sorted.length () + 2) * sizeof (int)); + auto_vecint bb_dict; + /* Size of bb_dict is number of basic blocks plus + 2 for entry and exit block and plus one because + '0' is used as NULL value. */ + bb_dict.safe_grow_cleared (bb_sorted.length () + 3); bb1 = bb_sorted[i]-bb; bb2 = m_compared_func-bb_sorted[i]-bb; @@ -957,9 +959,15 @@ sem_function::icf_handled_component_p (tree t) corresponds to TARGET. */ bool -sem_function::bb_dict_test (int* bb_dict, int source, int target) +sem_function::bb_dict_test (auto_vecint bb_dict, int source, int target) { - if (bb_dict[source] == -1) + /* bb_dict is cleared with zeros, so that source and target are + incremented. bb_dist is used to verify that edges in source and + target function correspond. 
*/ + + source++; + target++; + if (bb_dict[source] == 0) { bb_dict[source] = target; return true; diff --git a/gcc/ipa-icf.h b/gcc/ipa-icf.h index 046e858..75db93a 100644 --- a/gcc/ipa-icf.h +++ b/gcc/ipa-icf.h @@ -275,7 +275,7 @@ private: /* Basic blocks dictionary BB_DICT returns true if SOURCE index BB corresponds to TARGET. */ - bool bb_dict_test (int* bb_dict, int source, int target); + bool bb_dict_test (auto_vecint bb_dict, int source, int target); /* Iterates all tree types in T1 and T2 and returns true if all types are compatible. If COMPARE_POLYMORPHIC is set to true, -- 2.1.2
[PATCH] sreal class fix for PR64050 and PR64060
Hello. Following patch is fix sreal problems that are mentioned in PR64050, PR64060. I added new GCC plugin test where I test sreal arithmetics and number comparison. Patch can bootstrap on ppc64-linux-pc and x86_64-linux-pc and can pass regression tests. Thanks, Martin gcc/ChangeLog: 2014-11-25 Martin Liska Martin li...@suse.cz PR bootstrap/64050 PR ipa/64060 * sreal.c (sreal::operator+): Addition fixed. (sreal::signedless_plus): Negative numbers are handled correctly. (sreal::operator-): Subtraction is fixed. (sreal::signedless_minus): Negative numbers are handled correctly. * sreal.h (sreal::operator): Equal negative numbers are compared correctly. (sreal::shift): New checking asserts are introduced. Operation is fixed. gcc/testsuite/ChangeLog: 2014-11-25 Martin Liska Martin li...@suse.cz PR bootstrap/64050 PR ipa/64060 * gcc.dg/plugin/plugin.exp: New plugin. * gcc.dg/plugin/sreal-test-1.c: New test. * gcc.dg/plugin/sreal_plugin.c: New test. diff --git a/gcc/sreal.c b/gcc/sreal.c index 0337f9e..2b5e3ae 100644 --- a/gcc/sreal.c +++ b/gcc/sreal.c @@ -182,9 +182,9 @@ sreal::operator+ (const sreal other) const { sreal tmp = -(*b_p); if (*a_p tmp) - return signedless_minus (tmp, *a_p, false); + return signedless_minus (tmp, *a_p, true); else - return signedless_minus (*a_p, tmp, true); + return signedless_minus (*a_p, tmp, false); } gcc_checking_assert (a_p-m_negative == b_p-m_negative); @@ -203,7 +203,7 @@ sreal::signedless_plus (const sreal a, const sreal b, bool negative) const sreal *a_p = a; const sreal *b_p = b; - if (*a_p *b_p) + if (a_p-m_exp b_p-m_exp) std::swap (a_p, b_p); dexp = a_p-m_exp - b_p-m_exp; @@ -211,6 +211,7 @@ sreal::signedless_plus (const sreal a, const sreal b, bool negative) if (dexp SREAL_BITS) { r.m_sig = a_p-m_sig; + r.m_negative = negative; return r; } @@ -248,11 +249,11 @@ sreal::operator- (const sreal other) const /* We want to substract a smaller number from bigger for nonegative numbers. 
*/ if (!m_negative *this other) -return -signedless_minus (other, *this, true); +return signedless_minus (other, *this, true); /* Example: -2 - (-3) = 3 - 2 */ if (m_negative *this other) -return signedless_minus (-other, -(*this), true); +return signedless_minus (-other, -(*this), false); sreal r = signedless_minus (*this, other, m_negative); @@ -274,6 +275,7 @@ sreal::signedless_minus (const sreal a, const sreal b, bool negative) if (dexp SREAL_BITS) { r.m_sig = a_p-m_sig; + r.m_negative = negative; return r; } if (dexp == 0) diff --git a/gcc/sreal.h b/gcc/sreal.h index 1362bf6..3938c6e 100644 --- a/gcc/sreal.h +++ b/gcc/sreal.h @@ -60,6 +60,11 @@ public: bool operator (const sreal other) const { +/* We negate result in case of negative numbers and + it would return true for equal negative numbers. */ +if (*this == other) + return false; + if (m_negative != other.m_negative) return m_negative other.m_negative; @@ -86,10 +91,19 @@ public: return tmp; } - sreal shift (int sig) const + sreal shift (int s) const { +gcc_checking_assert (s = SREAL_BITS); +gcc_checking_assert (s = -SREAL_BITS); + +/* Exponent should never be so large because shift_right is used only by + sreal_add and sreal_sub ant thus the number cannot be shifted out from + exponent range. 
*/ +gcc_checking_assert (m_exp + s = SREAL_MAX_EXP); +gcc_checking_assert (m_exp + s = -SREAL_MAX_EXP); + sreal tmp = *this; -tmp.m_sig += sig; +tmp.m_exp += s; return tmp; } diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp b/gcc/testsuite/gcc.dg/plugin/plugin.exp index e4b5f54..c12b3da 100644 --- a/gcc/testsuite/gcc.dg/plugin/plugin.exp +++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp @@ -59,6 +59,7 @@ set plugin_test_list [list \ { selfassign.c self-assign-test-1.c self-assign-test-2.c } \ { ggcplug.c ggcplug-test-1.c } \ { one_time_plugin.c one_time-test-1.c } \ +{ sreal_plugin.c sreal-test-1.c } \ { start_unit_plugin.c start_unit-test-1.c } \ { finish_unit_plugin.c finish_unit-test-1.c } \ ] diff --git a/gcc/testsuite/gcc.dg/plugin/sreal-test-1.c b/gcc/testsuite/gcc.dg/plugin/sreal-test-1.c new file mode 100644 index 000..1bce2cc --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/sreal-test-1.c @@ -0,0 +1,8 @@ +/* Test that pass is inserted and invoked once. */ +/* { dg-do compile } */ +/* { dg-options -O } */ + +int main (int argc, char **argv) +{ + return 0; +} diff --git a/gcc/testsuite/gcc.dg/plugin/sreal_plugin.c b/gcc/testsuite/gcc.dg/plugin/sreal_plugin.c new file mode 100644 index 000..f113816 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/sreal_plugin.c @@ -0,0
Re: [PATCH] IPA ICF: memory leak fix
On 11/23/2014 10:01 AM, Markus Trippelsdorf wrote: On 2014.11.22 at 17:46 +0100, Markus Trippelsdorf wrote: On 2014.11.22 at 16:04 +0100, Martin Liška wrote: On 11/22/2014 10:09 AM, Markus Trippelsdorf wrote: On 2014.11.22 at 09:05 +0100, Martin Liška wrote: Following patch removes memory leak that was introduced by very first IPA ICF patch. I would like to thank David for hunting the leak. Patch an bootstrap on x86_86-linux-pc and no regression is introduced. I gave the patch a quick spin on gcc112: *** Error in `/home/trippels/gcc_build_dir/./prev-gcc/lto1': free(): invalid next size (fast): 0x01000a5fc160 *** === Backtrace: = /lib64/libc.so.6(+0xa3d9c)[0x3fff7b6b3d9c] /lib64/libc.so.6(+0xaf0b4)[0x3fff7b6bf0b4] /home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN3vecIi7va_heap6vl_ptrE7releaseEv-0x1d4bc00)[0x1025dd88] /home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN7ipa_icf12sem_function14equals_privateEPNS_8sem_itemER8hash_mapIP11symtab_nodeS2_22default_hashmap_traitsE-0x9c083c)[0x116586bc] /home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN7ipa_icf12sem_function6equalsEPNS_8sem_itemER8hash_mapIP11symtab_nodeS2_22default_hashmap_traitsE-0x9c0578)[0x11658998] /home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN7ipa_icf18sem_item_optimizer7executeEv-0x9b8774)[0x11660a84] /home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN7ipa_icf12pass_ipa_icf7executeEP8function-0x9b0314)[0x11668efc] /home/trippels/gcc_build_dir/./prev-gcc/lto1(_Z16execute_one_passP8opt_pass-0x1647588)[0x1098a0a8] /home/trippels/gcc_build_dir/./prev-gcc/lto1(_Z21execute_ipa_pass_listP8opt_pass-0x1644c2c)[0x1098ca7c] /home/trippels/gcc_build_dir/./prev-gcc/lto1(_Z8lto_mainv-0x1df20e4)[0x101b494c] /home/trippels/gcc_build_dir/./prev-gcc/lto1[0x10b599b8] /home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN6toplev4mainEiPPc-0x1e8be70)[0x101507b8] /home/trippels/gcc_build_dir/./prev-gcc/lto1(main-0x1ec8d8c)[0x1015493c] /lib64/libc.so.6(+0x447ac)[0x3fff7b6547ac] /lib64/libc.so.6(__libc_start_main-0x19cbf4)[0x3fff7b6549d4] 
=== Memory map: ... Thank you for testing, problem is that I should grow the vector by 1, because '0' is used as NULL value. Please try my fixed patch. This one survives bootstrap-lto. Thanks. But Firefox doesn't build: /home/trippels/gcc_test/usr/local/bin/c++ -fPIC -Wall -Wempty-body -Woverloaded-virtual -Wsign-compare -Wwrite-strings -Werror=endif-labels -Werror=int-to-pointer-cast -Werror=missing-braces -Werror=pointer-arith -Werror=return-type -Werror=sequence-point -Werror=unused-label -Werror=trigraphs -Werror=type-limits -Wno-invalid-offsetof -Wcast-align -flto=160 --param lto-partitions=160 -mcpu=power8 -ffunction-sections -fdata-sections -fno-exceptions -fno-strict-aliasing -frtti -fno-exceptions -fno-math-errno -std=gnu++0x -pthread -pipe -UDEBUG -DNDEBUG -O3 -DU_STATIC_IMPLEMENTATION -fvisibility=hidden -W -Wall -pedantic -Wpointer-arith -Wwrite-strings -Wno-long-long -Wno-unused -Wno-unused-parameter -lpthread -Wl,--hash-style=gnu,--as-needed,--gc-sections,--icf=all -Wl,-z,noexecstack -Wl,-z,text -Wl,--build-id -Wl,--gc-sections -o ../../bin/makeconv makeconv.o ucnvstat.o genmbcs.o gencnvex.o -L../../lib -licutu -L../../lib -licui18n -L../../lib -licuuc -L../../stubdata -licudata - l pthread -ldl -lm lto1: internal compiler error: in operator[], at vec.h:736 0x10122377 vecint, va_heap, vl_embed::operator[](unsigned int) ../../gcc/gcc/vec.h:736 0x10d1a0f3 vecint, va_heap, vl_embed::operator[](unsigned int) ../../gcc/gcc/ipa-icf.c:963 0x10d1a0f3 vecint, va_heap, vl_ptr::operator[](unsigned int) ../../gcc/gcc/vec.h:1202 0x10d1a0f3 ipa_icf::sem_function::bb_dict_test(auto_vecint, 0ul, int, int) ../../gcc/gcc/ipa-icf.c:970 0x10d1aa73 ipa_icf::sem_function::equals_private(ipa_icf::sem_item*, hash_mapsymtab_node*, ipa_icf::sem_item*, default_hashmap_traits) ../../gcc/gcc/ipa-icf.c:512 0x10d1afb3 ipa_icf::sem_function::equals(ipa_icf::sem_item*, hash_mapsymtab_node*, ipa_icf::sem_item*, default_hashmap_traits) ../../gcc/gcc/ipa-icf.c:384 0x10d1dba3 
ipa_icf::sem_item_optimizer::subdivide_classes_by_equality(bool) ../../gcc/gcc/ipa-icf.c:1833 0x10d2397f ipa_icf::sem_item_optimizer::execute() ../../gcc/gcc/ipa-icf.c:1652 0x10d24a93 ipa_icf_driver ../../gcc/gcc/ipa-icf.c:2382 0x10d24a93 ipa_icf::pass_ipa_icf::execute(function*) ../../gcc/gcc/ipa-icf.c:2430 Please submit a full bug report, with preprocessed source if appropriate. Please include the complete backtrace with any bug report. See http://gcc.gnu.org/bugs.html for instructions. lto-wrapper: fatal error: /home/trippels/gcc_test/usr/local/bin/c++ returned 1 exit status compilation terminated. /home/trippels/bin/ld: fatal error: lto-wrapper failed collect2: error: ld returned 1 exit status make[8]: *** [../../bin/makeconv] Error 1 Hi. Well, this final version can run profiled-bootstrap and I capable of building FF
Re: [PATCH 8/9] Negative numbers added for sreal class.
On 11/21/2014 04:21 PM, Martin Liška wrote: On 11/21/2014 04:02 PM, Richard Biener wrote: On Fri, Nov 21, 2014 at 3:39 PM, Martin Liška mli...@suse.cz wrote: Hello. Ok, this is simplified, one can use sreal a = 12345 and it works ;) that's a new API, right? There is no max () and I think that using LONG_MIN here is asking for trouble (host dependence). The comment in the file says the max should be sreal (SREAL_MAX_SIG, SREAL_MAX_EXP) and the min sreal (-SREAL_MAX_SIG, SREAL_MAX_EXP)? Sure, sreal can store much bigger(smaller) numbers :) Where do you need sreal::to_double? The host shouldn't perform double calculations so it can be only for dumping? In which case the user should have used sreal::dump (), maybe with extra arguments. That new function was request from Honza, only for debugging purpose. I agree that dump should this kind of job. If no other problem, I will run tests once more and commit it. Thanks, Martin -#define SREAL_MAX_EXP (INT_MAX / 4) +#define SREAL_MAX_EXP (INT_MAX / 8) this change doesn't look necessary anymore? Btw, it's also odd that... #define SREAL_PART_BITS 32 ... #define SREAL_MIN_SIG ((uint64_t) 1 (SREAL_PART_BITS - 1)) #define SREAL_MAX_SIG (((uint64_t) 1 SREAL_PART_BITS) - 1) thus all m_sig values fit in 32bits but we still use a uint64_t m_sig ... (the implementation uses 64bit for internal computations, but still the storage is wasteful?) Of course the way normalize() works requires that storage to be 64bits to store unnormalized values. I'd say ok with the SREAL_MAX_EXP change reverted. Hi. You are right, this change was done because I used one bit for m_negative (bitfield), not needed any more. Final version attached. Thank you, Martin Thanks, Richard. Otherwise looks good to me and sorry for not noticing the above earlier. Thanks, Richard. 
Thanks, Martin }; extern void debug (sreal ref); @@ -76,12 +133,12 @@ inline sreal operator+= (sreal a, const sreal b) inline sreal operator-= (sreal a, const sreal b) { -return a = a - b; + return a = a - b; } inline sreal operator/= (sreal a, const sreal b) { -return a = a / b; + return a = a / b; } inline sreal operator*= (sreal a, const sreal b) -- 2.1.2 Hello. After IRC discussions, I decided to give sreal another refactoring where I use int64_t for m_sig. This approach looks much easier and straightforward. I would like to ask folk for comments? I am able to run profiled bootstrap on x86_64-linux-pc and ppc64-linux-pc and new regression is introduced. Thanks, Martin From bff0b4b803271788cd90cfd4032ed6d4e6e95707 Mon Sep 17 00:00:00 2001 From: mliska mli...@suse.cz Date: Wed, 26 Nov 2014 15:46:42 +0100 Subject: [PATCH] New sreal implementation which uses int64_t as m_sig. gcc/ChangeLog: 2014-11-27 Martin Liska mli...@suse.cz * sreal.c (sreal::shift_right): New implementation for int64_t as m_sig. (sreal::normalize): Likewise. (sreal::to_int): Likewise. (sreal::operator+): Likewise. (sreal::operator-): Likewise. (sreal::operator*): Likewise. (sreal::operator/): Likewise. (sreal::signedless_minus): Removed. (sreal::signedless_plus): Removed. * sreal.h (sreal::operator): New implementation for int64_t as m_sig. --- gcc/sreal.c | 129 +++- gcc/sreal.h | 52 ++-- 2 files changed, 61 insertions(+), 120 deletions(-) diff --git a/gcc/sreal.c b/gcc/sreal.c index 2b5e3ae..304feb0 100644 --- a/gcc/sreal.c +++ b/gcc/sreal.c @@ -91,7 +91,7 @@ sreal::shift_right (int s) m_exp += s; - m_sig += (uint64_t) 1 (s - 1); + m_sig += (int64_t) 1 (s - 1); m_sig = s; } @@ -100,43 +100,46 @@ sreal::shift_right (int s) void sreal::normalize () { + int64_t s = m_sig 0 ? -1 : 1; + uint64_t sig = m_sig == LONG_MIN ? 
LONG_MAX : std::abs (m_sig); + if (m_sig == 0) { - m_negative = 0; m_exp = -SREAL_MAX_EXP; } - else if (m_sig SREAL_MIN_SIG) + else if (sig SREAL_MIN_SIG) { do { - m_sig = 1; + sig = 1; m_exp--; + gcc_checking_assert (sig); } - while (m_sig SREAL_MIN_SIG); + while (sig SREAL_MIN_SIG); /* Check underflow. */ if (m_exp -SREAL_MAX_EXP) { m_exp = -SREAL_MAX_EXP; - m_sig = 0; + sig = 0; } } - else if (m_sig SREAL_MAX_SIG) + else if (sig SREAL_MAX_SIG) { int last_bit; do { - last_bit = m_sig 1; - m_sig = 1; + last_bit = sig 1; + sig = 1; m_exp++; } - while (m_sig SREAL_MAX_SIG); + while (sig SREAL_MAX_SIG); /* Round the number. */ - m_sig += last_bit; - if (m_sig SREAL_MAX_SIG) + sig += last_bit; + if (sig SREAL_MAX_SIG) { - m_sig = 1; + sig = 1; m_exp++; } @@ -144,9 +147,11 @@ sreal::normalize () if (m_exp SREAL_MAX_EXP) { m_exp = SREAL_MAX_EXP; - m_sig = SREAL_MAX_SIG; + sig = SREAL_MAX_SIG
Re: [PATCH 2/5] Existing call graph infrastructure enhancement
On 09/24/2014 05:01 PM, Jan Hubicka wrote: Hi. Following patch enhances API functions to be ready for main patch of this patchset. Ready for thunk? Thank you, Martin gcc/ChangeLog: 2014-09-21 Martin Liška mli...@suse.cz * cgraph.c (cgraph_node::release_body): New argument keep_arguments introduced. * cgraph.h: Likewise. * cgraphunit.c (cgraph_node::create_wrapper): Usage of new argument introduced. * ipa-devirt.c (polymorphic_type_binfo_p): Safe check for binfos created by Java. * tree-ssa-alias.c (ao_ref_base_alias_set): Static function transformed to global. * tree-ssa-alias.h: Likewise. diff --git a/gcc/cgraph.c b/gcc/cgraph.c index 8f04284..d40a2922 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -1637,13 +1637,15 @@ release_function_body (tree decl) are free'd in final.c via free_after_compilation(). */ void -cgraph_node::release_body (void) +cgraph_node::release_body (bool keep_arguments) { ipa_transforms_to_apply.release (); if (!used_as_abstract_origin symtab-state != PARSING) { DECL_RESULT (decl) = NULL; - DECL_ARGUMENTS (decl) = NULL; + + if (!keep_arguments) + DECL_ARGUMENTS (decl) = NULL; } /* If the node is abstract and needed, then do not clear DECL_INITIAL of its associated function function declaration because it's diff --git a/gcc/cgraph.h b/gcc/cgraph.h index a316e40..19ce3b8 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -915,7 +915,7 @@ public: Use this only for functions that are released before being translated to target code (i.e. RTL). Functions that are compiled to RTL and beyond are free'd in final.c via free_after_compilation(). */ - void release_body (void); + void release_body (bool keep_arguments = false); Please add documentation for KEEP_ARGUMENTS explaining that it is useful only if you want to rebuild body as thunk. /* cgraph_node is no longer nested function; update cgraph accordingly. 
*/ void unnest (void); diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index 3e3b8d2..c4597e2 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -2300,7 +2300,7 @@ cgraph_node::create_wrapper (cgraph_node *target) tree decl_result = DECL_RESULT (decl); /* Remove the function's body. */ I would say Remove the function's body but keep arguments to be reused for thunk. -release_body (); +release_body (true); reset (); DECL_RESULT (decl) = decl_result; diff --git a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c index af42c6d..f374933 100644 --- a/gcc/ipa-devirt.c +++ b/gcc/ipa-devirt.c @@ -225,7 +225,7 @@ static inline bool polymorphic_type_binfo_p (tree binfo) { /* See if BINFO's type has an virtual table associtated with it. */ - return BINFO_VTABLE (TYPE_BINFO (BINFO_TYPE (binfo))); + return BINFO_TYPE (binfo) BINFO_VTABLE (TYPE_BINFO (BINFO_TYPE (binfo))); Aha, this change was for Java, right? Please add comment that Java produces BINFOs without BINFO_TYPE set. } /* Return TRUE if all derived types of T are known and thus diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c index 442112a..1bf88e2 100644 --- a/gcc/tree-ssa-alias.c +++ b/gcc/tree-ssa-alias.c @@ -559,7 +559,7 @@ ao_ref_base (ao_ref *ref) /* Returns the base object alias set of the memory reference *REF. */ -static alias_set_type +alias_set_type ao_ref_base_alias_set (ao_ref *ref) { tree base_ref; diff --git a/gcc/tree-ssa-alias.h b/gcc/tree-ssa-alias.h index 436381a..0d35283 100644 --- a/gcc/tree-ssa-alias.h +++ b/gcc/tree-ssa-alias.h @@ -98,6 +98,7 @@ extern void ao_ref_init (ao_ref *, tree); extern void ao_ref_init_from_ptr_and_size (ao_ref *, tree, tree); extern tree ao_ref_base (ao_ref *); extern alias_set_type ao_ref_alias_set (ao_ref *); +extern alias_set_type ao_ref_base_alias_set (ao_ref *); I can not approve this change, but I suppose it is what Richard suggested? There's updated version of the patch that deals with Honza's notes. 
Yes, I explicitly asked Richard if we can mark the function as global. I will commit the patch soon. Thank you, Martin Patch is OK except for the tree-ssa-alias bits. Honza extern bool ptr_deref_may_alias_global_p (tree); extern bool ptr_derefs_may_alias_p (tree, tree); extern bool ref_may_alias_global_p (tree); diff --git a/gcc/cgraph.c b/gcc/cgraph.c index 1cfc783..fdcaf79 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -1625,16 +1625,19 @@ release_function_body (tree decl) /* Release memory used to represent body of function. Use this only for functions that are released before being translated to target code (i.e. RTL). Functions that are compiled to RTL and beyond - are free'd in final.c via free_after_compilation(). */ + are free'd in final.c via free_after_compilation(). + KEEP_ARGUMENTS are useful only if you want to rebuild body as thunk. */ void -cgraph_node::release_body (void
Re: [PATCH 3/5] IPA ICF pass
On 07/17/2014 05:05 PM, Martin Liška wrote: On 07/06/2014 12:53 AM, Jan Hubicka wrote: On Fri, 20 Jun 2014, Trevor Saunders wrote: +@item -fipa-icf +@opindex fipa-icf +Perform Identical Code Folding for functions and read-only variables. I would perhaps explicitly say that the optimizations reduce code size and may disturb unwind stacks by replacing a function by equivalent one with different name. +Behavior is similar to Gold Linker ICF optimization. Symbols proved Perhaps tell a bit more here. The optimization works more effectively with link time optimization enabled and that the Gold and GCC ICF work on different levels and thus are not equivalent optimizations - there are equivalences that are found only by GCC and equivalences found only by Gold. +as semantically equivalent are redirected to corresponding symbol. The pass +sensitively decides for usage of alias, thunk or local redirection. +This flag is enabled by default at @option{-O2}. Probably at -Os too. I found this a bit hard to read/understand. Perhaps first describe what it does and then, before This flag is enabled... note that This is similar to the ICF optimization performed by the Gold linker. Symbols proved (plural) vs to corresponding symbol seems to miss an an a as in a corresponding symbol. Alas, how is that one determined? Is this more ...are merged into one, from the user's perspective? What does it mean to sensitively decide for usage of alias, thunk, or local redirection? I think this is just a technical detail of the implementation. I would not put that into user manual. It means that for some functions you can make alias, for others you need thunk (so addresses stay different) Gerald Hello, there's updated version of patch that newly uses devirtualization machinery to identify polymorphic types that can potentially break ICF (There are such examples in Firefox).
Apart from that, I did many small updates, incorporated Trevor's comments and I tried to improve the documentation entry for the pass. Patch has been tested for Firefox and Inkscape with LTO. Thanks, Martin Hello. I have spent the last couple of weeks fixing new issues connected to the pass: 1) Inliner failed in case I created a thunk and released the body of a function. In such a situation we need to preserve DECL_ARGUMENTS. I added a new argument for: cgraph_node::release_body. 2) An awkward error was hidden in the libstdc++ test for trees: there were two functions having one argument that differs in one sub-template. Thanks to Richard who helped me to fix alias set accuracy. 3) There was a missing comparison for FIELD_DECLS (DECL_FIELD_BIT_OFFSET) which caused a miscompilation. 4) After discussion with Honza, we introduced a new cgraph_node flag called icf_merged. The flag helps to fix the verifier in cgraph_node::verify. Current version of the patch can bootstrap on x86_64-linux. With the following patch applied, there is no testcase regression. I tried to build Firefox, Inkscape, GIMP and Chromium with LTO and the patch applied and no regression has been observed. Moreover, I discussed with Richard and the pass is capable of playing a role in tree-ssa-tail-merge (according to first experiments). It can replace current usage of value numbering. I hope we can apply the patch to the mainline in a short-term time window?
Thank you, Martin From 53d20d0b0c209b50d385ee8d85d5a7ed4594d477 Mon Sep 17 00:00:00 2001 From: mliska mli...@suse.cz Date: Fri, 26 Sep 2014 13:51:47 +0200 Subject: [PATCH 1/3] IPA ICF: patch1 --- gcc/Makefile.in |2 + gcc/cgraph.c | 20 +- gcc/cgraph.h |2 + gcc/cgraphunit.c |2 +- gcc/common.opt | 12 + gcc/doc/invoke.texi | 16 +- gcc/ipa-icf-gimple.c | 384 +++ gcc/ipa-icf.c| 2841 ++ gcc/ipa-icf.h| 803 ++ gcc/lto-cgraph.c |2 + gcc/lto-section-in.c |3 +- gcc/lto-streamer.h |1 + gcc/opts.c |6 + gcc/passes.def |1 + gcc/timevar.def |1 + gcc/tree-pass.h |1 + 16 files changed, 4089 insertions(+), 8 deletions(-) create mode 100644 gcc/ipa-icf-gimple.c create mode 100644 gcc/ipa-icf.c create mode 100644 gcc/ipa-icf.h diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 3dd9d8f..8d02425 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1265,6 +1265,8 @@ OBJS = \ ipa-profile.o \ ipa-prop.o \ ipa-pure-const.o \ + ipa-icf.o \ + ipa-icf-gimple.o \ ipa-reference.o \ ipa-ref.o \ ipa-utils.o \ diff --git a/gcc/cgraph.c b/gcc/cgraph.c index fdcaf79..439db49 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -1913,6 +1913,8 @@ cgraph_node::dump (FILE *f) fprintf (f, only_called_at_exit); if (tm_clone) fprintf (f, tm_clone); + if (icf_merged) +fprintf (f, icf_merged); if (DECL_STATIC_CONSTRUCTOR (decl)) fprintf (f, static_constructor (priority:%i), get_init_priority ()); if (DECL_STATIC_DESTRUCTOR (decl
Re: [PATCH 4/5] Existing tests fix
On 06/30/2014 02:11 PM, Martin Liška wrote: On 06/17/2014 09:52 PM, Jeff Law wrote: On 06/13/14 04:48, mliska wrote: Hi, many tests rely on a precise number of scanned functions in a dump file. If IPA ICF decides to merge some function and(or) read-only variables, counts do not match. Martin Changelog: 2014-06-13 Martin Liska mli...@suse.cz Honza Hubicka hubi...@ucw.cz * c-c++-common/rotate-1.c: Text * c-c++-common/rotate-2.c: New test. * c-c++-common/rotate-3.c: Likewise. * c-c++-common/rotate-4.c: Likewise. * g++.dg/cpp0x/rv-return.C: Likewise. * g++.dg/cpp0x/rv1n.C: Likewise. * g++.dg/cpp0x/rv1p.C: Likewise. * g++.dg/cpp0x/rv2n.C: Likewise. * g++.dg/cpp0x/rv3n.C: Likewise. * g++.dg/cpp0x/rv4n.C: Likewise. * g++.dg/cpp0x/rv5n.C: Likewise. * g++.dg/cpp0x/rv6n.C: Likewise. * g++.dg/cpp0x/rv7n.C: Likewise. * gcc.dg/ipa/ipacost-1.c: Likewise. * gcc.dg/ipa/ipacost-2.c: Likewise. * gcc.dg/ipa/ipcp-agg-6.c: Likewise. * gcc.dg/ipa/remref-2a.c: Likewise. * gcc.dg/ipa/remref-2b.c: Likewise. * gcc.dg/pr46309-2.c: Likewise. * gcc.dg/torture/ipa-pta-1.c: Likewise. * gcc.dg/tree-ssa/andor-3.c: Likewise. * gcc.dg/tree-ssa/andor-4.c: Likewise. * gcc.dg/tree-ssa/andor-5.c: Likewise. * gcc.dg/vect/no-vfa-pr29145.c: Likewise. * gcc.dg/vect/vect-cond-10.c: Likewise. * gcc.dg/vect/vect-cond-9.c: Likewise. * gcc.dg/vect/vect-widen-mult-const-s16.c: Likewise. * gcc.dg/vect/vect-widen-mult-const-u16.c: Likewise. * gcc.dg/vect/vect-widen-mult-half-u8.c: Likewise. * gcc.target/i386/bmi-1.c: Likewise. * gcc.target/i386/bmi-2.c: Likewise. * gcc.target/i386/pr56564-2.c: Likewise. * g++.dg/opt/pr30965.C: Likewise. * g++.dg/tree-ssa/pr19637.C: Likewise. * gcc.dg/guality/csttest.c: Likewise. * gcc.dg/ipa/iinline-4.c: Likewise. * gcc.dg/ipa/iinline-7.c: Likewise. * gcc.dg/ipa/ipa-pta-13.c: Likewise. I know this is the least interesting part of your changes, but it's also simple and mechanical and thus trivial to review. 
Approved, but obviously don't install until the rest of your patch has been approved. Similar changes for recently added tests or cases where you might improve ICF requiring similar tweaks to existing tests are pre-approved as well. jeff Hello, I fixed few more tests and added correct ChangeLog message. gcc/testsuite/ChangeLog 2014-06-30 Martin Liska mli...@suse.cz Honza Hubicka hubi...@ucw.cz * c-c++-common/rotate-1.c: Test fixed. * c-c++-common/rotate-2.c: Likewise. * c-c++-common/rotate-3.c: Likewise. * c-c++-common/rotate-4.c: Likewise. * g++.dg/cpp0x/rv-return.C: Likewise. * g++.dg/cpp0x/rv1n.C: Likewise. * g++.dg/cpp0x/rv1p.C: Likewise. * g++.dg/cpp0x/rv2n.C: Likewise. * g++.dg/cpp0x/rv3n.C: Likewise. * g++.dg/cpp0x/rv4n.C: Likewise. * g++.dg/cpp0x/rv5n.C: Likewise. * g++.dg/cpp0x/rv6n.C: Likewise. * g++.dg/cpp0x/rv7n.C: Likewise. * g++.dg/ipa/devirt-g-1.C: Likewise. * g++.dg/ipa/inline-1.C: Likewise. * g++.dg/ipa/inline-2.C: Likewise. * g++.dg/ipa/inline-3.C: Likewise. * g++.dg/opt/pr30965.C: Likewise. * g++.dg/tree-ssa/pr19637.C: Likewise. * gcc.dg/guality/csttest.c: Likewise. * gcc.dg/ipa/iinline-4.c: Likewise. * gcc.dg/ipa/iinline-7.c: Likewise. * gcc.dg/ipa/ipa-pta-13.c: Likewise. * gcc.dg/ipa/ipacost-1.c: Likewise. * gcc.dg/ipa/ipacost-2.c: Likewise. * gcc.dg/ipa/ipcp-agg-6.c: Likewise. * gcc.dg/ipa/remref-2a.c: Likewise. * gcc.dg/ipa/remref-2b.c: Likewise. * gcc.dg/pr46309-2.c: Likewise. * gcc.dg/torture/ipa-pta-1.c: Likewise. * gcc.dg/tree-ssa/andor-3.c: Likewise. * gcc.dg/tree-ssa/andor-4.c: Likewise. * gcc.dg/tree-ssa/andor-5.c: Likewise. * gcc.dg/vect/no-vfa-pr29145.c: Likewise. * gcc.dg/vect/vect-cond-10.c: Likewise. * gcc.dg/vect/vect-cond-9.c: Likewise. * gcc.dg/vect/vect-widen-mult-const-s16.c: Likewise. * gcc.dg/vect/vect-widen-mult-const-u16.c: Likewise. * gcc.dg/vect/vect-widen-mult-half-u8.c: Likewise. * gcc.target/i386/bmi-1.c: Likewise. * gcc.target/i386/bmi-2.c: Likewise. * gcc.target/i386/pr56564-2.c: Likewise. Thank you, Martin Hello. 
There's an updated version of the patch that fixes another issue connected to the test suite. Thanks, Martin From e7818e646687c05e13a68828ef70fb41716a267c Mon Sep 17 00:00:00 2001 From: mliska mli...@suse.cz Date: Fri, 26 Sep 2014 13:52:29 +0200 Subject: [PATCH 2/3] IPA ICF: patch2. --- gcc/testsuite/c-c++-common/rotate-1.c | 2 +- gcc/testsuite/c-c++-common/rotate-2.c | 2 +- gcc/testsuite/c-c++-common/rotate-3.c | 2 +- gcc/testsuite/c-c++-common/rotate-4.c | 2 +- gcc/testsuite/g++.dg/cpp0x/rv-return.C| 1 + gcc/testsuite/g++.dg/cpp0x/rv1n.C | 2
Re: [PATCH 3/5] IPA ICF pass
On 09/27/2014 01:27 AM, Jan Hubicka wrote: While a plain Firefox -flto build works fine. LTO/PGO build fails with: lto1: internal compiler error: in ipa_merge_profiles, at ipa-utils.c:540 0x7d6165 ipa_merge_profiles(cgraph_node*, cgraph_node*) ../../gcc/gcc/ipa-utils.c:540 0xf10c41 ipa_icf::sem_function::merge(ipa_icf::sem_item*) ../../gcc/gcc/ipa-icf.c:753 0xf15206 ipa_icf::sem_item_optimizer::merge_classes(unsigned int) ../../gcc/gcc/ipa-icf.c:2706 0xf1c1f4 ipa_icf::sem_item_optimizer::execute() ../../gcc/gcc/ipa-icf.c:2098 0xf1d3f1 ipa_icf_driver ../../gcc/gcc/ipa-icf.c:2784 0xf1d3f1 ipa_icf::pass_ipa_icf::execute(function*) ../../gcc/gcc/ipa-icf.c:2831 The pass is also very memory hungry (from 3GB without ICF to 4GB during libxul link), while the code size savings are in the 1% range. The majority of the problem are groups of candidates that are built according to hash. The hash value is based on a number of arguments, number of BB, number of gimple statements and types of these statements. It groups function into classes. In WPA (before a body of any function is loaded) I get following histogram: Dump after WPA based types groups Congruence classes: 97204 (unique hash values: 88725), with total: 191457 items Class size histogram [num of members]: number of classe number of classess [1]: 86453 classes [2]: 5680 classes [3]: 1541 classes [4]: 915 classes [5]: 446 classes [6]: 346 classes [7]: 200 classes [8]: 181 classes [9]: 154 classes [10]: 109 classes [11]: 87 classes [12]: 87 classes [13]: 68 classes [14]: 58 classes [15]: 58 classes [16]: 41 classes [17]: 25 classes [18]: 33 classes [19]: 28 classes [20]: 25 classes [21]: 19 classes [22]: 30 classes [23]: 24 classes [24]: 33 classes [25]: 17 classes [26]: 15 classes [27]: 10 classes [28]: 13 classes [29]: 18 classes [30]: 10 classes It means that each class with more than one member needs to be iterated and these functions are compared. And yes, there's the root of the problem. 
I have to load function body to process deep function comparison. As you can see, we have almost 200k functions, where more than half of them are situated in a group with more than one member. So that 1GB extra memory usage is caused by these bodies: Init called for 105004 items (54.84%). Memory footprint can be significantly reduced if one can load the body and release it and the memory is freed. I asked Honza about it, but it looks like the GGC mechanism cannot be easily forced to release it. Thanks for checking. I was just thinking about doing that myself. Would you mind posting -ftime-report of firefox WPA stage? It seems that in this case we reject too many of equality candidates? I think the original numbers were about 4-5% but later some equivalences were disabled because of devirt/aliasing issues. Do you compare it with gold ICF enabled? There are quite a few obvious improvements to the analysis that can be done, but I guess we need to analyze the interesting cases one by one. You are right, the numbers were quite promising, but during the time, I had to reduce the aggressivity of the pass. As Honza said, it can be improved step-by-step. One thing that Martin can try is to hook into lto-symtab and try to check that the COMDAT functions that are known to be same pass the equality check. I suppose we will learn interesting things this way. Good point, I will try it. Martin I think the patch adds quite important infrastructure for gimple semantic equality checking and function merging. I went through the majority of code and I think it is mostly ready to mainline (i.e. cleaner than what we have in tree-ssa-tailmerge) so hope we can finish the review process next week. We will need to get better cost/benefits ratio to enable it for -O2 that is something I would really like to see for 5.0, but it seems to be easier to handle this incrementally Thank you for the review, Martin Honza
Re: [PATCH 3/5] IPA ICF pass
On 09/27/2014 07:59 AM, Markus Trippelsdorf wrote: On 2014.09.27 at 01:27 +0200, Jan Hubicka wrote: While a plain Firefox -flto build works fine. LTO/PGO build fails with: lto1: internal compiler error: in ipa_merge_profiles, at ipa-utils.c:540 0x7d6165 ipa_merge_profiles(cgraph_node*, cgraph_node*) ../../gcc/gcc/ipa-utils.c:540 0xf10c41 ipa_icf::sem_function::merge(ipa_icf::sem_item*) ../../gcc/gcc/ipa-icf.c:753 0xf15206 ipa_icf::sem_item_optimizer::merge_classes(unsigned int) ../../gcc/gcc/ipa-icf.c:2706 0xf1c1f4 ipa_icf::sem_item_optimizer::execute() ../../gcc/gcc/ipa-icf.c:2098 0xf1d3f1 ipa_icf_driver ../../gcc/gcc/ipa-icf.c:2784 0xf1d3f1 ipa_icf::pass_ipa_icf::execute(function*) ../../gcc/gcc/ipa-icf.c:2831 The pass is also very memory hungry (from 3GB without ICF to 4GB during libxul link), while the code size savings are in the 1% range. Thnks for checking. I was just thinking about doing that myself. Would you mind posting -ftime-report of firefox WPA stage? (without ICF) Execution times (seconds) phase setup : 0.00 ( 0%) usr 0.00 ( 0%) sys 0.01 ( 0%) wall1412 kB ( 0%) ggc phase opt and generate : 58.38 (63%) usr 2.00 (47%) sys 60.37 (40%) wall 403069 kB (12%) ggc phase stream in : 30.24 (33%) usr 0.97 (23%) sys 33.90 (22%) wall 2944210 kB (88%) ggc phase stream out: 4.29 ( 5%) usr 1.32 (31%) sys 57.32 (38%) wall 0 kB ( 0%) ggc phase finalize : 0.00 ( 0%) usr 0.00 ( 0%) sys 0.13 ( 0%) wall 0 kB ( 0%) ggc garbage collection : 3.68 ( 4%) usr 0.00 ( 0%) sys 3.68 ( 2%) wall 0 kB ( 0%) ggc callgraph optimization : 0.50 ( 1%) usr 0.00 ( 0%) sys 0.50 ( 0%) wall 166 kB ( 0%) ggc ipa dead code removal : 6.91 ( 7%) usr 0.08 ( 2%) sys 7.25 ( 5%) wall 0 kB ( 0%) ggc ipa virtual call target : 7.08 ( 8%) usr 0.04 ( 1%) sys 6.93 ( 5%) wall 0 kB ( 0%) ggc ipa devirtualization: 0.27 ( 0%) usr 0.00 ( 0%) sys 0.27 ( 0%) wall 10365 kB ( 0%) ggc ipa cp : 1.81 ( 2%) usr 0.06 ( 1%) sys 3.40 ( 2%) wall 173701 kB ( 5%) ggc ipa inlining heuristics : 16.60 (18%) usr 0.27 ( 6%) sys 
17.48 (12%) wall 532704 kB (16%) ggc ipa comdats : 0.19 ( 0%) usr 0.00 ( 0%) sys 0.19 ( 0%) wall 0 kB ( 0%) ggc ipa lto gimple out : 0.21 ( 0%) usr 0.04 ( 1%) sys 0.97 ( 1%) wall 0 kB ( 0%) ggc ipa lto decl in : 18.29 (20%) usr 0.54 (13%) sys 18.96 (12%) wall 2226088 kB (66%) ggc ipa lto decl out: 3.93 ( 4%) usr 0.13 ( 3%) sys 4.06 ( 3%) wall 0 kB ( 0%) ggc ipa lto constructors in : 0.24 ( 0%) usr 0.03 ( 1%) sys 0.59 ( 0%) wall 14226 kB ( 0%) ggc ipa lto constructors out: 0.08 ( 0%) usr 0.04 ( 1%) sys 0.15 ( 0%) wall 0 kB ( 0%) ggc ipa lto cgraph I/O : 0.89 ( 1%) usr 0.12 ( 3%) sys 1.02 ( 1%) wall 364151 kB (11%) ggc ipa lto decl merge : 2.14 ( 2%) usr 0.01 ( 0%) sys 2.14 ( 1%) wall8196 kB ( 0%) ggc ipa lto cgraph merge: 1.59 ( 2%) usr 0.00 ( 0%) sys 1.60 ( 1%) wall 12716 kB ( 0%) ggc whopr wpa : 1.54 ( 2%) usr 0.03 ( 1%) sys 1.55 ( 1%) wall 1 kB ( 0%) ggc whopr wpa I/O : 0.04 ( 0%) usr 1.11 (26%) sys 52.10 (34%) wall 0 kB ( 0%) ggc whopr partitioning : 5.02 ( 5%) usr 0.01 ( 0%) sys 5.03 ( 3%) wall4938 kB ( 0%) ggc ipa reference : 2.04 ( 2%) usr 0.02 ( 0%) sys 2.08 ( 1%) wall 0 kB ( 0%) ggc ipa profile : 0.32 ( 0%) usr 0.00 ( 0%) sys 0.33 ( 0%) wall 0 kB ( 0%) ggc ipa pure const : 2.43 ( 3%) usr 0.02 ( 0%) sys 2.49 ( 2%) wall 0 kB ( 0%) ggc tree STMT verifier : 0.01 ( 0%) usr 0.00 ( 0%) sys 0.00 ( 0%) wall 0 kB ( 0%) ggc callgraph verifier : 16.31 (18%) usr 1.69 (39%) sys 17.96 (12%) wall 0 kB ( 0%) ggc dominance computation : 0.01 ( 0%) usr 0.00 ( 0%) sys 0.02 ( 0%) wall 0 kB ( 0%) ggc varconst: 0.01 ( 0%) usr 0.03 ( 1%) sys 0.05 ( 0%) wall 0 kB ( 0%) ggc unaccounted todo: 0.69 ( 1%) usr 0.00 ( 0%) sys 0.69 ( 0%) wall 0 kB ( 0%) ggc TOTAL : 92.91 4.29 151.73 3348693 kB Extra diagnostic checks enabled; compiler may run slowly. Configure with --enable-checking=release to disable checks. 
(with ICF) Execution times (seconds) phase setup : 0.00 ( 0%) usr 0.00 ( 0%) sys 0.01 ( 0%) wall1412 kB ( 0%) ggc phase opt and generate : 82.70 (70%) usr 3.31 (53%) sys 86.17 (45%) wall 1468975 kB (33%) ggc phase stream in : 30.46 (26%) usr 1.02 (16%) sys 31.48 (16%) wall 2944210 kB (67%) ggc phase stream out: 4.52 ( 4%) usr
Re: [PATCH 3/5] IPA ICF pass
On 09/27/2014 09:47 AM, Markus Trippelsdorf wrote: On 2014.09.27 at 07:59 +0200, Markus Trippelsdorf wrote: It seems that in this case we reject too many of equality candidates? It think the original numbers was about 4-5% but later some equivalences was disabled because of devirt/aliasing issues. Do you compare it with gold ICF enabled? There are quite few obvious improvements to the analysis that can be done, but I guess we need to analyze the interesting cases one by one. Forgot to post the binary size numbers (in bytes): | gold's icf off | gold's icf on | --+++ gcc's icf off |79793880|74881040| --+-+ gcc's icf on |78043608|73612800| --+++ Thanks once more! Gold ICF is quite strong, I will verify what functions are not caught by IPA ICF. These data present that IPA ICF can reduce the binary by 2.19%. I know that it's quite a small improvement, but if you realize that the pass can reduce just the size of .text (and slightly related sections). There are stats about libxul.so (please ignore last 3 columns): Section name Start Size in BSizePortion Disk read in B Disk read Sec. 
portion 0 0 0.00 B 0.00% 0 0.00 B 0.00% .note.gnu.build-i512 36 36.00 B 0.00% 0 0.00 B 0.00% .dynsym 552 8119279.29 KB 0.08% 0 0.00 B 0.00% .dynstr81744 9085988.73 KB 0.09% 0 0.00 B 0.00% .hash 172608 2175221.24 KB 0.02% 0 0.00 B 0.00% .gnu.version 1943606766 6.61 KB 0.01% 0 0.00 B 0.00% .gnu.version_d201128 56 56.00 B 0.00% 0 0.00 B 0.00% .gnu.version_r2011841216 1.19 KB 0.00% 0 0.00 B 0.00% .rela.dyn 202400 8198208 7.82 MB 8.56% 0 0.00 B 0.00% .rela.plt8400608 7027268.62 KB 0.07% 0 0.00 B 0.00% .init8470880 26 26.00 B 0.00% 0 0.00 B 0.00% .plt 8470912 4686445.77 KB 0.05% 0 0.00 B 0.00% .text85177763901433337.21 MB 40.72% 0 0.00 B 0.00% .fini 47532112 9 9.00 B 0.00% 0 0.00 B 0.00% .rodata 475322881525856014.55 MB 15.93% 0 0.00 B 0.00% .eh_frame 62790848 6203564 5.92 MB 6.47% 0 0.00 B 0.00% .eh_frame_hdr 68994412 1088012 1.04 MB 1.14% 0 0.00 B 0.00% .tbss 70082560 4 4.00 B 0.00% 0 0.00 B 0.00% .dynamic700825601104 1.08 KB 0.00% 0 0.00 B 0.00% .got700836641384 1.35 KB 0.00% 0 0.00 B 0.00% .got.plt70085048 2344822.90 KB 0.02% 0 0.00 B 0.00% .data 70108544 811616 792.59 KB 0.85% 0 0.00 B 0.00% .jcr70920160 8 8.00 B 0.00% 0 0.00 B 0.00% .tm_clone_table 70920168 0 0.00 B 0.00% 0 0.00 B 0.00% .fini_array 70920168 8 8.00 B 0.00% 0 0.00 B 0.00% .init_array 70920176 16 16.00 B 0.00% 0 0.00 B 0.00% .data.rel.ro.loca 70920192 3938880 3.76 MB 4.11% 0 0.00 B 0.00% .data.rel.ro74859072 269216 262.91 KB 0.28% 0 0.00 B 0.00% .bss75128320 1844246 1.76 MB 1.92% 0 0.00 B 0.00% .debug_line 75128288 517517.00 B 0.00% 0 0.00 B 0.00% .debug_info 75128805 817817.00 B 0.00% 0 0.00 B 0.00% .debug_abbrev 75129622 438438.00 B 0.00% 0
Re: [PATCH, DOC]: New value 'default' for --enable-languages
On 08/21/2014 01:57 PM, Martin Liška wrote: Ping. There was no explicit agreement that I can commit the change to trunk? Thanks, Martin On 07/30/2014 08:19 PM, Martin Liška wrote: On 07/30/2014 06:38 PM, Mike Stump wrote: On Jul 30, 2014, at 6:20 AM, Richard Biener richard.guent...@gmail.com wrote: On Wed, Jul 30, 2014 at 3:19 PM, Richard Biener richard.guent...@gmail.com wrote: On Wed, Jul 30, 2014 at 1:37 PM, Martin Liška mli...@suse.cz wrote: Hello, as discussed in previous thread (https://gcc.gnu.org/ml/gcc-patches/2014-07/msg02010.html), I would like to add more intuitive behavior for --enable-languages configure option. It works for me, but as I'm currently always testing all,ada,obj-c++ how can I easily continue to do that - that is, not test go? ;) Of course with default,ada,obj-c++ ... stupid me. In time, we’ll have a all,!go…. :-) Does 'go' mean that the patch is ready for trunk :D ? Martin PING^2. Thank you, Martin
[PATCH] lto.c: add ARG_UNUSED for an argument
Hello. In lto.c, if HAVE_WORKING_FORK isn't defined, the 'last' argument is unused, and in that case GCC cannot bootstrap due to a warning. Ready for trunk? Built on x86_64-linux-gnu. Thanks, Martin gcc/lto/ChangeLog: 2014-10-06 Martin Liska mli...@suse.cz * lto.c (stream_out): ARG_UNUSED added for last argument. diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c index 6cbb178..bc53632 100644 --- a/gcc/lto/lto.c +++ b/gcc/lto/lto.c @@ -2495,7 +2495,8 @@ wait_for_child () Fork if that seems to help. */ static void -stream_out (char *temp_filename, lto_symtab_encoder_t encoder, bool last) +stream_out (char *temp_filename, lto_symtab_encoder_t encoder, + bool ARG_UNUSED (last)) { #ifdef HAVE_WORKING_FORK static int nruns;
[PATCH] PR/63376
Hello. This is a one-line patch that fixes the issue in PR63376. This was a mechanical error and I will commit it as obvious. Thank you, Martin gcc/ChangeLog: 2014-10-11 Martin Liska mli...@suse.cz PR/63376 * cgraphunit.c (symbol_table::process_new_functions): Missing call for call_cgraph_insertion_hooks added. diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index d463505..5985e3d 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -329,6 +329,7 @@ symbol_table::process_new_functions (void) free_dominance_info (CDI_POST_DOMINATORS); free_dominance_info (CDI_DOMINATORS); pop_cfun (); + call_cgraph_insertion_hooks (node); break; case EXPANSION:
Re: [PATCH 3/5] IPA ICF pass
On 09/28/2014 03:20 AM, Jan Hubicka wrote: Hi. Thank you Markus for presenting numbers, it corresponds with I measured. If I see correctly, IPA ICF pass takes about 7 seconds, the rest is distributed in verifier (not interesting for release version of the compiler) and 'phase opt and generate'. No idea what can make the difference? phase opt and generate just combine all the optimization times together, so it is same 7 seconds as in the ICF pass :) 1GB of function bodies just to elimnate 2-3% of code seems quite alot. Do you have any idea how many of those turns out to be different? It would be nice to be able to release the duplicate bodies from memory after the equivalency was stablished Honza Martin (I resend the message, my mail client was a bit confused, please do _not_ reply to faktur...@foxlink.cz) Hello. After few days of measurement and tuning, I was able to get numbers to the following shape: Execution times (seconds) phase setup : 0.00 ( 0%) usr 0.00 ( 0%) sys 0.00 ( 0%) wall 1412 kB ( 0%) ggc phase opt and generate : 27.83 (59%) usr 0.66 (19%) sys 28.52 (37%) wall 1028813 kB (24%) ggc phase stream in : 16.90 (36%) usr 0.63 (18%) sys 17.60 (23%) wall 3246453 kB (76%) ggc phase stream out: 2.76 ( 6%) usr 2.19 (63%) sys 31.34 (40%) wall 2 kB ( 0%) ggc callgraph optimization : 0.36 ( 1%) usr 0.00 ( 0%) sys 0.35 ( 0%) wall 40 kB ( 0%) ggc ipa dead code removal : 3.31 ( 7%) usr 0.01 ( 0%) sys 3.25 ( 4%) wall 0 kB ( 0%) ggc ipa virtual call target : 3.69 ( 8%) usr 0.03 ( 1%) sys 3.80 ( 5%) wall 21 kB ( 0%) ggc ipa devirtualization: 0.12 ( 0%) usr 0.00 ( 0%) sys 0.15 ( 0%) wall 13704 kB ( 0%) ggc ipa cp : 1.11 ( 2%) usr 0.07 ( 2%) sys 1.17 ( 2%) wall 188558 kB ( 4%) ggc ipa inlining heuristics : 8.17 (17%) usr 0.14 ( 4%) sys 8.27 (11%) wall 494738 kB (12%) ggc ipa comdats : 0.12 ( 0%) usr 0.00 ( 0%) sys 0.12 ( 0%) wall 0 kB ( 0%) ggc ipa lto gimple in : 1.86 ( 4%) usr 0.40 (11%) sys 2.20 ( 3%) wall 537970 kB (13%) ggc ipa lto gimple out : 0.19 ( 0%) usr 0.08 ( 
2%) sys 0.27 ( 0%) wall 2 kB ( 0%) ggc ipa lto decl in : 12.20 (26%) usr 0.37 (11%) sys 12.64 (16%) wall 2441687 kB (57%) ggc ipa lto decl out: 2.51 ( 5%) usr 0.21 ( 6%) sys 2.71 ( 3%) wall 0 kB ( 0%) ggc ipa lto constructors in : 0.13 ( 0%) usr 0.02 ( 1%) sys 0.17 ( 0%) wall 15692 kB ( 0%) ggc ipa lto constructors out: 0.03 ( 0%) usr 0.00 ( 0%) sys 0.03 ( 0%) wall 0 kB ( 0%) ggc ipa lto cgraph I/O : 0.54 ( 1%) usr 0.09 ( 3%) sys 0.63 ( 1%) wall 407182 kB (10%) ggc ipa lto decl merge : 1.34 ( 3%) usr 0.00 ( 0%) sys 1.34 ( 2%) wall 8220 kB ( 0%) ggc ipa lto cgraph merge: 1.00 ( 2%) usr 0.00 ( 0%) sys 1.00 ( 1%) wall 14605 kB ( 0%) ggc whopr wpa : 0.92 ( 2%) usr 0.00 ( 0%) sys 0.89 ( 1%) wall 1 kB ( 0%) ggc whopr wpa I/O : 0.01 ( 0%) usr 1.90 (55%) sys 28.31 (37%) wall 0 kB ( 0%) ggc whopr partitioning : 2.81 ( 6%) usr 0.01 ( 0%) sys 2.83 ( 4%) wall 4943 kB ( 0%) ggc ipa reference : 1.34 ( 3%) usr 0.00 ( 0%) sys 1.35 ( 2%) wall 0 kB ( 0%) ggc ipa profile : 0.20 ( 0%) usr 0.01 ( 0%) sys 0.21 ( 0%) wall 0 kB ( 0%) ggc ipa pure const : 1.62 ( 3%) usr 0.00 ( 0%) sys 1.63 ( 2%) wall 0 kB ( 0%) ggc ipa icf : 2.65 ( 6%) usr 0.02 ( 1%) sys 2.68 ( 3%) wall 1352 kB ( 0%) ggc inline parameters : 0.00 ( 0%) usr 0.01 ( 0%) sys 0.00 ( 0%) wall 0 kB ( 0%) ggc tree SSA rewrite: 0.11 ( 0%) usr 0.01 ( 0%) sys 0.08 ( 0%) wall 18919 kB ( 0%) ggc tree SSA other : 0.01 ( 0%) usr 0.00 ( 0%) sys 0.01 ( 0%) wall 0 kB ( 0%) ggc tree SSA incremental: 0.24 ( 1%) usr 0.01 ( 0%) sys 0.32 ( 0%) wall 11325 kB ( 0%) ggc tree operand scan : 0.15 ( 0%) usr 0.02 ( 1%) sys 0.18 ( 0%) wall 116283 kB ( 3%) ggc dominance frontiers : 0.01 ( 0%) usr 0.00 ( 0%) sys 0.02 ( 0%) wall 0 kB ( 0%) ggc dominance computation : 0.13 ( 0%) usr 0.01 ( 0%) sys 0.16 ( 0%) wall 0 kB ( 0%) ggc varconst: 0.01 ( 0%) usr 0.02 ( 1%) sys 0.01 ( 0%) wall 0 kB ( 0%) ggc loop fini : 0.02 ( 0%) usr 0.00 ( 0%) sys 0.04 ( 0%) wall 0 kB ( 0%) ggc unaccounted todo: 0.55 ( 1%) usr 0.00 ( 0%) sys 0.56 ( 1%) wall 0 kB ( 0%) ggc TOTAL : 
47.49 3.48 77.46 4276682 kB and I was able to reduce function bodies loaded in WPA to 35% (from the previous 55%). The main problem with speed was hidden in the work list for congruence classes, where hash_set was used. I chose
Re: [PATCH 3/5] IPA ICF pass
On 09/26/2014 09:46 PM, Jan Hubicka wrote: Hi, this is on ipa-icf-gimple.c @@ -2827,11 +2829,19 @@ cgraph_node::verify_node (void) { if (verify_edge_corresponds_to_fndecl (e, decl)) { - error (edge points to wrong declaration:); - debug_tree (e-callee-decl); - fprintf (stderr, Instead of:); - debug_tree (decl); - error_found = true; + /* The edge can be redirected in WPA by IPA ICF. + Following check really ensures that it's + not the case. */ + + cgraph_node *current_node = cgraph_node::get (decl); + if (!current_node || !current_node-icf_merged) I would move this into verify_edge_corresponds_to_fndecl. diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c new file mode 100644 index 000..7031eaa --- /dev/null +++ b/gcc/ipa-icf-gimple.c @@ -0,0 +1,384 @@ +/* Interprocedural Identical Code Folding pass + Copyright (C) 2014 Free Software Foundation, Inc. + + Contributed by Jan Hubicka hubi...@ucw.cz and Martin Liska mli...@suse.cz + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +http://www.gnu.org/licenses/. */ Please add toplevel comment about what the code does and how to use it. +namespace ipa_icf { + +/* Basic block equivalence comparison function that returns true if + basic blocks BB1 and BB2 (from functions FUNC1 and FUNC2) correspond. */ ... to each other? I would add short comment that as comparsion goes you build voclabulary of equivalences of variables/ssanames etc. 
So people reading the code do not get lost at very beggining. + +bool +func_checker::compare_bb (sem_bb *bb1, sem_bb *bb2) +{ + unsigned i; + gimple_stmt_iterator gsi1, gsi2; + gimple s1, s2; + + if (bb1-nondbg_stmt_count != bb2-nondbg_stmt_count + || bb1-edge_count != bb2-edge_count) +return RETURN_FALSE (); The UPPERCASE looks ugly. I see that RETURN_FALSE is a warpper for return_false_with_msg that outputs line and file information. I would make it lowercase even if it is macro. You may consider using CXX_MEM_STAT_INFO style default argument to avoid function macro completely. Probably not big win given that it won't save you from preprocesor mess. + + gsi1 = gsi_start_bb (bb1-bb); + gsi2 = gsi_start_bb (bb2-bb); + + for (i = 0; i bb1-nondbg_stmt_count; i++) +{ + if (is_gimple_debug (gsi_stmt (gsi1))) + gsi_next_nondebug (gsi1); + + if (is_gimple_debug (gsi_stmt (gsi2))) + gsi_next_nondebug (gsi2); + + s1 = gsi_stmt (gsi1); + s2 = gsi_stmt (gsi2); + + if (gimple_code (s1) != gimple_code (s2)) + return RETURN_FALSE_WITH_MSG (gimple codes are different); I think you need to compare EH here. Consider case where one unit is compiled with -fno-exception and thus all EH regions are removed, while other function has EH regions in it. Those are not equivalent. EH region is obtained by lookup_stmt_eh and then you need to comapre them for match as you do with gimple_resx_regoin. + t1 = gimple_call_fndecl (s1); + t2 = gimple_call_fndecl (s2); + + /* Function pointer variables are not supported yet. */ They seems to be, compare_operand seems just right. + +/* Verifies for given GIMPLEs S1 and S2 that + label statements are semantically equivalent. 
*/ + +bool +func_checker::compare_gimple_label (gimple g1, gimple g2) +{ + if (m_ignore_labels) +return true; + + tree t1 = gimple_label_label (g1); + tree t2 = gimple_label_label (g2); + + return compare_tree_ssa_label (t1, t2); +} I would expect the main BB loop to record BB in which label belongs to and the BB assciatio neing checked here. Otherwise I do not see how switch statements are compared to not have different permutations of targets. Also note that one BB may have multiple labels in them and they are equivalent. Also I would punt on occurence of FORCED_LABEL. Those are tricky as they may be passed around and compared for address and no one really defines what should happen. Better to avoid those. Hi. I will
Re: [PATCH 3/5] IPA ICF pass
On 09/26/2014 11:27 PM, Jan Hubicka wrote: diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c new file mode 100644 index 000..f3472fe --- /dev/null +++ b/gcc/ipa-icf.c @@ -0,0 +1,2841 @@ +/* Interprocedural Identical Code Folding pass + Copyright (C) 2014 Free Software Foundation, Inc. + + Contributed by Jan Hubicka hubi...@ucw.cz and Martin Liska mli...@suse.cz + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +http://www.gnu.org/licenses/. */ + +/* Interprocedural Identical Code Folding for functions and + read-only variables. + + The goal of this transformation is to discover functions and read-only + variables which do have exactly the same semantics. (or value) + + In case of functions, + we could either create a virtual clone or do a simple function wrapper + that will call equivalent function. If the function is just locally visible, + all function calls can be redirected. For read-only variables, we create + aliases if possible. + + Optimization pass arranges as follows: The optimization pass is arranged as follows: (I guess) I also wonder if the gimple equality code should be in ipa_icf namespace, it is intended to be shared with tail merging pass, so what about just calling it gimple_sem_equality? +/* Verification function for edges E1 and E2. 
*/ + +bool +func_checker::compare_edge (edge e1, edge e2) +{ + if (e1-flags != e2-flags) +return false; In future we may want to experiment with checking that edge probabilities with profile feedback match and refuse to merge BBs with different outgoing probabilities (i.e. +-5%). Just add it as TODO there, please. + +/* Return true if types are compatible from perspective of ICF. */ +bool func_checker::types_are_compatible_p (tree t1, tree t2, Perhaps dropping _are_ would make sense, so we do not have two names for essentially same thing. +bool compare_polymorphic, +bool first_argument) +{ + if (TREE_CODE (t1) != TREE_CODE (t2)) +return RETURN_FALSE_WITH_MSG (different tree types); + + if (!types_compatible_p (t1, t2)) +return RETURN_FALSE_WITH_MSG (types are not compatible); + + if (get_alias_set (t1) != get_alias_set (t2)) +return RETURN_FALSE_WITH_MSG (alias sets are different); You do not need to compare alias sets except for memory operations IMO. Hello. Yeah, you are right. But even Richard advised me to put it to a single place. Maybe we are a bit more strict than it would be necessary. But I hope that's fine ;) + + /* We call contains_polymorphic_type_p with this pointer type. */ + if (first_argument TREE_CODE (t1) == POINTER_TYPE) +{ + t1 = TREE_TYPE (t1); + t2 = TREE_TYPE (t2); +} + + if (compare_polymorphic + (contains_polymorphic_type_p (t1) || contains_polymorphic_type_p (t2))) +{ + if (!contains_polymorphic_type_p (t1) || !contains_polymorphic_type_p (t2)) +return RETURN_FALSE_WITH_MSG (one type is not polymorphic); + + if (TYPE_MAIN_VARIANT (t1) != TYPE_MAIN_VARIANT (t2)) +return RETURN_FALSE_WITH_MSG (type variants are different for + polymorphic type); I added types_must_be_same_for_odr (t1,t2) for you here. +/* Fast equality function based on knowledge known in WPA. 
*/ + +bool +sem_function::equals_wpa (sem_item *item) +{ + gcc_assert (item-type == FUNC); + + m_compared_func = static_castsem_function * (item); + + if (arg_types.length () != m_compared_func-arg_types.length ()) +return RETURN_FALSE_WITH_MSG (different number of arguments); + + /* Checking types of arguments. */ + for (unsigned i = 0; i arg_types.length (); i++) +{ + /* This guard is here for function pointer with attributes (pr59927.c). */ + if (!arg_types[i] || !m_compared_func-arg_types[i]) +return RETURN_FALSE_WITH_MSG (NULL argument type); + + if (!func_checker::types_are_compatible_p (arg_types[i], + m_compared_func-arg_types[i], + true, i == 0)) +return RETURN_FALSE_WITH_MSG (argument type is different); +} + + /* Result type checking. */ + if (!func_checker::types_are_compatible_p (result_type, + m_compared_func-result_type)) +return RETURN_FALSE_WITH_MSG (result types are different); You may want to compare ECF flags, such as nothrow/const/pure. We do not want to merge
Re: [PATCH 3/5] IPA ICF pass
On 10/11/2014 10:19 AM, Jan Hubicka wrote: After few days of measurement and tuning, I was able to get numbers to the following shape: Execution times (seconds) phase setup : 0.00 ( 0%) usr 0.00 ( 0%) sys 0.00 ( 0%) wall 1412 kB ( 0%) ggc phase opt and generate : 27.83 (59%) usr 0.66 (19%) sys 28.52 (37%) wall 1028813 kB (24%) ggc phase stream in : 16.90 (36%) usr 0.63 (18%) sys 17.60 (23%) wall 3246453 kB (76%) ggc phase stream out: 2.76 ( 6%) usr 2.19 (63%) sys 31.34 (40%) wall 2 kB ( 0%) ggc callgraph optimization : 0.36 ( 1%) usr 0.00 ( 0%) sys 0.35 ( 0%) wall 40 kB ( 0%) ggc ipa dead code removal : 3.31 ( 7%) usr 0.01 ( 0%) sys 3.25 ( 4%) wall 0 kB ( 0%) ggc ipa virtual call target : 3.69 ( 8%) usr 0.03 ( 1%) sys 3.80 ( 5%) wall 21 kB ( 0%) ggc ipa devirtualization: 0.12 ( 0%) usr 0.00 ( 0%) sys 0.15 ( 0%) wall 13704 kB ( 0%) ggc ipa cp : 1.11 ( 2%) usr 0.07 ( 2%) sys 1.17 ( 2%) wall 188558 kB ( 4%) ggc ipa inlining heuristics : 8.17 (17%) usr 0.14 ( 4%) sys 8.27 (11%) wall 494738 kB (12%) ggc ipa comdats : 0.12 ( 0%) usr 0.00 ( 0%) sys 0.12 ( 0%) wall 0 kB ( 0%) ggc ipa lto gimple in : 1.86 ( 4%) usr 0.40 (11%) sys 2.20 ( 3%) wall 537970 kB (13%) ggc ipa lto gimple out : 0.19 ( 0%) usr 0.08 ( 2%) sys 0.27 ( 0%) wall 2 kB ( 0%) ggc ipa lto decl in : 12.20 (26%) usr 0.37 (11%) sys 12.64 (16%) wall 2441687 kB (57%) ggc ipa lto decl out: 2.51 ( 5%) usr 0.21 ( 6%) sys 2.71 ( 3%) wall 0 kB ( 0%) ggc ipa lto constructors in : 0.13 ( 0%) usr 0.02 ( 1%) sys 0.17 ( 0%) wall 15692 kB ( 0%) ggc ipa lto constructors out: 0.03 ( 0%) usr 0.00 ( 0%) sys 0.03 ( 0%) wall 0 kB ( 0%) ggc ipa lto cgraph I/O : 0.54 ( 1%) usr 0.09 ( 3%) sys 0.63 ( 1%) wall 407182 kB (10%) ggc ipa lto decl merge : 1.34 ( 3%) usr 0.00 ( 0%) sys 1.34 ( 2%) wall 8220 kB ( 0%) ggc ipa lto cgraph merge: 1.00 ( 2%) usr 0.00 ( 0%) sys 1.00 ( 1%) wall 14605 kB ( 0%) ggc whopr wpa : 0.92 ( 2%) usr 0.00 ( 0%) sys 0.89 ( 1%) wall 1 kB ( 0%) ggc whopr wpa I/O : 0.01 ( 0%) usr 1.90 (55%) sys 28.31 (37%) wall 0 
kB ( 0%) ggc whopr partitioning : 2.81 ( 6%) usr 0.01 ( 0%) sys 2.83 ( 4%) wall 4943 kB ( 0%) ggc ipa reference : 1.34 ( 3%) usr 0.00 ( 0%) sys 1.35 ( 2%) wall 0 kB ( 0%) ggc ipa profile : 0.20 ( 0%) usr 0.01 ( 0%) sys 0.21 ( 0%) wall 0 kB ( 0%) ggc ipa pure const : 1.62 ( 3%) usr 0.00 ( 0%) sys 1.63 ( 2%) wall 0 kB ( 0%) ggc ipa icf : 2.65 ( 6%) usr 0.02 ( 1%) sys 2.68 ( 3%) wall 1352 kB ( 0%) ggc inline parameters : 0.00 ( 0%) usr 0.01 ( 0%) sys 0.00 ( 0%) wall 0 kB ( 0%) ggc tree SSA rewrite: 0.11 ( 0%) usr 0.01 ( 0%) sys 0.08 ( 0%) wall 18919 kB ( 0%) ggc tree SSA other : 0.01 ( 0%) usr 0.00 ( 0%) sys 0.01 ( 0%) wall 0 kB ( 0%) ggc tree SSA incremental: 0.24 ( 1%) usr 0.01 ( 0%) sys 0.32 ( 0%) wall 11325 kB ( 0%) ggc tree operand scan : 0.15 ( 0%) usr 0.02 ( 1%) sys 0.18 ( 0%) wall 116283 kB ( 3%) ggc dominance frontiers : 0.01 ( 0%) usr 0.00 ( 0%) sys 0.02 ( 0%) wall 0 kB ( 0%) ggc dominance computation : 0.13 ( 0%) usr 0.01 ( 0%) sys 0.16 ( 0%) wall 0 kB ( 0%) ggc varconst: 0.01 ( 0%) usr 0.02 ( 1%) sys 0.01 ( 0%) wall 0 kB ( 0%) ggc loop fini : 0.02 ( 0%) usr 0.00 ( 0%) sys 0.04 ( 0%) wall 0 kB ( 0%) ggc unaccounted todo: 0.55 ( 1%) usr 0.00 ( 0%) sys 0.56 ( 1%) wall 0 kB ( 0%) ggc TOTAL : 47.49 3.4877.46 4276682 kB and I was able to reduce function bodies loaded in WPA to 35% (from previous 55%). The main problem 35% means that 35% of all function bodies are compared with something else? That feels pretty high. but overall numbers are not so terrible. Currently, the pass is able to merge 32K functions. As you know, we group functions to so called classes. According to stats, average non-singular class size contains at the end of comparison 7.39 candidates and we have 5K such functions. Because we load body for each candidate in such groups, it gives us minimum number of loaded bodies: 37K. As we load 70K function, we have still place to improve. But I guess WPA body-less comparison is quite efficient. 
with speed was hidden in the work list for congruence classes, where hash_set was used. I chose the data structure to support the delete operation, but it was really slow. Thus, hash_set was replaced with a linked list and a flag is used to identify if a set is
Re: [PATCH 3/5] IPA ICF pass
On 10/11/2014 02:05 AM, Martin Liška wrote: On 09/26/2014 09:46 PM, Jan Hubicka wrote: Hi, this is on ipa-icf-gimple.c @@ -2827,11 +2829,19 @@ cgraph_node::verify_node (void) { if (verify_edge_corresponds_to_fndecl (e, decl)) { - error (edge points to wrong declaration:); - debug_tree (e-callee-decl); - fprintf (stderr, Instead of:); - debug_tree (decl); - error_found = true; + /* The edge can be redirected in WPA by IPA ICF. +Following check really ensures that it's +not the case. */ + + cgraph_node *current_node = cgraph_node::get (decl); + if (!current_node || !current_node-icf_merged) I would move this into verify_edge_corresponds_to_fndecl. diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c new file mode 100644 index 000..7031eaa --- /dev/null +++ b/gcc/ipa-icf-gimple.c @@ -0,0 +1,384 @@ +/* Interprocedural Identical Code Folding pass + Copyright (C) 2014 Free Software Foundation, Inc. + + Contributed by Jan Hubicka hubi...@ucw.cz and Martin Liska mli...@suse.cz + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +http://www.gnu.org/licenses/. */ Please add toplevel comment about what the code does and how to use it. +namespace ipa_icf { + +/* Basic block equivalence comparison function that returns true if + basic blocks BB1 and BB2 (from functions FUNC1 and FUNC2) correspond. */ ... to each other? I would add short comment that as comparsion goes you build voclabulary of equivalences of variables/ssanames etc. 
So people reading the code do not get lost at very beggining. + +bool +func_checker::compare_bb (sem_bb *bb1, sem_bb *bb2) +{ + unsigned i; + gimple_stmt_iterator gsi1, gsi2; + gimple s1, s2; + + if (bb1-nondbg_stmt_count != bb2-nondbg_stmt_count + || bb1-edge_count != bb2-edge_count) +return RETURN_FALSE (); The UPPERCASE looks ugly. I see that RETURN_FALSE is a warpper for return_false_with_msg that outputs line and file information. I would make it lowercase even if it is macro. You may consider using CXX_MEM_STAT_INFO style default argument to avoid function macro completely. Probably not big win given that it won't save you from preprocesor mess. + + gsi1 = gsi_start_bb (bb1-bb); + gsi2 = gsi_start_bb (bb2-bb); + + for (i = 0; i bb1-nondbg_stmt_count; i++) +{ + if (is_gimple_debug (gsi_stmt (gsi1))) + gsi_next_nondebug (gsi1); + + if (is_gimple_debug (gsi_stmt (gsi2))) + gsi_next_nondebug (gsi2); + + s1 = gsi_stmt (gsi1); + s2 = gsi_stmt (gsi2); + + if (gimple_code (s1) != gimple_code (s2)) + return RETURN_FALSE_WITH_MSG (gimple codes are different); I think you need to compare EH here. Consider case where one unit is compiled with -fno-exception and thus all EH regions are removed, while other function has EH regions in it. Those are not equivalent. EH region is obtained by lookup_stmt_eh and then you need to comapre them for match as you do with gimple_resx_regoin. + t1 = gimple_call_fndecl (s1); + t2 = gimple_call_fndecl (s2); + + /* Function pointer variables are not supported yet. */ They seems to be, compare_operand seems just right. + +/* Verifies for given GIMPLEs S1 and S2 that + label statements are semantically equivalent. 
*/ + +bool +func_checker::compare_gimple_label (gimple g1, gimple g2) +{ + if (m_ignore_labels) +return true; + + tree t1 = gimple_label_label (g1); + tree t2 = gimple_label_label (g2); + + return compare_tree_ssa_label (t1, t2); +} I would expect the main BB loop to record BB in which label belongs to and the BB assciatio neing checked here. Otherwise I do not see how switch statements are compared to not have different permutations of targets. Also note that one BB may have multiple labels in them and they are equivalent. Also I would punt on occurence of FORCED_LABEL. Those are tricky as they may be passed around and compared for address and no one really defines what should happen. Better to avoid those. Hi. I will remove this support in the pass. + +/* Verifies for given
[RFC, PATCH]: Introduction of callgraph annotation class
Hello. Following patch introduces a new class called callgraph_annotation. Idea behind the patch is to provide a generic interface one can use to register custom info related to a cgraph_node. As you know, symbol_table provides hooks for creation, deletion and duplication of a cgraph_node. If you have a pass, you need to handle all these hooks and store custom data in your data structure. As an example, after discussion with Martin, I chose usage in ipa-prop.h: data structure: vecipa_node_params ipa_node_params_vector if the pass handles an event, following chunk is executed: if (ipa_node_params_vector.length () = (unsigned) symtab-cgraph_max_uid) ipa_node_params_vector.safe_grow_cleared (symtab-cgraph_max_uid + 1); The problem is that you can have sparse UIDs of cgraph_nodes and every time you have to allocate a vector of size equal to cgraph_max_uid. As a replacement, I implemented first version of cgraph_annotation that internally uses hash_mapcgraph_unique_identifier, T. Every time a node is deleted, we remove corresponding data associated to the node. What do you think about it? Thank you, Martin diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 04ce0c0..bf34c96 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1171,6 +1171,7 @@ OBJS = \ cfgrtl.o \ symtab.o \ cgraph.o \ + annotation.o \ cgraphbuild.o \ cgraphunit.o \ cgraphclones.o \ diff --git a/gcc/annotation.c b/gcc/annotation.c new file mode 100644 index 000..a8b6053 --- /dev/null +++ b/gcc/annotation.c @@ -0,0 +1 @@ +#include annotation.h diff --git a/gcc/annotation.h b/gcc/annotation.h new file mode 100644 index 000..7520677 --- /dev/null +++ b/gcc/annotation.h @@ -0,0 +1,285 @@ +/* Annotations handling code. + Copyright (C) 2014 Free Software Foundation, Inc. + Contributed by Martin Liska + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +http://www.gnu.org/licenses/. */ + +#ifndef GCC_ANNOTATION_H +#define GCC_ANNOTATION_H + +#include config.h +#include system.h +#include coretypes.h +#include tm.h +#include tree.h +#include varasm.h +#include calls.h +#include print-tree.h +#include tree-inline.h +#include langhooks.h +#include hashtab.h +#include toplev.h +#include flags.h +#include debug.h +#include target.h +#include cgraph.h +#include hash-map.h + +#define ANNOTATION_DELETED_VALUE -1 +#define ANNOTATION_EMPTY_VALUE 0 + +struct annotation_hashmap_traits: default_hashmap_traits +{ + static inline + hashval_t hash (const int v) + { +return (hashval_t)v; + } + + templatetypename T + static inline + bool is_deleted (T e) + { +return e.m_key == ANNOTATION_DELETED_VALUE; + } + + templatetypename T + static inline + bool is_empty (T e) + { +return e.m_key == ANNOTATION_EMPTY_VALUE; + } + + templatetypename T + static inline + void mark_deleted (T e) + { +e.m_key = ANNOTATION_DELETED_VALUE; + } + + templatetypename T + static inline + void mark_empty (T e) + { +e.m_key = ANNOTATION_EMPTY_VALUE; + } +}; + +template class T +class cgraph_annotation +{ +public: + /* Default construction takes SYMTAB as an argument. 
*/ + cgraph_annotation (symbol_table *symtab): m_symtab (symtab) + { +cgraph_node *node; + +FOR_EACH_FUNCTION (node) +{ + gcc_assert (node-annotation_uid 0); + m_reverse_map.put (node, node-annotation_uid); +} + +m_map = new hash_mapint, T*, annotation_hashmap_traits(); + +m_symtab_insertion_hook = + symtab-add_cgraph_insertion_hook + (cgraph_annotation::symtab_insertion, this); + +m_symtab_removal_hook = + symtab-add_cgraph_removal_hook + (cgraph_annotation::symtab_removal, this); +m_symtab_duplication_hook = + symtab-add_cgraph_duplication_hook + (cgraph_annotation::symtab_duplication, this); + + } + + /* Destructor. */ + ~cgraph_annotation () + { +m_symtab-remove_cgraph_insertion_hook (m_symtab_insertion_hook); +m_symtab-remove_cgraph_removal_hook (m_symtab_removal_hook); +m_symtab-remove_cgraph_duplication_hook (m_symtab_duplication_hook); + +m_map-traverse void *, cgraph_annotation::release (NULL); + } + + /* Traverses all annotations with a function F called with + ARG as argument. */ + templatetypename Arg, bool (*f)(const T , Arg) + void traverse (Arg a) const + { +m_map-traverse f (a); + } + + /* Function for registering insertion hook. */ + template void (*f) (const
Re: [PATCH 3/5] IPA ICF pass
On 10/14/2014 06:04 PM, Jan Hubicka wrote: diff --git a/gcc/cgraph.h b/gcc/cgraph.h index fb41b01..2de98b4 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -172,6 +172,12 @@ public: /* Dump referring in list to FILE. */ void dump_referring (FILE *); + /* Get number of references for this node. */ + inline unsigned get_references_count (void) + { +return ref_list.references ? ref_list.references-length () : 0; + } Probably better called num_references() (like we have num_edge in basic-block.h) @@ -8068,6 +8069,19 @@ it may significantly increase code size (see @option{--param ipcp-unit-growth=@var{value}}). This flag is enabled by default at @option{-O3}. +@item -fipa-icf +@opindex fipa-icf +Perform Identical Code Folding for functions and read-only variables. +The optimization reduces code size and may disturb unwind stacks by replacing +a function by equivalent one with a different name. The optimization works +more effectively with link time optimization enabled. + +Nevertheless the behavior is similar to Gold Linker ICF optimization, GCC ICF +works on different levels and thus the optimizations are not same - there are +equivalences that are found only by GCC and equivalences found only by Gold. + +This flag is enabled by default at @option{-O2}. ... and -Os? +case ARRAY_REF: +case ARRAY_RANGE_REF: + { + x1 = TREE_OPERAND (t1, 0); + x2 = TREE_OPERAND (t2, 0); + y1 = TREE_OPERAND (t1, 1); + y2 = TREE_OPERAND (t2, 1); + + if (!compare_operand (array_ref_low_bound (t1), + array_ref_low_bound (t2))) + return return_false_with_msg (); + if (!compare_operand (array_ref_element_size (t1), + array_ref_element_size (t2))) + return return_false_with_msg (); + if (!compare_operand (x1, x2)) + return return_false_with_msg (); + return compare_operand (y1, y2); + } No need for {...} if there are no local vars. 
+bool +func_checker::compare_function_decl (tree t1, tree t2) +{ + bool ret = false; + + if (t1 == t2) +return true; + + symtab_node *n1 = symtab_node::get (t1); + symtab_node *n2 = symtab_node::get (t2); + + if (m_ignored_source_nodes != NULL m_ignored_target_nodes != NULL) +{ + ret = m_ignored_source_nodes-contains (n1) +m_ignored_target_nodes-contains (n2); + + if (ret) + return true; +} + + /* If function decl is WEAKREF, we compare targets. */ + cgraph_node *f1 = cgraph_node::get (t1); + cgraph_node *f2 = cgraph_node::get (t2); + + if(f1 f2 f1-weakref f2-weakref) +ret = f1-alias_target == f2-alias_target; + + return ret; Comparing aliases is bit more complicated than just handling weakrefs. I have patch for symtab_node::equivalent_address_p somewhre in queue. lets just drop the fancy stuff for the moment and compare f1f2 for equivalence. + ret = compare_decl (t1, t2); Why functions are not compared with compare_decl while variables are? + + return return_with_debug (ret); +} + +void +func_checker::parse_labels (sem_bb *bb) +{ + for (gimple_stmt_iterator gsi = gsi_start_bb (bb-bb); !gsi_end_p (gsi); + gsi_next (gsi)) +{ + gimple stmt = gsi_stmt (gsi); + + if (gimple_code (stmt) == GIMPLE_LABEL) + { + tree t = gimple_label_label (stmt); + gcc_assert (TREE_CODE (t) == LABEL_DECL); + + m_label_bb_map.put (t, bb-bb-index); + } +} +} + +/* Basic block equivalence comparison function that returns true if + basic blocks BB1 and BB2 (from functions FUNC1 and FUNC2) correspond. + + In general, a collection of equivalence dictionaries is built for types + like SSA names, declarations (VAR_DECL, PARM_DECL, ..). This infrastructure + is utilized by every statement-by-stament comparison function. 
*/ + +bool +func_checker::compare_bb (sem_bb *bb1, sem_bb *bb2) +{ + unsigned i; + gimple_stmt_iterator gsi1, gsi2; + gimple s1, s2; + + if (bb1-nondbg_stmt_count != bb2-nondbg_stmt_count + || bb1-edge_count != bb2-edge_count) +return return_false (); + + gsi1 = gsi_start_bb (bb1-bb); + gsi2 = gsi_start_bb (bb2-bb); + + for (i = 0; i bb1-nondbg_stmt_count; i++) +{ + if (is_gimple_debug (gsi_stmt (gsi1))) + gsi_next_nondebug (gsi1); + + if (is_gimple_debug (gsi_stmt (gsi2))) + gsi_next_nondebug (gsi2); + + s1 = gsi_stmt (gsi1); + s2 = gsi_stmt (gsi2); + + int eh1 = lookup_stmt_eh_lp_fn + (DECL_STRUCT_FUNCTION (m_source_func_decl), s1); + int eh2 = lookup_stmt_eh_lp_fn + (DECL_STRUCT_FUNCTION (m_target_func_decl), s2); + + if (eh1 != eh2) + return return_false_with_msg (EH regions are different); + + if (gimple_code (s1) != gimple_code (s2)) + return return_false_with_msg (gimple codes are different); + + switch (gimple_code (s1)) + { + case GIMPLE_CALL: + if (!compare_gimple_call
Re: [RFC, PATCH]: Introduction of callgraph annotation class
On 10/16/2014 01:31 PM, Richard Biener wrote: On Wed, Oct 15, 2014 at 6:26 PM, Martin Liška mli...@suse.cz wrote: Hello. Following patch introduces a new class called callgraph_annotation. Idea behind the patch is to provide a generic interface one can use to register custom info related to a cgraph_node. As you know, symbol_table provides hooks for creation, deletion and duplication of a cgraph_node. If you have a pass, you need to handle all these hooks and store custom data in your data structure. As an example, after discussion with Martin, I chose usage in ipa-prop.h: data structure: vecipa_node_params ipa_node_params_vector if the pass handles an event, following chunk is executed: if (ipa_node_params_vector.length () = (unsigned) symtab-cgraph_max_uid) ipa_node_params_vector.safe_grow_cleared (symtab-cgraph_max_uid + 1); The problem is that you can have sparse UIDs of cgraph_nodes and every time you have to allocate a vector of size equal to cgraph_max_uid. As a replacement, I implemented first version of cgraph_annotation that internally uses hash_mapcgraph_unique_identifier, T. Every time a node is deleted, we remove corresponding data associated to the node. What do you think about it? I don't like generic annotation facilities at all. Would it be possible to make cgraph UIDs not sparse? (keep a free-list of cgraph nodes with UID cgraph_max_uid, only really free nodes at the end) Using a different data structure than a vector indexed by cgraph UID should also be easily possible (a map from UID to data, hash_map int, T). Hello. If I recall correctly, we recycle cgraph_nodes and it's possible that an UID is given to different nodes: symbol_table::allocate_cgraph_symbol (void). Such uid is problematic from perspective that it cannot be used as a index to a vector. It was also Honza's note that one can choose inner implementation of such annotation class. We can implement both sparse (hash_map) and consecutive vector data structure. 
According to first numbers I was given, Inkscape allocates about ~64k cgraph_nodes in WPA. After function merging is processed, it shrinks to about a half. So that, our free list contains the half of nodes. If we use consecutive vector, our memory impact is bigger thank necessary. Martin Richard. Thank you, Martin
Re: [RFC, PATCH]: Introduction of callgraph annotation class
On 10/16/2014 02:01 PM, Jan Hubicka wrote: Hello. If I recall correctly, we recycle cgraph_nodes and it's possible that an UID is given to different nodes: symbol_table::allocate_cgraph_symbol (void). Such uid is problematic from perspective that it cannot be used as a index to a vector. It was also Honza's note that one can choose inner implementation of such annotation class. We can implement both sparse (hash_map) and consecutive vector data structure. According to first numbers I was given, Inkscape allocates about ~64k cgraph_nodes in WPA. After function merging is processed, it shrinks to about a half. So that, our free list contains the half of nodes. If we use consecutive vector, our memory impact is bigger thank necessary. I don't think there is anything that forces us to retain the original UID allocation after WPA merging? So why not compact it? We could, if we have way to update the summaries that are currently UID allocated. With annotation template we could have handle to do that more easily than diving into each of passes maintaining summaries by hand. Agree with that, I will be central point one can implement these optimizations. One idea is to implement lazy allocation where we can allocate memory just in case someone calls annotation::get method. On the other hand it still does not make the records quite dense in cases 1) you do not want to have separate records for clones because you know clones and master are identical It would be quite easy to implement annotation::get_for_origin (int clone_id), where we find origin for the clone and return data associated to such origin node. 2) you care only about definitions Maybe similar stuff? Martin ... 
At some point we discussed introducing separate UIDs for those but that was also not very welcome (and I agree we already have bit too many UIDs for functions - DECL_UID, node-uid, DECL_STRUCT_FUNCTION (node)-uid, profile_uid I tried to get rid of DECL_STRUCT_FUNCTION uid at some point, but did not quite finished it. Honza Richard. Martin Richard. Thank you, Martin
Re: [RFC, PATCH]: Introduction of callgraph annotation class
On 10/16/2014 02:05 PM, Martin Liška wrote: On 10/16/2014 02:01 PM, Jan Hubicka wrote: Hello. If I recall correctly, we recycle cgraph_nodes and it's possible that an UID is given to different nodes: symbol_table::allocate_cgraph_symbol (void). Such uid is problematic from perspective that it cannot be used as a index to a vector. It was also Honza's note that one can choose inner implementation of such annotation class. We can implement both sparse (hash_map) and consecutive vector data structure. According to first numbers I was given, Inkscape allocates about ~64k cgraph_nodes in WPA. After function merging is processed, it shrinks to about a half. So that, our free list contains the half of nodes. If we use consecutive vector, our memory impact is bigger thank necessary. I don't think there is anything that forces us to retain the original UID allocation after WPA merging? So why not compact it? We could, if we have way to update the summaries that are currently UID allocated. With annotation template we could have handle to do that more easily than diving into each of passes maintaining summaries by hand. Agree with that, I will be central point one can implement these optimizations. s/I/it One idea is to implement lazy allocation where we can allocate memory just in case someone calls annotation::get method. On the other hand it still does not make the records quite dense in cases 1) you do not want to have separate records for clones because you know clones and master are identical It would be quite easy to implement annotation::get_for_origin (int clone_id), where we find origin for the clone and return data associated to such origin node. 2) you care only about definitions Maybe similar stuff? Martin ... 
At some point we discussed introducing separate UIDs for those but that was also not very welcome (and I agree we already have bit too many UIDs for functions - DECL_UID, node-uid, DECL_STRUCT_FUNCTION (node)-uid, profile_uid I tried to get rid of DECL_STRUCT_FUNCTION uid at some point, but did not quite finished it. Honza Richard. Martin Richard. Thank you, Martin
IPA ICF fallout: i586 bootstrap failure fix
Hello. Following patch fixes i586 bootstrap failure: https://gcc.gnu.org/ml/gcc-testresults/2014-10/msg01713.html ../../src-trunk/gcc/ipa-icf.c:2096:23: error: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 3 has type ‘std::listipa_icf::congruence_class*::size_type {aka unsigned int}’ [-Werror=format=] worklist.size ()); ^ ../../src-trunk/gcc/ipa-icf.c: In member function ‘void ipa_icf::sem_item_optimizer::dump_cong_classes()’: ../../src-trunk/gcc/ipa-icf.c:2116:61: error: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 4 has type ‘size_t {aka unsigned int}’ [-Werror=format=] m_classes_count, m_classes.elements(), m_items.length ()); Ready for thunk? Thank you, Martin gcc/ChangeLog: 2014-10-16 Martin Liska mli...@suse.cz * ipa-icf.c (sem_item_optimizer::process_cong_reduction): size_t cast added. (sem_item_optimizer::dump_cong_classes): Likewise. diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c index 4e73849..f7510b3 100644 --- a/gcc/ipa-icf.c +++ b/gcc/ipa-icf.c @@ -2093,7 +2093,7 @@ sem_item_optimizer::process_cong_reduction (void) if (dump_file) fprintf (dump_file, Worklist has been filled with: %lu\n, - worklist.size ()); + (unsigned long) worklist.size ()); if (dump_file (dump_flags TDF_DETAILS)) fprintf (dump_file, Congruence class reduction\n); @@ -2113,7 +2113,7 @@ sem_item_optimizer::dump_cong_classes (void) fprintf (dump_file, Congruence classes: %u (unique hash values: %lu), with total: %u items\n, - m_classes_count, m_classes.elements(), m_items.length ()); + m_classes_count, (unsigned long) m_classes.elements(), m_items.length ()); /* Histogram calculation. */ unsigned int max_index = 0;
Re: IPA ICF fallout: i586 bootstrap failure fix
On 10/16/2014 02:45 PM, Jakub Jelinek wrote: On Thu, Oct 16, 2014 at 02:41:36PM +0200, Martin Liška wrote: Hello. Following patch fixes i586 bootstrap failure: https://gcc.gnu.org/ml/gcc-testresults/2014-10/msg01713.html ../../src-trunk/gcc/ipa-icf.c:2096:23: error: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 3 has type ‘std::listipa_icf::congruence_class*::size_type {aka unsigned int}’ [-Werror=format=] worklist.size ()); ^ ../../src-trunk/gcc/ipa-icf.c: In member function ‘void ipa_icf::sem_item_optimizer::dump_cong_classes()’: ../../src-trunk/gcc/ipa-icf.c:2116:61: error: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 4 has type ‘size_t {aka unsigned int}’ [-Werror=format=] m_classes_count, m_classes.elements(), m_items.length ()); Ready for thunk? Thank you, Martin gcc/ChangeLog: 2014-10-16 Martin Liska mli...@suse.cz * ipa-icf.c (sem_item_optimizer::process_cong_reduction): size_t cast added. Too long line, please wrap. I'd write Cast to unsigned long. instead. Thank you for note, applied as suggested. Martin (sem_item_optimizer::dump_cong_classes): Likewise. Ok. Jakub
IPA ICF fallout: fix for two ipa-icf-*.C tests
Hello. I forgot that RUNTESTFLAGS=ipa.exp does not execute *.C tests cases residing in testsuite/g++.dg/ipa/ folder. The patch fixes two of my newly added tests. Considered as obvious. Thank you, Martin gcc/testsuite/ChangeLog: 2014-10-16 Martin Liska mli...@suse.cz * g++.dg/ipa/ipa-icf-4.C: Correct number of equivalences set. * g++.dg/ipa/ipa-icf-5.C: Likewise diff --git a/gcc/testsuite/g++.dg/ipa/ipa-icf-4.C b/gcc/testsuite/g++.dg/ipa/ipa-icf-4.C index 9d17889..9434289 100644 --- a/gcc/testsuite/g++.dg/ipa/ipa-icf-4.C +++ b/gcc/testsuite/g++.dg/ipa/ipa-icf-4.C @@ -44,5 +44,5 @@ int main() } /* { dg-final { scan-ipa-dump Varpool alias has been created icf } } */ -/* { dg-final { scan-ipa-dump Equal symbols: 2 icf } } */ +/* { dg-final { scan-ipa-dump Equal symbols: 6 icf } } */ /* { dg-final { cleanup-ipa-dump icf } } */ diff --git a/gcc/testsuite/g++.dg/ipa/ipa-icf-5.C b/gcc/testsuite/g++.dg/ipa/ipa-icf-5.C index 728df20..7dbce88 100644 --- a/gcc/testsuite/g++.dg/ipa/ipa-icf-5.C +++ b/gcc/testsuite/g++.dg/ipa/ipa-icf-5.C @@ -19,6 +19,5 @@ int main() return myarray.a - myarray_alias.a; } -/* { dg-final { scan-ipa-dump Varpool alias cannot be created \\(alias cycle\\). icf } } */ -/* { dg-final { scan-ipa-dump Equal symbols: 1 icf } } */ +/* { dg-final { scan-ipa-dump Equal symbols: 0 icf } } */ /* { dg-final { cleanup-ipa-dump icf } } */
IPA ICF fallout: fix for libasan and pr43077-1.c
Hello. After IRC discussion with Jakub, I disable IPA ICF for ASAN. Second part of the patch contains fix for pr43077-1.c. Considered as pre-approved. Thanks, Martin gcc/testsuite/ChangeLog: 2014-10-16 Martin Liska mli...@suse.cz * gcc.dg/guality/pr43077-1.c: IPA ICF disabled to match defined expectations. libsanitizer/ChangeLog: 2014-10-16 Martin Liska mli...@suse.cz * asan/Makefile.am: IPA ICF pass is disabled. * asan/Makefile.in: Likewise. diff --git a/gcc/testsuite/gcc.dg/guality/pr43077-1.c b/gcc/testsuite/gcc.dg/guality/pr43077-1.c index d8d5eeb..84bd59e 100644 --- a/gcc/testsuite/gcc.dg/guality/pr43077-1.c +++ b/gcc/testsuite/gcc.dg/guality/pr43077-1.c @@ -1,6 +1,6 @@ /* PR debug/43077 */ /* { dg-do run } */ -/* { dg-options -g } */ +/* { dg-options -g -fno-ipa-icf } */ int varb; diff --git a/libsanitizer/asan/Makefile.am b/libsanitizer/asan/Makefile.am index 12f20ae..d499c72 100644 --- a/libsanitizer/asan/Makefile.am +++ b/libsanitizer/asan/Makefile.am @@ -7,7 +7,7 @@ DEFS = -D_GNU_SOURCE -D_DEBUG -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D if USING_MAC_INTERPOSE DEFS += -DMAC_INTERPOSE_FUNCTIONS -DMISSING_BLOCKS_SUPPORT endif -AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings -pedantic -Wno-long-long -fPIC -fno-builtin -fno-exceptions -fno-rtti -fomit-frame-pointer -funwind-tables -fvisibility=hidden -Wno-variadic-macros +AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings -pedantic -Wno-long-long -fPIC -fno-builtin -fno-exceptions -fno-rtti -fomit-frame-pointer -funwind-tables -fvisibility=hidden -Wno-variadic-macros -fno-ipa-icf AM_CXXFLAGS += $(LIBSTDCXX_RAW_CXX_CXXFLAGS) ACLOCAL_AMFLAGS = -I $(top_srcdir) -I $(top_srcdir)/config diff --git a/libsanitizer/asan/Makefile.in b/libsanitizer/asan/Makefile.in index 862eec4..00a614b 100644 --- a/libsanitizer/asan/Makefile.in +++ b/libsanitizer/asan/Makefile.in @@ -269,7 +269,8 @@ gcc_version := $(shell cat $(top_srcdir)/../gcc/BASE-VER) AM_CXXFLAGS = -Wall -W -Wno-unused-parameter 
-Wwrite-strings -pedantic \ -Wno-long-long -fPIC -fno-builtin -fno-exceptions -fno-rtti \ -fomit-frame-pointer -funwind-tables -fvisibility=hidden \ - -Wno-variadic-macros $(LIBSTDCXX_RAW_CXX_CXXFLAGS) + -Wno-variadic-macros -fno-ipa-icf \ + $(LIBSTDCXX_RAW_CXX_CXXFLAGS) ACLOCAL_AMFLAGS = -I $(top_srcdir) -I $(top_srcdir)/config toolexeclib_LTLIBRARIES = libasan.la nodist_toolexeclib_HEADERS = libasan_preinit.o
[PATCH,i686]: Temporary fix for PR63566
Hello. After IRC discussion, IPA ICF will set local flag to false for both original and node that becomes an alias. That will enforce equal calling convention to be used. i686-pc-linux bootstrap is still running; I will commit the fix as soon as it finishes. I consider it as pre-approved. Thank you, Martin gcc/ChangeLog: 2014-10-17 Martin Liska mli...@suse.cz * ipa-icf.c (sem_function::merge): Local flags are set to false to enforce equal calling convention to be used. * opts.c (common_handle_option): Indentation fix. diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c index f7510b3..0e6bd9a 100644 --- a/gcc/ipa-icf.c +++ b/gcc/ipa-icf.c @@ -630,6 +630,11 @@ sem_function::merge (sem_item *alias_item) cgraph_node::create_alias (alias_func-decl, decl); alias-resolve_alias (original); + /* Workaround for PR63566 that forces equal calling convention + to be used. */ + alias-local.local = false; + original-local.local = false; + if (dump_file) fprintf (dump_file, Callgraph alias has been created.\n\n); } diff --git a/gcc/opts.c b/gcc/opts.c index dc8ddf4..3054196 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -1982,8 +1982,8 @@ common_handle_option (struct gcc_options *opts, break; case OPT_fipa_icf: - opts-x_flag_ipa_icf_functions = value; - opts-x_flag_ipa_icf_variables = value; + opts-x_flag_ipa_icf_functions = value; + opts-x_flag_ipa_icf_variables = value; break; default:
[PATCH] Fix for PR63569
Hello. Following patch fixes PR63569. Bootstrap executed on ppc64-linux and no regression seen on x86_64-pc-linux. Ready for trunk? Thank you, Martin gcc/testsuite/ChangeLog: 2014-10-17 Martin Liska mli...@suse.cz * gcc.dg/ipa/ipa-icf-31.c: New test. gcc/ChangeLog: 2014-10-17 Martin Liska mli...@suse.cz * ipa-icf-gimple.c (func_checker::compare_volatility): New function. (func_checker::compare_gimple_call): Volatility check added. (func_checker::compare_gimple_assign): Likewise. * ipa-icf-gimple.h: New function. diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c index 792a3e4..1b9ee85 100644 --- a/gcc/ipa-icf-gimple.c +++ b/gcc/ipa-icf-gimple.c @@ -452,6 +452,17 @@ func_checker::compare_tree_list_operand (tree t1, tree t2) return true; } +/* Compares if both trees T1 and T2 have equal volatility. */ + +bool +func_checker::compare_volatility (tree t1, tree t2) +{ + if (t1 t2) +return TREE_THIS_VOLATILE (t1) == TREE_THIS_VOLATILE (t2); + + return !(t1 || t2); +} + /* Verifies that trees T1 and T2, representing function declarations are equivalent from perspective of ICF. */ @@ -663,6 +674,9 @@ func_checker::compare_gimple_call (gimple s1, gimple s2) t1 = gimple_get_lhs (s1); t2 = gimple_get_lhs (s2); + if (!compare_volatility (t1, t2)) +return return_false_with_msg (different volatility for call statement); + return compare_operand (t1, t2); } @@ -696,8 +710,11 @@ func_checker::compare_gimple_assign (gimple s1, gimple s2) if (!compare_operand (arg1, arg2)) return false; -} + if (!compare_volatility (arg1, arg2)) + return return_false_with_msg (different volatility for assignment + statement); +} return true; } diff --git a/gcc/ipa-icf-gimple.h b/gcc/ipa-icf-gimple.h index 8487a2a..b791c21 100644 --- a/gcc/ipa-icf-gimple.h +++ b/gcc/ipa-icf-gimple.h @@ -209,6 +209,10 @@ public: two trees are semantically equivalent. 
*/ bool compare_tree_list_operand (tree t1, tree t2); + /* Compares two tree list operands T1 and T2 and returns true if these + two trees are semantically equivalent. */ + bool compare_volatility (tree t1, tree t2); + /* Verifies that trees T1 and T2, representing function declarations are equivalent from perspective of ICF. */ bool compare_function_decl (tree t1, tree t2); diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-icf-31.c b/gcc/testsuite/gcc.dg/ipa/ipa-icf-31.c new file mode 100644 index 000..e70d72d --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/ipa-icf-31.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options -fipa-icf -fdump-ipa-icf-details } */ + + +static int f(int t, int *a) __attribute__((noinline)); + +static int g(int t, volatile int *a) __attribute__((noinline)); +static int g(int t, volatile int *a) +{ + int i; + int tt = 0; + for(i=0;it;i++) +tt += *a; + return tt; +} +static int f(int t, int *a) +{ + int i; + int tt = 0; + for(i=0;it;i++) +tt += *a; + return tt; +} + + +int main() +{ + return 0; +} + +/* { dg-final { scan-ipa-dump Equal symbols: 0 icf } } */ +/* { dg-final { scan-ipa-dump different volatility for assignment statement icf } } */ +/* { dg-final { cleanup-ipa-dump icf } } */
[PATCH] Fix for PR63583
Hello. I added missing gimple_asm_string comparison for a function with an asm statement. Bootstrap and regression tests still running, ready for trunk after it finishes? Thank you, Martin gcc/ChangeLog: 2014-10-19 Martin Liska mli...@suse.cz * ipa-icf-gimple.c (func_checker::compare_gimple_asm): Gimple tempate string is compared. gcc/testsuite/ChangeLog: 2014-10-19 Martin Liska mli...@suse.cz * gcc.dg/ipa/pr63595.c: New test. diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c index 792a3e4..1369b74 100644 --- a/gcc/ipa-icf-gimple.c +++ b/gcc/ipa-icf-gimple.c @@ -863,6 +863,9 @@ func_checker::compare_gimple_asm (gimple g1, gimple g2) if (gimple_asm_nclobbers (g1) != gimple_asm_nclobbers (g2)) return false; + if (strcmp (gimple_asm_string (g1), gimple_asm_string (g2)) != 0) +return return_false_with_msg (ASM strings are different); + for (unsigned i = 0; i gimple_asm_ninputs (g1); i++) { tree input1 = gimple_asm_input_op (g1, i); diff --git a/gcc/testsuite/gcc.dg/ipa/pr63595.c b/gcc/testsuite/gcc.dg/ipa/pr63595.c new file mode 100644 index 000..9c9f3bf --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/pr63595.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options -O2 -fdump-ipa-icf-details } */ + +static int f(int t) __attribute__((noinline)); + +static int g(int t) __attribute__((noinline)); +static int g(int t) +{ +asm(addl %0, 1: +r(t)); + return t; +} +static int f(int t) +{ +asm(addq %0, -1: +r(t)); + return t; +} + + +int h(int t) +{ +return f(t) + g(t); +} + +/* { dg-final { scan-ipa-dump ASM strings are different icf } } */ +/* { dg-final { scan-ipa-dump Equal symbols: 0 icf } } */ +/* { dg-final { cleanup-ipa-dump icf } } */
[PATCH] g++.dg: add ipa.exp file
Hi, I would like to add corresponding ipa.exp file for g++ that let me run: make -k check RUNTESTFLAGS=ipa.exp Changelog: 2014-03-28 Martin Liska mli...@suse.cz * g++.dg/ipa.epx: Anologous file added to g++.dg folder. OK for trunk? Thank you, Martin diff --git a/gcc/testsuite/g++.dg/ipa/ipa.exp b/gcc/testsuite/g++.dg/ipa/ipa.exp new file mode 100644 index 000..af7b8a7 --- /dev/null +++ b/gcc/testsuite/g++.dg/ipa/ipa.exp @@ -0,0 +1,35 @@ +# Copyright (C) 1997-2014 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# http://www.gnu.org/licenses/. + +# G++ testsuite that uses the `dg.exp' driver. + +# Load support procs. +load_lib g++-dg.exp + +# If a testcase doesn't have special options, use these. +global DEFAULT_CXXFLAGS +if ![info exists DEFAULT_CXXFLAGS] then { +set DEFAULT_CXXFLAGS -pedantic-errors -Wno-long-long +} + +# Initialize `dg'. +dg-init + +# Main loop. +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[C\]]] $DEFAULT_CXXFLAGS + +# All done. +dg-finish
Re: Fix indirect call profiling for COMDAT symbols
On 04/11/2014 08:00 AM, Jan Hubicka wrote: Hi, while looking into firefox profiles, I noticed that we miss devirtualizations to comdat symbols, because we manage to get different profile_id in each unit. This is easily fixed by the following patch that makes profiled_id to by crc32 of the symbol name in this case. Bootstrapped/regtested x86_64-linux, tested with firefox, will commit it tomorrow. * coverage.c (coverage_compute_profile_id): Make stable for global symbols * ipa-utils.c (ipa_merge_profiles): Merge profile_id. * lto/lto-symtab.c (lto_cgraph_replace_node): Don't re-merge tp_first_run. Index: coverage.c === --- coverage.c (revision 209170) +++ coverage.c (working copy) @@ -555,18 +555,31 @@ coverage_compute_lineno_checksum (void) unsigned coverage_compute_profile_id (struct cgraph_node *n) { - expanded_location xloc -= expand_location (DECL_SOURCE_LOCATION (n-decl)); - unsigned chksum = xloc.line; + unsigned chksum; - chksum = coverage_checksum_string (chksum, xloc.file); - chksum = coverage_checksum_string -(chksum, IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n-decl))); - if (first_global_object_name) -chksum = coverage_checksum_string - (chksum, first_global_object_name); - chksum = coverage_checksum_string -(chksum, aux_base_name); + /* Externally visible symbols have unique name. */ + if (TREE_PUBLIC (n-decl) || DECL_EXTERNAL (n-decl)) +{ + /* Do not use coverage_checksum_string here; we really want unique +symbol name id. 
*/ + chksum = crc32_string + (0, IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n-decl))); +} + else +{ + expanded_location xloc + = expand_location (DECL_SOURCE_LOCATION (n-decl)); + + chksum = xloc.line; + chksum = coverage_checksum_string (chksum, xloc.file); + chksum = coverage_checksum_string + (chksum, IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n-decl))); + if (first_global_object_name) + chksum = coverage_checksum_string + (chksum, first_global_object_name); + chksum = coverage_checksum_string + (chksum, aux_base_name); +} /* Non-negative integers are hopefully small enough to fit in all targets. */ return chksum 0x7fff; Index: ipa-utils.c === --- ipa-utils.c (revision 209170) +++ ipa-utils.c (working copy) @@ -660,6 +660,21 @@ ipa_merge_profiles (struct cgraph_node * if (dst-tp_first_run src-tp_first_run src-tp_first_run) dst-tp_first_run = src-tp_first_run; + if (src-profile_id) +{ + if (!dst-profile_id) + dst-profile_id = src-profile_id; + else + { +if (src-profile_id != dst-profile_id) + { +dump_cgraph_node (stderr, src); +dump_cgraph_node (stderr, dst); + } +gcc_assert (src-profile_id == dst-profile_id); + } +} + if (!dst-count) return; if (cgraph_dump_file) Index: lto/lto-symtab.c === --- lto/lto-symtab.c(revision 209170) +++ lto/lto-symtab.c(working copy) @@ -91,12 +91,6 @@ lto_cgraph_replace_node (struct cgraph_n if (node-decl != prevailing_node-decl) cgraph_release_function_body (node); - /* Time profile merging */ - if (node-tp_first_run) -prevailing_node-tp_first_run = prevailing_node-tp_first_run ? - MIN (prevailing_node-tp_first_run, node-tp_first_run) : - node-tp_first_run; - Hello Honza, I just want to ask if this time profile merging is not necessary any more? Martin /* Finally remove the replaced node. */ cgraph_remove_node (node); }
Re: Avoid unnecessary GGC runs during LTO
On 04/11/2014 08:07 AM, Jan Hubicka wrote: Hi, while looking into -ftime-report, I noticed that ggc can take up to 10% of WPA memory while it does almost nothing: it is run just after streaming that explicitly frees memory that becomes unreachable. The first GGC run usually saves at most 1% of memory and then it is never run again. I believe this ought to also help in case we get into swap, since ltranses will also ggc less. Bootstrapped/regtested x86_64-linux, OK? Hi! I applied both patches you sent today and there are Firefox LTO -O3 results: https://drive.google.com/file/d/0B0pisUJ80pO1ajRzLWFneTJpcE0/edit?usp=sharing It shows that you saved a bit memory in WPA. Martin Honza * lto.c (read_cgraph_and_symbols): Grow ggc memory after streaming. * ggc.h (ggc_grow): New function. * ggc-none.c (ggc_grow): New function. * ggc-page.c (ggc_grow): Likewise. Index: ggc.h === --- ggc.h (revision 209170) +++ ggc.h (working copy) @@ -225,6 +225,9 @@ extern const char *ggc_alloc_string_stat function is called, not during allocations. */ extern void ggc_collect (void); +/* Assume that all GGC memory is reachable and grow the limits for next collection. */ +extern void ggc_grow (void); + /* Register an additional root table. This can be useful for some plugins. Does nothing if the passed pointer is NULL. */ extern void ggc_register_root_tab (const struct ggc_root_tab *); Index: lto/lto.c === --- lto/lto.c (revision 209170) +++ lto/lto.c (working copy) @@ -2999,6 +3000,10 @@ read_cgraph_and_symbols (unsigned nfiles gimple_canonical_types = NULL; delete canonical_type_hash_cache; canonical_type_hash_cache = NULL; + + /* At this stage we know that majority of GGC memory is reachable. + Growing the limits prevents unnecesary invocation of GGC. */ + ggc_grow (); ggc_collect (); /* Set the hooks so that all of the ipa passes can read in their data. 
*/ Index: ggc-none.c === --- ggc-none.c (revision 209170) +++ ggc-none.c (working copy) @@ -63,3 +63,8 @@ ggc_free (void *p) { free (p); } + +void +ggc_grow (void) +{ +} Index: ggc-page.c === --- ggc-page.c (revision 209170) +++ ggc-page.c (working copy) @@ -2095,6 +2095,19 @@ ggc_collect (void) fprintf (G.debug_file, END COLLECTING\n); } +/* Assume that all GGC memory is reachable and grow the limits for next collection. */ + +void +ggc_grow (void) +{ +#ifndef ENABLE_CHECKING + G.allocated_last_gc = MAX (G.allocated_last_gc, +G.allocated); +#endif + if (!quiet_flag) +fprintf (stderr, {GC start %luk} , (unsigned long) G.allocated / 1024); +} + /* Print allocation statistics. */ #define SCALE(x) ((unsigned long) ((x) 1024*10 \ ? (x) \
Re: ipa-visibility TLC 2/n
On 05/25/2014 07:54 AM, Jan Hubicka wrote: Hi, this patch adds code to rerite references in vtable initializers to local aliases when doing so is a win. Bootstrapped/regtested x86_64-linux, comitted. Honza * ipa-visibility.c (can_replace_by_local_alias_in_vtable): New function. (update_vtable_references): New function. (function_and_variable_visibility): Rewrite also vtable initializers. * varpool.c (cgraph_variable_initializer_availability): Remove assert. Index: varpool.c === --- varpool.c (revision 210908) +++ varpool.c (working copy) @@ -355,7 +355,6 @@ varpool_add_new_variable (tree decl) enum availability cgraph_variable_initializer_availability (varpool_node *node) { - gcc_assert (cgraph_function_flags_ready); if (!node-definition) return AVAIL_NOT_AVAILABLE; if (!TREE_PUBLIC (node-decl)) Index: ipa-visibility.c === --- ipa-visibility.c(revision 210908) +++ ipa-visibility.c(working copy) @@ -343,6 +343,36 @@ can_replace_by_local_alias (symtab_node !symtab_can_be_discarded (node)); } +/* Return true if we can replace refernece to NODE by local alias Hello Jan, s/refernece/reference + within a virtual table. Generally we can replace function pointers + and virtual table pointers. */ + +bool +can_replace_by_local_alias_in_vtable (symtab_node *node) +{ + if (is_a varpool_node * (node) + !DECL_VIRTUAL_P (node-decl)) +return false; + return can_replace_by_local_alias (node); +} + +/* walk_tree callback that rewrites initializer references. */ + +static tree +update_vtable_references (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) +{ + if (TREE_CODE (*tp) == VAR_DECL + || TREE_CODE (*tp) == FUNCTION_DECL) +{ + if (can_replace_by_local_alias_in_vtable (symtab_get_node (*tp))) + *tp = symtab_nonoverwritable_alias (symtab_get_node (*tp))-decl; + *walk_subtrees = 0; +} + else if (IS_TYPE_OR_DECL_P (*tp)) +*walk_subtrees = 0; + return NULL; +} + /* In LTO we can remove COMDAT groups and weak symbols. 
Either turn them into normal symbols or external symbol depending on resolution info. */ @@ -625,6 +655,34 @@ function_and_variable_visibility (bool w vnode-resolution = LDPR_PREVAILING_DEF_IRONLY; } update_visibility_by_resolution_info (vnode); + + /* Update virutal tables to point to local aliases where possible. */ s/virutal/virtual Martin + if (DECL_VIRTUAL_P (vnode-decl) + !DECL_EXTERNAL (vnode-decl)) + { + int i; + struct ipa_ref *ref; + bool found = false; + + /* See if there is something to update. */ + for (i = 0; ipa_ref_list_referring_iterate (vnode-ref_list, + i, ref); i++) + if (ref-use == IPA_REF_ADDR +can_replace_by_local_alias_in_vtable (ref-referred)) + { + found = true; + break; + } + if (found) + { + struct pointer_set_t *visited_nodes = pointer_set_create (); + walk_tree (DECL_INITIAL (vnode-decl), +update_vtable_references, NULL, visited_nodes); + pointer_set_destroy (visited_nodes); + ipa_remove_all_references (vnode-ref_list); + record_references_in_initializer (vnode-decl, false); + } + } } if (dump_file)
[PATCH 1/4] Make coverage_compute_cfg_checksum callable with an argument
Hello, this is a small patchset that prepares API for new IPA Identical code folding pass. The patch adds an argument for coverage_compute_cfg_checksum. Bootstrapped and tested on x86_64-linux. OK for trunk? Thanks, Martin 2014-05-29 Martin Liska mli...@suse.cz * coverage.h (coverage_compute_cfg_checksum): Argument added. * coverage.c (coverage_compute_cfg_checksum): Likewise. * profile.c (branch_prob): Likewise. diff --git a/gcc/coverage.c b/gcc/coverage.c index 5e9005e..9d81387 100644 --- a/gcc/coverage.c +++ b/gcc/coverage.c @@ -594,12 +594,12 @@ coverage_compute_profile_id (struct cgraph_node *n) but the compiler won't detect the change and use the wrong profile data. */ unsigned -coverage_compute_cfg_checksum (void) +coverage_compute_cfg_checksum (struct function *fn) { basic_block bb; - unsigned chksum = n_basic_blocks_for_fn (cfun); + unsigned chksum = n_basic_blocks_for_fn (fn); - FOR_EACH_BB_FN (bb, cfun) + FOR_EACH_BB_FN (bb, fn) { edge e; edge_iterator ei; diff --git a/gcc/coverage.h b/gcc/coverage.h index 81f87a6..392080c 100644 --- a/gcc/coverage.h +++ b/gcc/coverage.h @@ -32,8 +32,8 @@ extern int coverage_begin_function (unsigned, unsigned); /* Complete the coverage information for the current function. */ extern void coverage_end_function (unsigned, unsigned); -/* Compute the control flow checksum for the current function. */ -extern unsigned coverage_compute_cfg_checksum (void); +/* Compute the control flow checksum for the FUNCTION given as argument. */ +extern unsigned coverage_compute_cfg_checksum (struct function *); /* Compute the profile id of function N. */ extern unsigned coverage_compute_profile_id (struct cgraph_node *n); diff --git a/gcc/profile.c b/gcc/profile.c index 3282ee7..4e82eab 100644 --- a/gcc/profile.c +++ b/gcc/profile.c @@ -1195,7 +1195,7 @@ branch_prob (void) the checksum in only once place, since it depends on the shape of the control flow which can change during various transformations. 
*/ - cfg_checksum = coverage_compute_cfg_checksum (); + cfg_checksum = coverage_compute_cfg_checksum (cfun); lineno_checksum = coverage_compute_lineno_checksum (); /* Write the data from which gcov can reconstruct the basic block -- 1.8.4.5
[PATCH 2/4] Enhancement of call graph API
Hello, this patch enhances callgraph API to enable more precise control of expand_thunk; another function becomes global. Bootstrapped and tested on x86_64-linux. OK for trunk? Thanks, Martin 2014-05-29 Martin Liska mli...@suse.cz * cgraph.h (expand_thunk): New argument added. (address_taken_from_non_vtable_p): New global function. * ipa-visibility.c (address_taken_from_non_vtable_p): Likewise. * cgraphclones.c (duplicate_thunk_for_node): Argument added to call. * cgraphunit.c (analyze_function): Likewise. (assemble_thunks_and_aliases): Argument added to call. (expand_thunk): New argument forces to produce GIMPLE thunk. diff --git a/gcc/cgraph.h b/gcc/cgraph.h index e5aa833..bfd3d91 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -911,7 +911,7 @@ void fixup_same_cpp_alias_visibility (symtab_node *, symtab_node *target, tree); IN_SSA is true if the gimple is in SSA. */ basic_block init_lowered_empty_function (tree, bool); void cgraph_reset_node (struct cgraph_node *); -bool expand_thunk (struct cgraph_node *, bool); +bool expand_thunk (struct cgraph_node *, bool, bool); /* In cgraphclones.c */ @@ -956,6 +956,7 @@ void ipa_record_stmt_references (struct cgraph_node *, gimple); /* In ipa.c */ bool symtab_remove_unreachable_nodes (bool, FILE *); +bool address_taken_from_non_vtable_p (symtab_node *node); cgraph_node_set cgraph_node_set_new (void); cgraph_node_set_iterator cgraph_node_set_find (cgraph_node_set, struct cgraph_node *); diff --git a/gcc/cgraphclones.c b/gcc/cgraphclones.c index 4387b99..75eba6d 100644 --- a/gcc/cgraphclones.c +++ b/gcc/cgraphclones.c @@ -353,7 +353,7 @@ duplicate_thunk_for_node (cgraph_node *thunk, cgraph_node *node, CGRAPH_FREQ_BASE); e-call_stmt_cannot_inline_p = true; cgraph_call_edge_duplication_hooks (thunk-callees, e); - if (!expand_thunk (new_thunk, false)) + if (!expand_thunk (new_thunk, false, false)) new_thunk-analyzed = true; cgraph_call_node_duplication_hooks (thunk, new_thunk); return new_thunk; diff --git a/gcc/cgraphunit.c 
b/gcc/cgraphunit.c index e19b0a2..55bf378 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -610,7 +610,7 @@ analyze_function (struct cgraph_node *node) { cgraph_create_edge (node, cgraph_get_node (node-thunk.alias), NULL, 0, CGRAPH_FREQ_BASE); - if (!expand_thunk (node, false)) + if (!expand_thunk (node, false, false)) { node-thunk.alias = NULL; node-analyzed = true; @@ -1466,11 +1466,13 @@ thunk_adjust (gimple_stmt_iterator * bsi, } /* Expand thunk NODE to gimple if possible. + When FORCE_GIMPLE_THUNK is true, gimple thunk is created and + no assembler is produced. When OUTPUT_ASM_THUNK is true, also produce assembler for thunks that are not lowered. */ bool -expand_thunk (struct cgraph_node *node, bool output_asm_thunks) +expand_thunk (struct cgraph_node *node, bool output_asm_thunks, bool force_gimple_thunk) { bool this_adjusting = node-thunk.this_adjusting; HOST_WIDE_INT fixed_offset = node-thunk.fixed_offset; @@ -1481,7 +1483,7 @@ expand_thunk (struct cgraph_node *node, bool output_asm_thunks) tree a; - if (this_adjusting + if (!force_gimple_thunk this_adjusting targetm.asm_out.can_output_mi_thunk (thunk_fndecl, fixed_offset, virtual_value, alias)) { @@ -1719,7 +1721,7 @@ assemble_thunks_and_aliases (struct cgraph_node *node) e = e-next_caller; assemble_thunks_and_aliases (thunk); -expand_thunk (thunk, true); +expand_thunk (thunk, true, false); } else e = e-next_caller; diff --git a/gcc/ipa-visibility.c b/gcc/ipa-visibility.c index dc22b2e..7886722 100644 --- a/gcc/ipa-visibility.c +++ b/gcc/ipa-visibility.c @@ -115,7 +115,7 @@ cgraph_local_node_p (struct cgraph_node *node) } /* Return true when there is a reference to node and it is not vtable. */ -static bool +bool address_taken_from_non_vtable_p (symtab_node *node) { int i; -- 1.8.4.5
[PATCH 3/4] New attribute lookup function addition
Hi, this patch introduces a new function lookup_attribute_starting that can find all attributes starting with a specified string. Purpose of the function is to be able to identify e.g. if a function has any 'omp' attribute. Bootstrapped and tested on x86_64-linux. OK for trunk? Thanks, Martin 2014-05-29 Martin Liska mli...@suse.cz * tree.h (private_lookup_attribute_starting): New function. (lookup_attribute_starting): Likewise. * tree.c (private_lookup_attribute_starting): Likewise. diff --git a/gcc/tree.c b/gcc/tree.c index cf7e362..9c6b68c 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -5758,6 +5758,37 @@ private_lookup_attribute (const char *attr_name, size_t attr_len, tree list) return list; } +tree +private_lookup_attribute_starting (const char *attr_name, size_t attr_len, tree list) +{ + while (list) +{ + size_t ident_len = IDENTIFIER_LENGTH (get_attribute_name (list)); + + if (attr_len ident_len) + { + list = TREE_CHAIN (list); + continue; + } + + const char *p = IDENTIFIER_POINTER (get_attribute_name (list)); + + if (strncmp (attr_name, p, attr_len) == 0) + break; + + /* TODO: If we made sure that attributes were stored in the +canonical form without '__...__' (ie, as in 'text' as opposed +to '__text__') then we could avoid the following case. */ + if (p[0] == '_' p[1] == '_' strncmp (attr_name, p + 2, attr_len) == 0) + break; + + list = TREE_CHAIN (list); +} + + return list; +} + + /* A variant of lookup_attribute() that can be used with an identifier as the first argument, and where the identifier can be either 'text' or '__text__'. diff --git a/gcc/tree.h b/gcc/tree.h index 9fe7360..3e1b113 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -3731,6 +3731,10 @@ extern tree merge_type_attributes (tree, tree); and you should never call it directly. */ extern tree private_lookup_attribute (const char *, size_t, tree); +/* This function is a private implementation detail + of lookup_attribute_starting() and you should never call it directly. 
*/ +extern tree private_lookup_attribute_starting (const char *, size_t, tree); + /* Given an attribute name ATTR_NAME and a list of attributes LIST, return a pointer to the attribute's list element if the attribute is part of the list, or NULL_TREE if not found. If the attribute @@ -3753,6 +3757,23 @@ lookup_attribute (const char *attr_name, tree list) return private_lookup_attribute (attr_name, strlen (attr_name), list); } +/* Given an attribute name ATTR_NAME and a list of attributes LIST, + return a pointer to the attribute's list element if the attribute + starts with ATTR_NAME. ATTR_NAME must be in the form 'text' (not + '__text__'). */ + +static inline tree +lookup_attribute_starting (const char *attr_name, tree list) +{ + gcc_checking_assert (attr_name[0] != '_'); + /* In most cases, list is NULL_TREE. */ + if (list == NULL_TREE) +return NULL_TREE; + else +return private_lookup_attribute_starting (attr_name, strlen (attr_name), list); +} + + /* This function is a private implementation detail of is_attribute_p() and you should never call it directly. */ extern bool private_is_attribute_p (const char *, size_t, const_tree); -- 1.8.4.5
Re: [PATCH 1/4] Make coverage_compute_cfg_checksum callable with an argument
On 05/30/2014 06:28 PM, Jeff Law wrote: On 05/30/14 00:47, Martin Liška wrote: Hello, this is a small patchset that prepares API for new IPA Identical code folding pass. The patch adds an argument for coverage_compute_cfg_checksum. Bootstrapped and tested on x86_64-linux. OK for trunk? Thanks, Martin 2014-05-29 Martin Liska mli...@suse.cz * coverage.h (coverage_compute_cfg_checksum): Argument added. * coverage.c (coverage_compute_cfg_checksum): Likewise. * profile.c (branch_prob): Likewise. The block comment for coverage_compute_cfg_checksum needs to be updated. We're no longer computing the checksum for the current function (cfun), but instead computing the checksum for the argument FN. Hi, thank you for your feedback, I've just fixed the patch and will commit soon. Martin Otherwise OK for the trunk. jeff
Re: [PATCH 2/4] Enhancement of call graph API
On 05/30/2014 06:42 PM, Jeff Law wrote: On 05/30/14 00:47, Martin Liška wrote: Hello, this patch enhances callgraph API to enable more precise control of expand_thunk; another function becomes global. Bootstrapped and tested on x86_64-linux. OK for trunk? Thanks, Martin 2014-05-29 Martin Liska mli...@suse.cz * cgraph.h (expand_thunk): New argument added. (address_taken_from_non_vtable_p): New global function. * ipa-visibility.c (address_taken_from_non_vtable_p): Likewise. * cgraphclones.c (duplicate_thunk_for_node): Argument added to call. * cgraphunit.c (analyze_function): Likewise. (assemble_thunks_and_aliases): Argument added to call. (expand_thunk): New argument forces to produce GIMPLE thunk. Only concern here is the location of the prototype for address_taken_from_non_vtable_p. Though I guess other things from ipa-visibility.c are prototyped in cgraph.h. Can you put the prototype here in cgraph.h: /* In ipa-visibility.c */ bool cgraph_local_node_p (struct cgraph_node *); Otherwise OK. Real curious to see the meat of the optimization now :-) Hello, thanks too. It was really a wrong place for the declaration. Yeah, the optimization will be juicy :) Martin jeff
Re: [PATCH 3/4] New attribute lookup function addition
On 05/30/2014 06:37 PM, Jeff Law wrote: On 05/30/14 00:49, Martin Liška wrote: Hi, this patch introduces a new function lookup_attribute_starting that can find all attributes starting with a specified string. Purpose of the function is to be able to identify e.g. if a function has any 'omp' attribute. Bootstrapped and tested on x86_64-linux. OK for trunk? Thanks, Martin 2014-05-29 Martin Liska mli...@suse.cz * tree.h (private_lookup_attribute_starting): New function. (lookup_attribute_starting): Likewise. * tree.c (private_lookup_attribute_starting): Likewise. private_lookup_attribute_starting needs a block comment. Added. +tree +private_lookup_attribute_starting (const char *attr_name, size_t attr_len, tree list) Long line needs to be wrapped? Please review the patch for lines that need wrapping at 80 columns. Fixed too. So it's really a lookup by prefix, so I'd probably use a name like lookup_attribute_by_prefix. Why private_ in the function name? I used the same construction as for function 'private_is_attribute_p'; I hope the construction is fine? It appears it just returns the first attribute from LIST with the given prefix. Presumably you use it iteratively. +/* Given an attribute name ATTR_NAME and a list of attributes LIST, + return a pointer to the attribute's list element if the attribute + starts with ATTR_NAME. ATTR_NAME must be in the form 'text' (not + '__text__'). */ + +static inline tree +lookup_attribute_starting (const char *attr_name, tree list) +{ + gcc_checking_assert (attr_name[0] != '_'); + /* In most cases, list is NULL_TREE. */ + if (list == NULL_TREE) +return NULL_TREE; + else +return private_lookup_attribute_starting (attr_name, strlen (attr_name), list); +} So again, I prefer prefix rather than starting. Similarly this is meant to be called iteratively since you only get the first attribute with the given prefix, right? I added a comment that it returns just such first argument. Is the reworked patch OK for trunk? 
Martin OK with the nit fixes mentioned above. Jeff From be3ab469ee70ff3de434f5326c1a2eabf07da3ed Mon Sep 17 00:00:00 2001 Message-Id: be3ab469ee70ff3de434f5326c1a2eabf07da3ed.1401718733.git.mli...@suse.cz In-Reply-To: e245d67afb610a2f210b83382b49f75731ba68b8.1401718733.git.mli...@suse.cz References: e245d67afb610a2f210b83382b49f75731ba68b8.1401718733.git.mli...@suse.cz From: mliska mli...@suse.cz Date: Thu, 29 May 2014 17:18:34 +0200 Subject: [PATCH 3/4] New attribute lookup function addition To: gcc-patches@gcc.gnu.org Hi, this patch introduces a new function lookup_attribute_starting that can find all attributes starting with a specified string. Purpose of the function is to be able to identify e.g. if a function has any 'omp' attribute. Bootstrapped and tested on x86_64-linux. OK for trunk? Thanks, Martin 2014-05-29 Martin Liska mli...@suse.cz * tree.h (private_lookup_attribute_starting): New function. (lookup_attribute_starting): Likewise. * tree.c (private_lookup_attribute_starting): Likewise. diff --git a/gcc/tree.c b/gcc/tree.c index cf7e362..f983408 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -5758,6 +5758,44 @@ private_lookup_attribute (const char *attr_name, size_t attr_len, tree list) return list; } +/* Given an attribute name ATTR_NAME and a list of attributes LIST, + return a pointer to the attribute's list first element if the attribute + starts with ATTR_NAME. ATTR_NAME must be in the form 'text' (not + '__text__'). 
*/ + +tree +private_lookup_attribute_by_prefix (const char *attr_name, size_t attr_len, +tree list) +{ + while (list) +{ + size_t ident_len = IDENTIFIER_LENGTH (get_attribute_name (list)); + + if (attr_len ident_len) + { + list = TREE_CHAIN (list); + continue; + } + + const char *p = IDENTIFIER_POINTER (get_attribute_name (list)); + + if (strncmp (attr_name, p, attr_len) == 0) + break; + + /* TODO: If we made sure that attributes were stored in the + canonical form without '__...__' (ie, as in 'text' as opposed + to '__text__') then we could avoid the following case. */ + if (p[0] == '_' p[1] == '_' + strncmp (attr_name, p + 2, attr_len) == 0) + break; + + list = TREE_CHAIN (list); +} + + return list; +} + + /* A variant of lookup_attribute() that can be used with an identifier as the first argument, and where the identifier can be either 'text' or '__text__'. diff --git a/gcc/tree.h b/gcc/tree.h index 9fe7360..e592280 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -3731,6 +3731,10 @@ extern tree merge_type_attributes (tree, tree); and you should never call it directly. */ extern tree private_lookup_attribute (const char *, size_t, tree); +/* This function is a private implementation detail + of lookup_attribute_by_prefix() and you should never call it directly. */ +extern tree private_lookup_attribute_by_prefix (const char *, size_t, tree
[PATCH] PR 61642
Hello, I send patch for PR61642. Changelog: 2014-06-11 Martin Liska mli...@suse.cz * ipa-prop.c (ipa_make_edge_direct_to_target): Check that gimple call statement is reachable. Bootstrapped and regtested on x86_64-unknown-linux-gnu. Ready for trunk? Martin diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c index d02093a..1ffd173 100644 --- a/gcc/ipa-prop.c +++ b/gcc/ipa-prop.c @@ -2673,13 +2673,19 @@ ipa_make_edge_direct_to_target (struct cgraph_edge *ie, tree target) if (dump_enabled_p ()) { - location_t loc = gimple_location (ie-call_stmt); - dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, - discovered direct call to non-function in %s/%i, - making it __builtin_unreachable\n, - ie-caller-name (), - ie-caller-order); + const char *fmt = discovered direct call to non-function in %s/%i, +making it __builtin_unreachable\n; + + if (ie-call_stmt) + { + location_t loc = gimple_location (ie-call_stmt); + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, fmt, + ie-caller-name (), ie-caller-order); + } + else if (dump_file) + fprintf (dump_file, fmt, ie-caller-name (), ie-caller-order); } + target = builtin_decl_implicit (BUILT_IN_UNREACHABLE); callee = cgraph_get_create_node (target); unreachable = true; @@ -2739,10 +2745,18 @@ ipa_make_edge_direct_to_target (struct cgraph_edge *ie, tree target) } if (dump_enabled_p ()) { - location_t loc = gimple_location (ie-call_stmt); - dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, - converting indirect call in %s to direct call to %s\n, - ie-caller-name (), callee-name ()); + const char *fmt = converting indirect call in %s to direct call to %s\n; + + if (ie-call_stmt) +{ + location_t loc = gimple_location (ie-call_stmt); + + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, fmt, + ie-caller-name (), callee-name ()); + +} + else if (dump_file) + fprintf (dump_file, fmt, ie-caller-name (), callee-name ()); } ie = cgraph_make_edge_direct (ie, callee); es = inline_edge_summary (ie);
Re: [PATCH] PR 61642
On 06/11/2014 02:01 PM, Richard Biener wrote: On Wed, Jun 11, 2014 at 1:58 PM, Martin Liška mli...@suse.cz wrote: Hello, I send patch for PR61642. 61462? Hello, that's bogus, I am not a visionary that creates a patch for a future bug :) Thanks, Martin Changelog: 2014-06-11 Martin Liska mli...@suse.cz PR ipa/61462 * ipa-prop.c (ipa_make_edge_direct_to_target): Check that gimple call statement is reachable. Bootstrapped and regtested on x86_64-unknown-linux-gnu. Ready for trunk? Ok with those adjustments. Richard. Martin
Re: Make ipa-ref somewhat less stupid
On 06/10/2014 08:34 AM, Jan Hubicka wrote: Hi, ipa-reference is somewhat stupid and builds its data sets for all variables including addressable and public one just to prune them out after all bitmaps are constructed. This used to make sense when the profile generation happened at compile time, but since ipa_ref datastructure was intrdocued this is a nonsense. Martin: It may be interesting to check if this solves the memory use issues with chrome. We also may be able to re-enable ipa-ref with profile-generate as I think all the datastructures are considered to have address taken. Hi, there is a link to chromium stats: https://drive.google.com/file/d/0B0pisUJ80pO1VmNHeklCRWVkOUU/edit?usp=sharing Both compilation were run with '-flto=6', where the upper graph adds '-fprofile-generate'. Memory footprint is IMHO acceptable, but compilation process takes twice longer with profile generation. Yeah, chromium contains a really big code base :) Martin Honza Bootstrapped/regtested x86_64-linux. * ipa-reference.c (is_proper_for_analysis): Exclude addressable and public vars. (intersect_static_var_sets): Remove. (propagate): Do not prune local statics. Index: ipa-reference.c === --- ipa-reference.c (revision 211364) +++ ipa-reference.c (working copy) @@ -243,6 +243,17 @@ is_proper_for_analysis (tree t) if (TREE_READONLY (t)) return false; + /* We can not track variables with address taken. */ + if (TREE_ADDRESSABLE (t)) +return false; + + /* TODO: We could track public variables that are not addressable, but currently + frontends don't give us those. */ + if (TREE_PUBLIC (t)) +return false; + + /* TODO: Check aliases. */ + /* This is a variable we care about. Check if we have seen it before, and if not add it the set of variables we care about. */ if (all_module_statics @@ -312,26 +323,6 @@ union_static_var_sets (bitmap x, bitmap return x == all_module_statics; } -/* Compute X = Y, taking into account the possibility that - X may become the maximum set. 
*/ - -static bool -intersect_static_var_sets (bitmap x, bitmap y) -{ - if (x != all_module_statics) -{ - bitmap_and_into (x, y); - /* As with union_static_var_sets, reducing to the maximum -set as early as possible is an overall win. */ - if (bitmap_equal_p (x, all_module_statics)) - { - BITMAP_FREE (x); - x = all_module_statics; - } -} - return x == all_module_statics; -} - /* Return a copy of SET on the bitmap obstack containing SET. But if SET is NULL or the maximum set, return that instead. */ @@ -669,7 +660,6 @@ static unsigned int propagate (void) { struct cgraph_node *node; - varpool_node *vnode; struct cgraph_node **order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes); int order_pos; @@ -681,25 +671,6 @@ propagate (void) ipa_discover_readonly_nonaddressable_vars (); generate_summary (); - /* Now we know what vars are really statics; prune out those that aren't. */ - FOR_EACH_VARIABLE (vnode) -if (vnode-externally_visible - || TREE_ADDRESSABLE (vnode-decl) - || TREE_READONLY (vnode-decl) - || !is_proper_for_analysis (vnode-decl) - || !vnode-definition) - bitmap_clear_bit (all_module_statics, DECL_UID (vnode-decl)); - - /* Forget info we collected just for fun on variables that turned out to be - non-local. */ - FOR_EACH_DEFINED_FUNCTION (node) -{ - ipa_reference_local_vars_info_t node_l; - node_l = get_reference_vars_info (node)-local; - intersect_static_var_sets (node_l-statics_read, all_module_statics); - intersect_static_var_sets (node_l-statics_written, all_module_statics); -} - /* Propagate the local information through the call graph to produce the global information. All the nodes within a cycle will have the same info so we collapse cycles first. Then we can do the
Re: Make ipa-ref somewhat less stupid
On 06/16/2014 10:01 AM, Jan Hubicka wrote: On 06/10/2014 08:34 AM, Jan Hubicka wrote: Hi, ipa-reference is somewhat stupid and builds its data sets for all variables including addressable and public one just to prune them out after all bitmaps are constructed. This used to make sense when the profile generation happened at compile time, but since ipa_ref datastructure was introduced this is a nonsense. Martin: It may be interesting to check if this solves the memory use issues with chrome. We also may be able to re-enable ipa-ref with profile-generate as I think all the datastructures are considered to have address taken. Hi, there is a link to chromium stats: https://drive.google.com/file/d/0B0pisUJ80pO1VmNHeklCRWVkOUU/edit?usp=sharing Both compilations were run with '-flto=6', where the upper graph adds '-fprofile-generate'. Memory footprint is IMHO acceptable, but the compilation process takes twice as long with profile generation. Yeah, chromium contains a really big code base :) Yep, I wonder why WPA takes so much longer. Do you think you can build lto1 with --enable-gather-detailed-mem-stats and relink with -fpre-ipa-mem-report -fpost-ipa-mem-report -fmem-report -Q and send me the output? It would be nice to push Chromium under 4GB of WPA :) Here is the report you requested: https://drive.google.com/file/d/0B0pisUJ80pO1RlRRTVBxUG5vSlE/edit?usp=sharing , produced by -fno-profile-generate. With -fprofile-generate enabled, the WPA stage cannot fit into 24GB of memory with memory stats enabled. Martin Thanks a lot! Honza
Re: [PATCH 1/5] New Identical Code Folding IPA pass
On 06/17/2014 10:14 PM, David Malcolm wrote: On Fri, 2014-06-13 at 12:24 +0200, mliska wrote: [...snip...] Statistics about the pass: Inkscape: 11.95 MB - 11.44 MB (-4.27%) Firefox: 70.12 MB - 70.12 MB (-3.07%) FWIW, you wrote 70.12 MB here for both before and after for Firefox, but give a -3.07% change, which seems like a typo. A 3.07% reduction from 70.12 MB would be 67.97 MB; was this what the pass achieved? Hi, it's a typo; the original size of FF is 72.34 MB. I hope -3.07% is the correctly evaluated achievement. Thanks, Martin [...snip...] Thanks (nice patch, btw) Dave
Re: [PATCH 1/5] New Identical Code Folding IPA pass
On 06/17/2014 10:09 PM, Paolo Carlini wrote: Hi, On 13/06/14 12:24, mliska wrote: The optimization is inspired by Microsoft /OPT:ICF optimization (http://msdn.microsoft.com/en-us/library/bxwfs976.aspx) that merges COMDAT sections with each function reside in a separate section. In terms of C++ testcases, I'm wondering if you already double checked that the new pass already does well on the typical examples on which, I was told, the Microsoft optimization is known to do well, eg, code instantiating std::vector for different pointer types, or even long and long long on x86_64-linux, things like that. I've just added another C++ test case: #include vector using namespace std; static vectorvectorint * a; static vectorvoid * b; int main() { return b.size() + a.size (); } where the pass identifies following equality: Semantic equality hit:std::vector_Tp, _Alloc::size_type std::vector_Tp, _Alloc::size() const [with _Tp = std::vectorint*; _Alloc = std::allocatorstd::vectorint*; std::vector_Tp, _Alloc::size_type = long unsigned int]-std::vector_Tp, _Alloc::size_type std::vector_Tp, _Alloc::size() const [with _Tp = void*; _Alloc = std::allocatorvoid*; std::vector_Tp, _Alloc::size_type = long unsigned int] Semantic equality hit:static void std::_Destroy_auxtrue::__destroy(_ForwardIterator, _ForwardIterator) [with _ForwardIterator = void**]-static void std::_Destroy_auxtrue::__destroy(_ForwardIterator, _ForwardIterator) [with _ForwardIterator = std::vectorint**] Semantic equality hit:void std::_Destroy(_ForwardIterator, _ForwardIterator) [with _ForwardIterator = void**]-void std::_Destroy(_ForwardIterator, _ForwardIterator) [with _ForwardIterator = std::vectorint**] Semantic equality hit:void std::_Destroy(_ForwardIterator, _ForwardIterator, std::allocator_T2) [with _ForwardIterator = void**; _Tp = void*]-void std::_Destroy(_ForwardIterator, _ForwardIterator, std::allocator_T2) [with _ForwardIterator = std::vectorint**; _Tp = std::vectorint*] Semantic equality hit:void 
__gnu_cxx::new_allocator_Tp::deallocate(__gnu_cxx::new_allocator_Tp::pointer, __gnu_cxx::new_allocator_Tp::size_type) [with _Tp = void*; __gnu_cxx::new_allocator_Tp::pointer = void**; __gnu_cxx::new_allocator_Tp::size_type = long unsigned int]-void __gnu_cxx::new_allocator_Tp::deallocate(__gnu_cxx::new_allocator_Tp::pointer, __gnu_cxx::new_allocator_Tp::size_type) [with _Tp = std::vectorint*; __gnu_cxx::new_allocator_Tp::pointer = std::vectorint**; __gnu_cxx::new_allocator_Tp::size_type = long unsigned int] Semantic equality hit:static void __gnu_cxx::__alloc_traits_Alloc::deallocate(_Alloc, __gnu_cxx::__alloc_traits_Alloc::pointer, __gnu_cxx::__alloc_traits_Alloc::size_type) [with _Alloc = std::allocatorvoid*; __gnu_cxx::__alloc_traits_Alloc::pointer = void**; __gnu_cxx::__alloc_traits_Alloc::size_type = long unsigned int]-static void __gnu_cxx::__alloc_traits_Alloc::deallocate(_Alloc, __gnu_cxx::__alloc_traits_Alloc::pointer, __gnu_cxx::__alloc_traits_Alloc::size_type) [with _Alloc = std::allocatorstd::vectorint*; __gnu_cxx::__alloc_traits_Alloc::pointer = std::vectorint**; __gnu_cxx::__alloc_traits_Alloc::size_type = long unsigned int] As one would expect, there is a function 'size'. Martin Thanks, Paolo.
Re: [PATCH 4/5] Existing tests fix
On 06/17/2014 10:50 PM, Rainer Orth wrote: Jeff Law l...@redhat.com writes: On 06/13/14 04:48, mliska wrote: Hi, many tests rely on a precise number of scanned functions in a dump file. If IPA ICF decides to merge some function and(or) read-only variables, counts do not match. Martin Changelog: 2014-06-13 Martin Liska mli...@suse.cz Honza Hubicka hubi...@ucw.cz * c-c++-common/rotate-1.c: Text ^ Huh? You are right, batch replacement mistake. There should be: * c-c++-common/rotate-1.c: Update dg-options. * c-c++-common/rotate-2.c: Likewise. ... Martin * c-c++-common/rotate-2.c: New test. * c-c++-common/rotate-3.c: Likewise. Rainer
[PATCH] Fix gimple-fold
Hello, I found ICE in Chromium compiled with LTO. There's a call that is proved by ipa-devirt as __builtin_unreachable; same decision is done by gimple-fold and this call is replaced by GIMPLE_CALL and GIMPLE_ASSIGN (in this order). After that condition for cgraph_update_edges_for_call_stmt_node is not satisfied and corresponding cgraph_edge is not updated. Thus a verifier reports a wrong edge. Bootstrapped and tested on a x86_64 machine. Changelog: 2014-03-18 Martin Liska mli...@suse.cz * cgraph.c (cgraph_update_edges_for_call_stmt_node): added case when gimple call statement is updated. * gimple-fold.c (gimple_fold_call): changed order for GIMPLE_ASSIGN and GIMPLE_CALL, where gsi iterator still points to GIMPLE CALL. OK for trunk? Thank you, Martin diff --git a/gcc/cgraph.c b/gcc/cgraph.c index a15b6bc..cd68894 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -1519,7 +1519,11 @@ cgraph_update_edges_for_call_stmt_node (struct cgraph_node *node, { if (callee-decl == new_call || callee-former_clone_of == new_call) - return; +{ + cgraph_set_call_stmt (cgraph_edge (node, old_stmt), +new_stmt); + return; +} callee = callee-clone_of; } } diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c index eafdb2d..a033fbc 100644 --- a/gcc/gimple-fold.c +++ b/gcc/gimple-fold.c @@ -1153,8 +1153,14 @@ gimple_fold_call (gimple_stmt_iterator *gsi, bool inplace) { tree var = create_tmp_var (TREE_TYPE (lhs), NULL); tree def = get_or_create_ssa_default_def (cfun, var); - gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); - update_call_from_tree (gsi, def); + + /* To satisfy condition for + cgraph_update_edges_for_call_stmt_node, + we need to preserve GIMPLE_CALL statement + at position of GSI iterator. */ + gimple_stmt_iterator oldgsi = *gsi; + gsi_insert_before (gsi, new_stmt, GSI_NEW_STMT); + update_call_from_tree (oldgsi, def); } else gsi_replace (gsi, new_stmt, true);
Re: [PATCH] Fix gimple-fold
Thank you for feedback, new changelog: 2014-03-18 Martin Liska mli...@suse.cz * cgraph.c (cgraph_update_edges_for_call_stmt_node): Added case when gimple call statement is update. * gimple-fold.c (gimple_fold_call): Changed order for GIMPLE_ASSIGN and GIMPLE_CALL, where gsi iterator still points to GIMPLE CALL. OK for trunk? Martin On 03/18/2014 02:13 PM, Jakub Jelinek wrote: Hi! 2014-03-18 Martin Liska mli...@suse.cz * cgraph.c (cgraph_update_edges_for_call_stmt_node): added case when gimple call statement is updated. Capital letter after : * gimple-fold.c (gimple_fold_call): changed order for GIMPLE_ASSIGN and Likewise here. Jakub diff --git a/gcc/cgraph.c b/gcc/cgraph.c index a15b6bc..269146a 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -1519,7 +1519,10 @@ cgraph_update_edges_for_call_stmt_node (struct cgraph_node *node, { if (callee-decl == new_call || callee-former_clone_of == new_call) - return; +{ + cgraph_set_call_stmt (e, new_stmt); + return; +} callee = callee-clone_of; } } diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c index eafdb2d..177abc1 100644 --- a/gcc/gimple-fold.c +++ b/gcc/gimple-fold.c @@ -1153,8 +1153,13 @@ gimple_fold_call (gimple_stmt_iterator *gsi, bool inplace) { tree var = create_tmp_var (TREE_TYPE (lhs), NULL); tree def = get_or_create_ssa_default_def (cfun, var); - gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); + + /* To satisfy condition for + cgraph_update_edges_for_call_stmt_node, + we need to preserve GIMPLE_CALL statement + at position of GSI iterator. */ update_call_from_tree (gsi, def); + gsi_insert_before (gsi, new_stmt, GSI_NEW_STMT); } else gsi_replace (gsi, new_stmt, true);
Re: [PATCH] Fix gimple-fold
Patch passes bootstrap and regtest. I fixed indentation according to discussion with Jakub. OK for trunk? Thanks, Martin On 03/18/2014 02:55 PM, Richard Biener wrote: On Tue, Mar 18, 2014 at 2:29 PM, Martin Liška mli...@suse.cz wrote: Thank you for feedback, Ok if it passes bootstrap / regtest. Thanks, Richard. new changelog: 2014-03-18 Martin Liska mli...@suse.cz * cgraph.c (cgraph_update_edges_for_call_stmt_node): Added case when gimple call statement is update. * gimple-fold.c (gimple_fold_call): Changed order for GIMPLE_ASSIGN and GIMPLE_CALL, where gsi iterator still points to GIMPLE CALL. OK for trunk? Martin On 03/18/2014 02:13 PM, Jakub Jelinek wrote: Hi! 2014-03-18 Martin Liska mli...@suse.cz * cgraph.c (cgraph_update_edges_for_call_stmt_node): added case when gimple call statement is updated. Capital letter after : * gimple-fold.c (gimple_fold_call): changed order for GIMPLE_ASSIGN and Likewise here. Jakub diff --git a/gcc/cgraph.c b/gcc/cgraph.c index a15b6bc..577352f 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -1519,7 +1519,10 @@ cgraph_update_edges_for_call_stmt_node (struct cgraph_node *node, { if (callee-decl == new_call || callee-former_clone_of == new_call) - return; + { + cgraph_set_call_stmt (e, new_stmt); + return; + } callee = callee-clone_of; } } diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c index eafdb2d..adc9d49 100644 --- a/gcc/gimple-fold.c +++ b/gcc/gimple-fold.c @@ -1153,8 +1153,13 @@ gimple_fold_call (gimple_stmt_iterator *gsi, bool inplace) { tree var = create_tmp_var (TREE_TYPE (lhs), NULL); tree def = get_or_create_ssa_default_def (cfun, var); - gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); + + /* To satisfy condition for + cgraph_update_edges_for_call_stmt_node, + we need to preserve GIMPLE_CALL statement + at position of GSI iterator. */ update_call_from_tree (gsi, def); + gsi_insert_before (gsi, new_stmt, GSI_NEW_STMT); } else gsi_replace (gsi, new_stmt, true);
Re: [PATCH] Avoid ggc_collect () after WPA forking
On 03/19/2014 03:55 PM, Richard Biener wrote: On Wed, 19 Mar 2014, Martin Liška wrote: There are stats for Firefox with LTO and -O2. According to graphs it looks that memory consumption for parallel WPA phase is similar. When I disable parallel WPA, wpa footprint is ~4GB, but ltrans memory footprint is similar to parallel WPA that reduces libxul.so linking by ~10%. Ok, so I suppose this tracks RSS, not virtual memory use (what is used and what is active)? Data are given by vmstat, according to: http://stackoverflow.com/questions/18529723/what-is-active-memory-and-inactive-memory *Active memory*is memory that is being used by a particular process. *Inactive memory*is memory that was allocated to a process that is no longer running. So please follow just 'blue' line that displays really used memory. According to man, vmstat tracks virtual memory statistics. And it is WPA plus LTRANS stages, WPA ends where memory use first goes down to zero? I wonder if you can identify the point where parallel streaming starts and where it ends ... ;) Exactly, WPA ends when it goes to zero. Btw, I have another patch in my local tree, limiting the exponential growth of blocks we allocate when outputting sections. But it shouldn't be _that_ bad ... maybe you can try if it has any effect? I can apply it. Martin Thanks, Richard. Index: gcc/lto-section-out.c === --- gcc/lto-section-out.c (revision 208642) +++ gcc/lto-section-out.c (working copy) @@ -99,13 +99,19 @@ lto_end_section (void) } +/* We exponentially grow the size of the blocks as we need to make + room for more data to be written. Start with a single page and go up + to 2MB pages for this. */ +#define FIRST_BLOCK_SIZE 4096 +#define MAX_BLOCK_SIZE (2 * 1024 * 1024) + /* Write all of the chars in OBS to the assembler. Recycle the blocks in obs as this is being done. 
*/ void lto_write_stream (struct lto_output_stream *obs) { - unsigned int block_size = 1024; + unsigned int block_size = FIRST_BLOCK_SIZE; struct lto_char_ptr_base *block; struct lto_char_ptr_base *next_block; if (!obs-first_block) @@ -135,6 +141,7 @@ lto_write_stream (struct lto_output_stre else lang_hooks.lto.append_data (base, num_chars, block); block_size *= 2; + block_size = MIN (MAX_BLOCK_SIZE, block_size); } } @@ -152,7 +159,7 @@ lto_append_block (struct lto_output_stre { /* This is the first time the stream has been written into. */ - obs-block_size = 1024; + obs-block_size = FIRST_BLOCK_SIZE; new_block = (struct lto_char_ptr_base*) xmalloc (obs-block_size); obs-first_block = new_block; } @@ -162,6 +169,7 @@ lto_append_block (struct lto_output_stre /* Get a new block that is twice as big as the last block and link it into the list. */ obs-block_size *= 2; + obs-block_size = MIN (MAX_BLOCK_SIZE, obs-block_size); new_block = (struct lto_char_ptr_base*) xmalloc (obs-block_size); /* The first bytes of the block are reserved as a pointer to the next block. Set the chain of the full block to the
Re: [testsuite] Fix gcc.dg/ipa/ipa-icf-2[18].c on Solaris
On 10/23/2014 11:38 AM, Jakub Jelinek wrote: On Thu, Oct 23, 2014 at 11:35:44AM +0200, Rainer Orth wrote: The testcase is clearly i?86/x86_64 specific, so it must be guarded with { target i?86-*-* x86_64-*-* }, and as it is a compile time test, no need Right, that's what my patch already does. for assembler support, so just adding -msse2 option to dg-option is the right thing to do. Ok, will try that. Still no idea why Linux/x86 and Solaris/x86 are different here. Why do you think so? I certainly have: FAIL: gcc.dg/ipa/ipa-icf-21.c (test for excess errors) UNRESOLVED: gcc.dg/ipa/ipa-icf-21.c scan-ipa-dump icf Equal symbols: 1 UNRESOLVED: gcc.dg/ipa/ipa-icf-21.c scan-ipa-dump icf Semantic equality hit:bar-foo in my i686-linux test_summary log. It really depends on how the compiler is configured, if it defaults to march that has sse/sse2 by default, it will succeed, otherwise it will not. Jakub Hello. I combined both patches and run regression tests on x86_64-linux-pc. Ready for trunk? Thanks, Martin diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-icf-21.c b/gcc/testsuite/gcc.dg/ipa/ipa-icf-21.c index 7358e43..68aabc5 100644 --- a/gcc/testsuite/gcc.dg/ipa/ipa-icf-21.c +++ b/gcc/testsuite/gcc.dg/ipa/ipa-icf-21.c @@ -1,5 +1,5 @@ -/* { dg-do compile } */ -/* { dg-options -O2 -fdump-ipa-icf } */ +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +/* { dg-options -O2 -msse2 -fdump-ipa-icf } */ #include xmmintrin.h diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-icf-28.c b/gcc/testsuite/gcc.dg/ipa/ipa-icf-28.c index 538e0ab..bcaf84e 100644 --- a/gcc/testsuite/gcc.dg/ipa/ipa-icf-28.c +++ b/gcc/testsuite/gcc.dg/ipa/ipa-icf-28.c @@ -1,4 +1,4 @@ -/* { dg-do compile } */ +/* { dg-do compile { target init_priority } } */ /* { dg-options -O2 -fdump-ipa-icf-details -fno-inline } */ __attribute__ ((noinline, constructor(200)))
Re: [PATCH 5/5] New tests introduction
On 10/19/2014 09:50 AM, Andreas Schwab wrote: Martin Liška mli...@suse.cz writes: diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-icf-21.c b/gcc/testsuite/gcc.dg/ipa/ipa-icf-21.c new file mode 100644 index 000..7358e43 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/ipa-icf-21.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options -O2 -fdump-ipa-icf } */ + +#include xmmintrin.h + +__attribute__ ((noinline)) +void foo() +{ + float x = 1.2345f; + __m128 v =_mm_load1_ps(x); +} + +__attribute__ ((noinline)) +void bar() +{ + float x = 1.2345f; + __m128 v =_mm_load1_ps(x); +} + +int main() +{ + return 2; +} + +/* { dg-final { scan-ipa-dump Semantic equality hit:bar-foo icf } } */ +/* { dg-final { scan-ipa-dump Equal symbols: 1 icf } } */ +/* { dg-final { cleanup-ipa-dump icf } } */ FAIL: gcc.dg/ipa/ipa-icf-21.c (test for excess errors) Excess errors: /usr/local/gcc/gcc-20141019/gcc/testsuite/gcc.dg/ipa/ipa-icf-21.c:4:23: fatal error: xmmintrin.h: No such file or directory compilation terminated. Andreas. Hello Andreas. Starting from r216589 this problem should be fixed. Thanks, Martin
Re: [testsuite] Fix gcc.dg/ipa/ipa-icf-2[18].c on Solaris
On 10/23/2014 04:03 PM, Uros Bizjak wrote: On Thu, Oct 23, 2014 at 3:54 PM, Uros Bizjak ubiz...@gmail.com wrote: Ok, will try that. Still no idea why Linux/x86 and Solaris/x86 are different here. Why do you think so? I certainly have: FAIL: gcc.dg/ipa/ipa-icf-21.c (test for excess errors) UNRESOLVED: gcc.dg/ipa/ipa-icf-21.c scan-ipa-dump icf Equal symbols: 1 UNRESOLVED: gcc.dg/ipa/ipa-icf-21.c scan-ipa-dump icf Semantic equality hit:bar-foo in my i686-linux test_summary log. It really depends on how the compiler Right, but on i386-pc-solaris2.11 it passes. is configured, if it defaults to march that has sse/sse2 by default, it will succeed, otherwise it will not. For some reason, I hit a division by zero on alphaev68-linux-gnu in the report generation code: Starting program: /space/uros/gcc-build/prev-gcc/cc1 -O2 -fdump-ipa-icf -quiet ipa-icf-14.i Program received signal SIGFPE, Arithmetic exception. ipa_icf::sem_item_optimizer::merge_classes (this=0x12187dd80, prev_class_count=3) at /space/homedirs/uros/gcc-svn/trunk/gcc/ipa-icf.c:2203 2203 non_singular_classes_count); (gdb) list 2198 fprintf (dump_file, Average class size before: %.2f, after: %.2f\n, 2199 1.0f * item_count / prev_class_count, 2200 1.0f * item_count / class_count); 2201 fprintf (dump_file, Average non-singular class size: %.2f, count: %u\n, 2202 1.0f * non_singular_classes_sum / non_singular_classes_count, 2203 non_singular_classes_count); 2204 fprintf (dump_file, Equal symbols: %u\n, equal_items); 2205 fprintf (dump_file, Fraction of visited symbols: %.2f%%\n\n, 2206 100.0f * equal_items / item_count); 2207} (gdb) p non_singular_classes_count $1 = 0 Also: (gdb) p non_singular_classes_sum $1 = 0 This creates a nice NaN which can throw an exception. Hello. Sorry for a stupid bug. I attached patch that should fix these divisions by zero. I'm just wondering if we have a machine in compile farm with alpha? 
Thanks, Martin (gdb) bt #0 ipa_icf::sem_item_optimizer::merge_classes (this=0x12187dd80, prev_class_count=3) at /space/homedirs/uros/gcc-svn/trunk/gcc/ipa-icf.c:2203 #1 0x000121255c70 in ipa_icf::sem_item_optimizer::execute (this=0x12187dd80) at /space/homedirs/uros/gcc-svn/trunk/gcc/ipa-icf.c:1602 #2 0x00012125999c in ipa_icf::ipa_icf_driver () at /space/homedirs/uros/gcc-svn/trunk/gcc/ipa-icf.c:2319 #3 0x00012125a46c in ipa_icf::pass_ipa_icf::execute (this=0x121850f20) at /space/homedirs/uros/gcc-svn/trunk/gcc/ipa-icf.c:2367 #4 0x0001209c10c4 in execute_one_pass (pass=0x121850f20) at /space/homedirs/uros/gcc-svn/trunk/gcc/passes.c:2156 #5 0x0001209c26f4 in execute_ipa_pass_list (pass=0x121850f20) at /space/homedirs/uros/gcc-svn/trunk/gcc/passes.c:2550 #6 0x00012048fcdc in ipa_passes () at /space/homedirs/uros/gcc-svn/trunk/gcc/cgraphunit.c:2057 Uros. diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c index d1238a4..e7a293e 100644 --- a/gcc/ipa-icf.c +++ b/gcc/ipa-icf.c @@ -1736,7 +1736,7 @@ sem_item_optimizer::parse_nonsingleton_classes (void) if (dump_file) fprintf (dump_file, Init called for %u items (%.2f%%).\n, init_called_count, - 100.0f * init_called_count / m_items.length ()); + m_items.length () ? 100.0f * init_called_count / m_items.length (): 0.0f); } /* Equality function for semantic items is used to subdivide existing @@ -2196,14 +2196,15 @@ sem_item_optimizer::merge_classes (unsigned int prev_class_count) fprintf (dump_file, Congruent classes before: %u, after: %u\n, prev_class_count, class_count); fprintf (dump_file, Average class size before: %.2f, after: %.2f\n, - 1.0f * item_count / prev_class_count, - 1.0f * item_count / class_count); + prev_class_count ? 1.0f * item_count / prev_class_count : 0.0f, + class_count ? 1.0f * item_count / class_count : 0.0f); fprintf (dump_file, Average non-singular class size: %.2f, count: %u\n, - 1.0f * non_singular_classes_sum / non_singular_classes_count, + non_singular_classes_count ? 
1.0f * non_singular_classes_sum / + non_singular_classes_count : 0.0f, non_singular_classes_count); fprintf (dump_file, Equal symbols: %u\n, equal_items); fprintf (dump_file, Fraction of visited symbols: %.2f%%\n\n, - 100.0f * equal_items / item_count); + item_count ? 100.0f * equal_items / item_count : 0.0f); } for (hash_tablecongruence_class_group_hash::iterator it = m_classes.begin ();
[PATCH] Fix for PR63595
Hello. Following patch contains addition of PHI result comparison in IPA ICF. Boostrap works on x86_64-linux-pc, no regression observed. Ready for trunk? Thanks, Martin gcc/testsuite/ChangeLog: 2014-10-24 Martin Liska mli...@suse.cz * gcc.dg/ipa/pr63595.c: New test. gcc/ChangeLog: 2014-10-24 Martin Liska mli...@suse.cz * ipa-icf.c (sem_function::compare_phi_node): PHI result comparison added. diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c index d1238a4..7456fec 100644 --- a/gcc/ipa-icf.c +++ b/gcc/ipa-icf.c @@ -869,6 +869,12 @@ sem_function::compare_phi_node (basic_block bb1, basic_block bb2) phi1 = gsi_stmt (si1); phi2 = gsi_stmt (si2); + tree phi_result1 = gimple_phi_result (phi1); + tree phi_result2 = gimple_phi_result (phi2); + + if (!m_checker-compare_operand (phi_result1, phi_result2)) + return return_false_with_msg (PHI results are different); + size1 = gimple_phi_num_args (phi1); size2 = gimple_phi_num_args (phi2); diff --git a/gcc/testsuite/gcc.dg/ipa/pr63595.c b/gcc/testsuite/gcc.dg/ipa/pr63595.c new file mode 100644 index 000..52851fb --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/pr63595.c @@ -0,0 +1,65 @@ +/* { dg-do compile } */ +/* { dg-options -O2 -fdump-ipa-icf-details } */ + +typedef int size_t; + +typedef struct TypHeader { +unsigned long size; +struct TypHeader * * ptr; +char name[3]; +unsigned char type; +} * TypHandle; + +__attribute__((noinline)) +static TypHandle Error(const char *str, unsigned long l1, unsigned long l2) +{ + return 0; +} + +extern TypHandle (* EvTab[81]) ( TypHandle hd ); +extern TypHandle (*TabProd[28][28]) ( TypHandle, TypHandle ); + +__attribute__((noinline)) +TypHandle FunOnRight (TypHandle hdCall) +{ +TypHandle hdRes; +TypHandle hdPnt; +TypHandle hdElm; + + +if ( ((hdCall)-size) != 3*((size_t)sizeof(TypHandle)) ) +return Error(,0L,0L); +hdPnt = ((long)(((TypHandle*)((hdCall)-ptr))[1])1 ? (((TypHandle*)((hdCall)-ptr))[1]) : (* EvTab[(((long)(((TypHandle*)((hdCall)-ptr))[1]) 1) ? 
1 : TypHandle*)((hdCall)-ptr))[1])-type))])TypHandle*)((hdCall)-ptr))[1]))); +hdElm = ((long)(((TypHandle*)((hdCall)-ptr))[2])1 ? (((TypHandle*)((hdCall)-ptr))[2]) : (* EvTab[(((long)(((TypHandle*)((hdCall)-ptr))[2]) 1) ? 1 : TypHandle*)((hdCall)-ptr))[2])-type))])TypHandle*)((hdCall)-ptr))[2]))); + + +hdRes = ((*TabProd[(((long)(hdPnt) 1) ? 1 : ((hdPnt)-type))][(((long)(hdElm) 1) ? 1 : ((hdElm)-type))])((hdPnt),(hdElm))); +return hdRes; +} + +__attribute__((noinline)) +TypHandle FunOnLeft (TypHandle hdCall) +{ +TypHandle hdRes; +TypHandle hdPnt; +TypHandle hdElm; + + +if ( ((hdCall)-size) != 3*((size_t)sizeof(TypHandle)) ) +return Error(,0L,0L); +hdPnt = ((long)(((TypHandle*)((hdCall)-ptr))[1])1 ? (((TypHandle*)((hdCall)-ptr))[1]) : (* EvTab[(((long)(((TypHandle*)((hdCall)-ptr))[1]) 1) ? 1 : TypHandle*)((hdCall)-ptr))[1])-type))])TypHandle*)((hdCall)-ptr))[1]))); +hdElm = ((long)(((TypHandle*)((hdCall)-ptr))[2])1 ? (((TypHandle*)((hdCall)-ptr))[2]) : (* EvTab[(((long)(((TypHandle*)((hdCall)-ptr))[2]) 1) ? 1 : TypHandle*)((hdCall)-ptr))[2])-type))])TypHandle*)((hdCall)-ptr))[2]))); + + +hdRes = ((*TabProd[(((long)(hdElm) 1) ? 1 : ((hdElm)-type))][(((long)(hdPnt) 1) ? 1 : ((hdPnt)-type))])((hdElm),(hdPnt))); +return hdRes; +} + +int main() +{ + return 0; +} + +/* { dg-final { scan-ipa-dump Equal symbols: 0 icf } } */ +/* { dg-final { scan-ipa-dump PHI results are different icf } } */ +/* { dg-final { cleanup-ipa-dump icf } } */
Re: [PATCH, IPA ICF] Fix PR63664, PR63574 (segfault in ipa-icf pass)
On 10/29/2014 02:45 PM, Ilya Enkovich wrote: On 29 Oct 10:34, Richard Biener wrote: On Tue, Oct 28, 2014 at 5:14 PM, Ilya Enkovich enkovich@gmail.com wrote: Hi, This patch fixes PR63664 and PR63574. The problem is that NULL types for labels are not handled properly by ICF. I assume it is OK for labels to have a NULL type, so I added a check to ICF rather than fixing label generation. Bootstrapped and checked on linux-x86_64. OK for trunk? Instead, it shouldn't be called for labels. Richard. Here is a version which doesn't compare types for labels. Is it OK? Hello. I've just been testing a patch where the pass does not call compare_operand for gimple labels. As the pass creates a mapping between labels and basic blocks, such a comparison will not be necessary. Thanks, Martin Bootstrapped and checked on linux-x86_64. Thanks, Ilya -- gcc/ 2014-10-29 Ilya Enkovich ilya.enkov...@intel.com PR ipa/63664 PR bootstrap/63574 * ipa-icf-gimple.c (func_checker::compatible_types_p): Assert for null args. (func_checker::compare_operand): Don't compare types for labels. gcc/testsuite/ 2014-10-29 Ilya Enkovich ilya.enkov...@intel.com PR ipa/63664 * gcc.dg/ipa/pr63664.C: New. 
diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c index 1369b74..094e8ab 100644 --- a/gcc/ipa-icf-gimple.c +++ b/gcc/ipa-icf-gimple.c @@ -169,6 +169,9 @@ bool func_checker::compatible_types_p (tree t1, tree t2, bool compare_polymorphic, bool first_argument) { + gcc_assert (t1); + gcc_assert (t2); + if (TREE_CODE (t1) != TREE_CODE (t2)) return return_false_with_msg (different tree types); @@ -214,11 +217,15 @@ func_checker::compare_operand (tree t1, tree t2) else if (!t1 || !t2) return false; - tree tt1 = TREE_TYPE (t1); - tree tt2 = TREE_TYPE (t2); + if (TREE_CODE (t1) != LABEL_DECL + TREE_CODE (t2) != LABEL_DECL) +{ + tree tt1 = TREE_TYPE (t1); + tree tt2 = TREE_TYPE (t2); - if (!func_checker::compatible_types_p (tt1, tt2)) -return false; + if (!func_checker::compatible_types_p (tt1, tt2)) + return false; +} base1 = get_addr_base_and_unit_offset (t1, offset1); base2 = get_addr_base_and_unit_offset (t2, offset2); diff --git a/gcc/testsuite/gcc.dg/ipa/pr63664.C b/gcc/testsuite/gcc.dg/ipa/pr63664.C new file mode 100644 index 000..31d96d4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/pr63664.C @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options -O2 } */ + +class test { + public: + test (int val, int *p) +{ + int_val = *p; + bool_val = (val != int_val); +} + + ~test () +{ + if (!bool_val) + return; +} + + int get_int_val () const { return int_val; } + + private: + bool bool_val; + int int_val; +}; + +static int __attribute__ ((noinline)) +f1 (int i, int *p) +{ + test obj (i, p); + return obj.get_int_val (); +} + +static int __attribute__ ((noinline)) +f2 (int i, int *p) +{ + test obj (i, p); + return obj.get_int_val (); +} + +int +f (int i, int *p) +{ + return f1 (i, p) + f2 (i, p); +}
[PATCH] Fix for PR63587
Hello. The following patch fixes PR63587, where we put DECL_RESULT in cgraph_node::expand_thunk to local_decls. The patch has been tested on x86_64-linux-pc without any regression and bootstrap works correctly. Ready for trunk? Thanks, Martin gcc/testsuite/ChangeLog: 2014-10-29 Martin Liska mli...@suse.cz * g++.dg/ipa/pr63587-1.C: New test. * g++.dg/ipa/pr63587-2.C: New test. gcc/ChangeLog: 2014-10-29 Martin Liska mli...@suse.cz * cgraphunit.c (cgraph_node::expand_thunk): Only VAR_DECLs are put to local declarations. * function.c (add_local_decl): Implementation moved from header file, assert introduced for tree type. * function.h: Likewise. diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index a86bd1b..6f61f5c 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -1550,7 +1550,9 @@ cgraph_node::expand_thunk (bool output_asm_thunks, bool force_gimple_thunk) else if (!is_gimple_reg_type (restype)) { restmp = resdecl; - add_local_decl (cfun, restmp); + + if (TREE_CODE (restmp) == VAR_DECL) + add_local_decl (cfun, restmp); BLOCK_VARS (DECL_INITIAL (current_function_decl)) = restmp; } else diff --git a/gcc/function.c b/gcc/function.c index ee229ad..893ca6f 100644 --- a/gcc/function.c +++ b/gcc/function.c @@ -6441,6 +6441,15 @@ match_asm_constraints_1 (rtx_insn *insn, rtx *p_sets, int noutputs) df_insn_rescan (insn); } +/* Add the decl D to the local_decls list of FUN. */ + +void +add_local_decl (struct function *fun, tree d) +{ + gcc_assert (TREE_CODE (d) == VAR_DECL); + vec_safe_push (fun-local_decls, d); +} + namespace { const pass_data pass_data_match_asm_constraints = diff --git a/gcc/function.h b/gcc/function.h index 66384e5..aa47018 100644 --- a/gcc/function.h +++ b/gcc/function.h @@ -668,11 +668,7 @@ struct GTY(()) function { /* Add the decl D to the local_decls list of FUN. 
*/ -static inline void -add_local_decl (struct function *fun, tree d) -{ - vec_safe_push (fun-local_decls, d); -} +void add_local_decl (struct function *fun, tree d); #define FOR_EACH_LOCAL_DECL(FUN, I, D) \ FOR_EACH_VEC_SAFE_ELT_REVERSE ((FUN)-local_decls, I, D) diff --git a/gcc/testsuite/g++.dg/ipa/pr63587-1.C b/gcc/testsuite/g++.dg/ipa/pr63587-1.C new file mode 100644 index 000..cbf872e --- /dev/null +++ b/gcc/testsuite/g++.dg/ipa/pr63587-1.C @@ -0,0 +1,92 @@ +// PR ipa/63587 +// { dg-do compile { target c++11 } } +// { dg-options -O2 -fno-strict-aliasing } + +template class struct A +{ +}; +template typename struct B +{ + template typename struct C; +}; +class D; +template typename class F; +struct G +{ + void operator()(const D , D); +}; +class D +{ +public: + D (int); +}; +struct H +{ + H (int); +}; +template typename _Key, typename, typename, typename _Compare, typename +class I +{ + typedef _Key key_type; + template typename _Key_compare struct J + { +_Key_compare _M_key_compare; + }; + J_Compare _M_impl; + +public: + Aint _M_get_insert_unique_pos (const key_type ); + Aint _M_get_insert_hint_unique_pos (H ); + template typename... 
_Args int _M_emplace_hint_unique (H, _Args ...); +}; +template typename _Key, typename _Tp, typename _Compare = G, + typename _Alloc = FA_Tp +class K +{ + typedef _Key key_type; + typedef _Key value_type; + typedef typename B_Alloc::template Cvalue_type _Pair_alloc_type; + Ikey_type, value_type, int, _Compare, _Pair_alloc_type _M_t; + +public: + void operator[](key_type) + { +_M_t._M_emplace_hint_unique (0); + } +}; +template typename _Key, typename _Val, typename _KeyOfValue, + typename _Compare, typename _Alloc +Aint +I_Key, _Val, _KeyOfValue, _Compare, _Alloc::_M_get_insert_unique_pos ( + const key_type p1) +{ + _M_impl._M_key_compare (p1, 0); +} +template typename _Key, typename _Val, typename _KeyOfValue, + typename _Compare, typename _Alloc +Aint +I_Key, _Val, _KeyOfValue, _Compare, _Alloc::_M_get_insert_hint_unique_pos ( + H ) +{ + _M_get_insert_unique_pos (0); +} +template typename _Key, typename _Val, typename _KeyOfValue, + typename _Compare, typename _Alloc +template typename... _Args +int +I_Key, _Val, _KeyOfValue, _Compare, _Alloc::_M_emplace_hint_unique ( + H p1, _Args ...) +{ + _M_get_insert_hint_unique_pos (p1); +} +namespace { +struct L; +} +void +fn1 () +{ + KD, L a; + a[0]; + KD, int b; + b[0]; +} diff --git a/gcc/testsuite/g++.dg/ipa/pr63587-2.C b/gcc/testsuite/g++.dg/ipa/pr63587-2.C new file mode 100644 index 000..f31c5bd --- /dev/null +++ b/gcc/testsuite/g++.dg/ipa/pr63587-2.C @@ -0,0 +1,250 @@ +// PR ipa/63587 +// { dg-do compile { target c++11 } } +// { dg-options -O2 } + +namespace boost { +class basic_cstring +{ +public: + basic_cstring (char *); +}; +template typename struct identity +{ +}; +struct make_identity; +struct function_buffer +{ +}; +template typename FunctionObj struct function_obj_invoker0 +{ + static int + invoke (function_buffer ) + { +FunctionObj f; +
Re: [PATCH, IPA ICF] Fix PR63664, PR63574 (segfault in ipa-icf pass)
On 10/29/2014 03:07 PM, Ilya Enkovich wrote: 2014-10-29 17:01 GMT+03:00 Martin Liška mli...@suse.cz: On 10/29/2014 02:45 PM, Ilya Enkovich wrote: On 29 Oct 10:34, Richard Biener wrote: On Tue, Oct 28, 2014 at 5:14 PM, Ilya Enkovich enkovich@gmail.com wrote: Hi, This patch fixes PR63664 and PR63574. The problem is that NULL types for labels are not handled properly by ICF. I assume it is OK for labels to have a NULL type, so I added a check to ICF rather than fixing label generation. Bootstrapped and checked on linux-x86_64. OK for trunk? Instead, it shouldn't be called for labels. Richard. Here is a version which doesn't compare types for labels. Is it OK? Hello. I've just been testing a patch where the pass does not call compare_operand for gimple labels. As the pass creates a mapping between labels and basic blocks, such a comparison will not be necessary. OK. That would be better. Hello. The following patch fixes PR ipa/63574, where IPA ICF unnecessarily calls compare_operand for LABEL_DECLs. The patch has been tested on x86_64-linux-pc without any regression and bootstrap works correctly. Ready for trunk? Thanks, Martin Thanks, Ilya Thanks, Martin Bootstrapped and checked on linux-x86_64. Thanks, Ilya -- gcc/ 2014-10-29 Ilya Enkovich ilya.enkov...@intel.com PR ipa/63664 PR bootstrap/63574 * ipa-icf-gimple.c (func_checker::compatible_types_p): Assert for null args. (func_checker::compare_operand): Don't compare types for labels. gcc/testsuite/ 2014-10-29 Ilya Enkovich ilya.enkov...@intel.com PR ipa/63664 * gcc.dg/ipa/pr63664.C: New. 
diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c index 1369b74..094e8ab 100644 --- a/gcc/ipa-icf-gimple.c +++ b/gcc/ipa-icf-gimple.c @@ -169,6 +169,9 @@ bool func_checker::compatible_types_p (tree t1, tree t2, bool compare_polymorphic, bool first_argument) { + gcc_assert (t1); + gcc_assert (t2); + if (TREE_CODE (t1) != TREE_CODE (t2)) return return_false_with_msg (different tree types); @@ -214,11 +217,15 @@ func_checker::compare_operand (tree t1, tree t2) else if (!t1 || !t2) return false; - tree tt1 = TREE_TYPE (t1); - tree tt2 = TREE_TYPE (t2); + if (TREE_CODE (t1) != LABEL_DECL + TREE_CODE (t2) != LABEL_DECL) +{ + tree tt1 = TREE_TYPE (t1); + tree tt2 = TREE_TYPE (t2); - if (!func_checker::compatible_types_p (tt1, tt2)) -return false; + if (!func_checker::compatible_types_p (tt1, tt2)) + return false; +} base1 = get_addr_base_and_unit_offset (t1, offset1); base2 = get_addr_base_and_unit_offset (t2, offset2); diff --git a/gcc/testsuite/gcc.dg/ipa/pr63664.C b/gcc/testsuite/gcc.dg/ipa/pr63664.C new file mode 100644 index 000..31d96d4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/pr63664.C @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options -O2 } */ + +class test { + public: + test (int val, int *p) +{ + int_val = *p; + bool_val = (val != int_val); +} + + ~test () +{ + if (!bool_val) + return; +} + + int get_int_val () const { return int_val; } + + private: + bool bool_val; + int int_val; +}; + +static int __attribute__ ((noinline)) +f1 (int i, int *p) +{ + test obj (i, p); + return obj.get_int_val (); +} + +static int __attribute__ ((noinline)) +f2 (int i, int *p) +{ + test obj (i, p); + return obj.get_int_val (); +} + +int +f (int i, int *p) +{ + return f1 (i, p) + f2 (i, p); +} gcc/testsuite/ChangeLog: 2014-10-29 Martin Liska mli...@suse.cz * g++.dg/ipa/pr63574.C: New test. 
gcc/ChangeLog: 2014-10-29 Martin Liska mli...@suse.cz * ipa-icf-gimple.c (func_checker::compare_variable_decl): (func_checker::parse_labels): (func_checker::compare_gimple_label): * ipa-icf-gimple.h: diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c index d3f3795..ecb9667 100644 --- a/gcc/ipa-icf-gimple.c +++ b/gcc/ipa-icf-gimple.c @@ -527,6 +527,10 @@ func_checker::compare_variable_decl (tree t1, tree t2) return return_with_debug (ret); } + +/* Function visits all gimple labels and creates corresponding + mapping between basic blocks and labels. */ + void func_checker::parse_labels (sem_bb *bb) { @@ -765,7 +769,8 @@ func_checker::compare_gimple_label (gimple g1, gimple g2) if (FORCED_LABEL (t1) || FORCED_LABEL (t2)) return return_false_with_msg (FORCED_LABEL); - return compare_tree_ssa_label (t1, t2); + /* As the pass build BB to label mapping, no further check is needed. */ + return true; } /* Verifies for given GIMPLEs S1 and S2 that diff --git a/gcc/ipa-icf-gimple.h b/gcc/ipa-icf-gimple.h index 8487a2a..5811bd1 100644 --- a/gcc/ipa-icf-gimple.h +++ b/gcc/ipa-icf-gimple.h @@ -145,6 +145,8 @@ public: /* Memory release routine. */ ~func_checker(); + /* Function visits all