Re: [PATCH 3/3] add hash_map class

2014-06-24 Thread Martin Liška


On 06/20/2014 12:52 PM, tsaund...@mozilla.com wrote:

From: Trevor Saunders tsaund...@mozilla.com

Hi,

This patch adds a hash_map class so we can consolidate the boilerplate around
using hash_table as a map. It also allows us to get rid of pointer_map, which I
do in this patch by converting its users to hash_map.


Hello Trev,
   I like your changes! One small question about pointer_set, which does not
support deletion of items. Do you plan to migrate and simplify hash_map to be a
replacement for pointer_set?

Thanks,
Martin



bootstrapped + regtested without regression on x86_64-unknown-linux-gnu, ok?

Trev

gcc/

* alloc-pool.c (alloc_pool_hash): Use hash_map instead of hash_table.
* dominance.c (iterate_fix_dominators): Use hash_map instead of
pointer_map.
* hash-map.h: New file.
* ipa-comdats.c: Use hash_map instead of pointer_map.
* lto-section-out.c: Adjust.
* lto-streamer.h: Replace pointer_map with hash_map.
* symtab.c (verify_symtab): Likewise.
* tree-ssa-strlen.c (decl_to_stridxlist_htab): Likewise.
* tree-ssa-uncprop.c (val_ssa_equiv): Likewise.
* tree-streamer.h: Likewise.
* tree-streamer.c: Adjust.
* pointer-set.h: Remove pointer_map.

lto/

* lto.c (canonical_type_hash_cache): Use hash_map instead of
pointer_map.

diff --git a/gcc/alloc-pool.c b/gcc/alloc-pool.c
index 49209ee..0d31835 100644
--- a/gcc/alloc-pool.c
+++ b/gcc/alloc-pool.c
@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3.  If not see
  #include system.h
  #include alloc-pool.h
  #include hash-table.h
+#include hash-map.h
  
  #define align_eight(x) (((x+7)  3)  3)
  
@@ -69,7 +70,6 @@ static ALLOC_POOL_ID_TYPE last_id;

 size for that pool.  */
  struct alloc_pool_descriptor
  {
-  const char *name;
/* Number of pools allocated.  */
unsigned long created;
/* Gross allocated storage.  */
@@ -82,48 +82,17 @@ struct alloc_pool_descriptor
int elt_size;
  };
  
-/* Hashtable helpers.  */

-struct alloc_pool_hasher : typed_noop_remove alloc_pool_descriptor
-{
-  typedef alloc_pool_descriptor value_type;
-  typedef char compare_type;
-  static inline hashval_t hash (const alloc_pool_descriptor *);
-  static inline bool equal (const value_type *, const compare_type *);
-};
-
-inline hashval_t
-alloc_pool_hasher::hash (const value_type *d)
-{
-  return htab_hash_pointer (d-name);
-}
-
-inline bool
-alloc_pool_hasher::equal (const value_type *d,
-  const compare_type *p2)
-{
-  return d-name == p2;
-}
-
  /* Hashtable mapping alloc_pool names to descriptors.  */
-static hash_tablealloc_pool_hasher *alloc_pool_hash;
+static hash_mapconst char *, alloc_pool_descriptor *alloc_pool_hash;
  
  /* For given name, return descriptor, create new if needed.  */

  static struct alloc_pool_descriptor *
  allocate_pool_descriptor (const char *name)
  {
-  struct alloc_pool_descriptor **slot;
-
if (!alloc_pool_hash)
-alloc_pool_hash = new hash_tablealloc_pool_hasher (10);
-
-  slot = alloc_pool_hash-find_slot_with_hash (name,
-  htab_hash_pointer (name),
-  INSERT);
-  if (*slot)
-return *slot;
-  *slot = XCNEW (struct alloc_pool_descriptor);
-  (*slot)-name = name;
-  return *slot;
+alloc_pool_hash = new hash_mapconst char *, alloc_pool_descriptor (10);
+
+  return alloc_pool_hash-get_or_insert (name);
  }
  
  /* Create a pool of things of size SIZE, with NUM in each block we

@@ -375,23 +344,22 @@ struct output_info
unsigned long total_allocated;
  };
  
-/* Called via hash_table.traverse.  Output alloc_pool descriptor pointed out by

+/* Called via hash_map.traverse.  Output alloc_pool descriptor pointed out by
 SLOT and update statistics.  */
-int
-print_alloc_pool_statistics (alloc_pool_descriptor **slot,
+bool
+print_alloc_pool_statistics (const char *const name,
+const alloc_pool_descriptor d,
 struct output_info *i)
  {
-  struct alloc_pool_descriptor *d = *slot;
-
-  if (d-allocated)
+  if (d.allocated)
  {
fprintf (stderr,
   %-22s %6d %10lu %10lu(%10lu) %10lu(%10lu) %10lu(%10lu)\n,
-  d-name, d-elt_size, d-created, d-allocated,
-  d-allocated / d-elt_size, d-peak, d-peak / d-elt_size,
-  d-current, d-current / d-elt_size);
-  i-total_allocated += d-allocated;
-  i-total_created += d-created;
+  name, d.elt_size, d.created, d.allocated,
+  d.allocated / d.elt_size, d.peak, d.peak / d.elt_size,
+  d.current, d.current / d.elt_size);
+  i-total_allocated += d.allocated;
+  i-total_created += d.created;
  }
return 1;
  }
diff --git a/gcc/dominance.c b/gcc/dominance.c
index 7adec4f..be0a439 100644
--- a/gcc/dominance.c
+++ b/gcc/dominance.c
@@ -43,6 +43,7 @@
  #include diagnostic-core.h
  

Re: [PATCH 3/3] add hash_map class

2014-06-24 Thread Martin Liška


On 06/24/2014 02:40 PM, Trevor Saunders wrote:

On Tue, Jun 24, 2014 at 02:29:53PM +0200, Martin Liška wrote:

On 06/20/2014 12:52 PM, tsaund...@mozilla.com wrote:

From: Trevor Saunders tsaund...@mozilla.com

Hi,

This patch adds a hash_map class so we can consolidate the boiler plate around
using hash_table as a map, it also allows us to get rid of pointer_map which I
do in this patch by converting its users to hash_map.

Hello Trev,
I like your changes! One small question about pointer_set, which does not
support deletion of items. Do you plan to migrate and simplify hash_map to be a
replacement for pointer_set?

I'm not sure I follow the question.  I imagine that hash_map will
largely stay as it is, other than perhaps some const correctness stuff,
and supporting element removal at some point.  Supporting element
removal should be trivial since I'm just wrapping hash_table which
already supports it, but I didn't want to add it until there was code
testing it.  As you see in the patch I removed pointer_map, so it's
already a replacement for that functionality.  As for pointer_set, since
it's a set, not a map, hash_table would seem closer to me.

Understood. I was asking whether we also plan to add element removal for
(pointer_)set. I consider such functionality useful, but it does not look related
to your patch. If I understand correctly, you are not planning to use hash_* as
the wrapping data structure for sets.

Martin



Trev



Thanks,
Martin


bootstrapped + regtested without regression on x86_64-unknown-linux-gnu, ok?

Trev

gcc/

* alloc-pool.c (alloc_pool_hash): Use hash_map instead of hash_table.
* dominance.c (iterate_fix_dominators): Use hash_map instead of
pointer_map.
* hash-map.h: New file.
* ipa-comdats.c: Use hash_map instead of pointer_map.
* lto-section-out.c: Adjust.
* lto-streamer.h: Replace pointer_map with hash_map.
* symtab.c (verify_symtab): Likewise.
* tree-ssa-strlen.c (decl_to_stridxlist_htab): Likewise.
* tree-ssa-uncprop.c (val_ssa_equiv): Likewise.
* tree-streamer.h: Likewise.
* tree-streamer.c: Adjust.
* pointer-set.h: Remove pointer_map.

lto/

* lto.c (canonical_type_hash_cache): Use hash_map instead of
pointer_map.

diff --git a/gcc/alloc-pool.c b/gcc/alloc-pool.c
index 49209ee..0d31835 100644
--- a/gcc/alloc-pool.c
+++ b/gcc/alloc-pool.c
@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3.  If not see
  #include system.h
  #include alloc-pool.h
  #include hash-table.h
+#include hash-map.h
  #define align_eight(x) (((x+7)  3)  3)
@@ -69,7 +70,6 @@ static ALLOC_POOL_ID_TYPE last_id;
 size for that pool.  */
  struct alloc_pool_descriptor
  {
-  const char *name;
/* Number of pools allocated.  */
unsigned long created;
/* Gross allocated storage.  */
@@ -82,48 +82,17 @@ struct alloc_pool_descriptor
int elt_size;
  };
-/* Hashtable helpers.  */
-struct alloc_pool_hasher : typed_noop_remove alloc_pool_descriptor
-{
-  typedef alloc_pool_descriptor value_type;
-  typedef char compare_type;
-  static inline hashval_t hash (const alloc_pool_descriptor *);
-  static inline bool equal (const value_type *, const compare_type *);
-};
-
-inline hashval_t
-alloc_pool_hasher::hash (const value_type *d)
-{
-  return htab_hash_pointer (d-name);
-}
-
-inline bool
-alloc_pool_hasher::equal (const value_type *d,
-  const compare_type *p2)
-{
-  return d-name == p2;
-}
-
  /* Hashtable mapping alloc_pool names to descriptors.  */
-static hash_tablealloc_pool_hasher *alloc_pool_hash;
+static hash_mapconst char *, alloc_pool_descriptor *alloc_pool_hash;
  /* For given name, return descriptor, create new if needed.  */
  static struct alloc_pool_descriptor *
  allocate_pool_descriptor (const char *name)
  {
-  struct alloc_pool_descriptor **slot;
-
if (!alloc_pool_hash)
-alloc_pool_hash = new hash_tablealloc_pool_hasher (10);
-
-  slot = alloc_pool_hash-find_slot_with_hash (name,
-  htab_hash_pointer (name),
-  INSERT);
-  if (*slot)
-return *slot;
-  *slot = XCNEW (struct alloc_pool_descriptor);
-  (*slot)-name = name;
-  return *slot;
+alloc_pool_hash = new hash_mapconst char *, alloc_pool_descriptor (10);
+
+  return alloc_pool_hash-get_or_insert (name);
  }
  /* Create a pool of things of size SIZE, with NUM in each block we
@@ -375,23 +344,22 @@ struct output_info
unsigned long total_allocated;
  };
-/* Called via hash_table.traverse.  Output alloc_pool descriptor pointed out by
+/* Called via hash_map.traverse.  Output alloc_pool descriptor pointed out by
 SLOT and update statistics.  */
-int
-print_alloc_pool_statistics (alloc_pool_descriptor **slot,
+bool
+print_alloc_pool_statistics (const char *const name,
+const alloc_pool_descriptor d,
 struct output_info

Re: [PATCH 3/3] add hash_map class

2014-06-25 Thread Martin Liška


On 06/24/2014 09:31 PM, Richard Biener wrote:

On June 24, 2014 9:16:34 PM CEST, Trevor Saunders tsaund...@mozilla.com wrote:

On Tue, Jun 24, 2014 at 08:23:49PM +0200, Jan Hubicka wrote:

On 06/20/2014 12:52 PM, tsaund...@mozilla.com wrote:

From: Trevor Saunders tsaund...@mozilla.com

Hi,

This patch adds a hash_map class so we can consolidate the boiler

plate around

using hash_table as a map, it also allows us to get rid of

pointer_map which I

do in this patch by converting its users to hash_map.

Hello Trev,
I like your changes! One small question about pointer_set, which

is unable of deletion of items. Do you plan to migrate and simplify
hash_map to be a replacement for pointer_set?

Note that pointer-map use in LTO is quite performance critical. It

would be good to double

check that the new use of hash does not produce slower code.

I believe the compiled code should be very similar, but I'll do  some
measuring to check.

More important is memory use.

Richard.

Hi,
   here is a memory usage graph for current trunk and for the tree before Trevor's
patchset. It looks like there is no memory footprint regression.

https://drive.google.com/file/d/0B0pisUJ80pO1OG5uY28yNFRnWTA/edit?usp=sharing

Martin




Trev


Honza






Re: [PATCH] IPA REF: refactoring

2014-06-25 Thread Martin Liška


On 06/24/2014 08:21 PM, Jan Hubicka wrote:

Hello,
this patch changes the IPA REF API to C++ style. The changes were suggested by and
discussed with Honza.

The patch has been pre-approved and will be committed if there are no comments.
Bootstrapped on x86_64-pc-linux-gnu, no regressions.

Thanks,
Martin

ChangeLog:
2014-06-22  Martin Liska  mli...@suse.cz

 * Makefile.in: Removed header file (ipa-ref-inline.h).
 * cgraph.c (cgraph_turn_edge_to_speculative): New IPA REF function
 called.
 (cgraph_speculative_call_info): Likewise.
 (cgraph_for_node_thunks_and_aliases): Likewise.
 (cgraph_for_node_and_aliases): Likewise.
 (verify_cgraph_node): Likewise.
 * cgraph.h: Batch of IPA REF functions become member functions of
 symtab_node: add_reference, maybe_add_reference, clone_references,
 clone_referring, clone_reference, find_reference,
 remove_stmt_references, remove_all_references,
 remove_all_referring, dump_references, dump_referring,
 has_alias_p, iterate_reference, iterate_referring.
 * cgraphbuild.c (record_reference): New IPA REF function used.
 (record_type_list): Likewise.
 (record_eh_tables): Likewise.
 (mark_address): Likewise.
 (mark_load): Likewise.
 (mark_store): Likewise.
 (pass_build_cgraph_edges): Likewise.
 (rebuild_cgraph_edge): Likewise.
 (cgraph_rebuild_references): Likewise.
 (pass_remove_cgraph_callee_edges): Likewise.
 * cgraphclones.c (cgraph_clone_node): Likewise.
 (cgraph_create_virtual_clone): Likewise.
 (cgraph_materialize_clone): Likewise.
 (cgraph_materialize_all_clones): Likewise.
 * cgraphunit.c (cgraph_reset_node): Likewise.
 (cgraph_reset_node): Likewise.
 (analyze_function): Likewise.
 (assemble_thunks_and_aliases): Likewise.
 (expand_function): Likewise.
 * ipa-comdats.c (propagate_comdat_group): Likewise.
 (enqueue_references): Likewise.
 * ipa-cp.c (ipcp_discover_new_direct_edges): Likewise.
 (create_specialized_node): Likewise.
 * ipa-devirt.c (referenced_from_vtable_p): Likewise.
 * ipa-inline-transform.c (can_remove_node_now_p_1): Likewise.
 * ipa-inline.c (reset_edge_caches): Likewise.
 (update_caller_keys): Likewise.
 (execute): Likewise.
 * ipa-prop.c (remove_described_reference): Likewise.
 (propagate_controlled_uses): Likewise.
 (ipa_edge_duplication_hook): Likewise.
 (ipa_modify_call_arguments): Likewise.
 * ipa-pure-const.c (propagate_pure_const): Likewise.
 * ipa-ref-inline.h: Header file removed, functions moved
 to symtab_node class.
 * ipa-ref.c (remove_reference): New class member function.
 (cannot_lead_to_return): New class member function.
 (referring_ref_list): Likewise.
 (referred_ref_list): Likewise.
 Rest of functions moved to symtab_node class.
 * ipa-ref.h: New member functions remove_reference,
 cannot_lead_to_return, referring_ref_list, referred_ref_list added
 to ipa_ref class.
 ipa_ref_list class has new member functions: first_reference,
 first_referring, clear, nreferences.
 * ipa-reference.c (analyze_function): New IPA REF function used.
 (write_node_summary_p): Likewise.
 (ipa_reference_write_optimization_summary): Likewise.
 * ipa-split.c (split_function): Likewise.
 * ipa-utils.c (ipa_reverse_postorder): Likewise.
 * ipa-visibility.c (cgraph_non_local_node_p_1): Likewise.
 (function_and_variable_visibility): Likewise.
 * ipa.c (has_addr_references_p): Likewise.
 (process_references): Argument type changed.
 (symtab_remove_unreachable_nodes): New IPA REF function used.
 (process_references): Likewise.
 (set_writeonly_bit): Likewise.
 * lto-cgraph.c: Implementation of new symtab_node member functions
 that uses new IPA REF functions.
 * lto-streamer-in.c (fixup_call_stmt_edges_1): New IPA REF function used.
 * lto-streamer-out.c (output_symbol_p): Likewise.
 * lto-streamer.h (referenced_from_this_partition_p): Argument type
 changed.
 * lto/lto-partition.c (add_references_to_partition): New IPA REF function
 used.
 (add_symbol_to_partition_1): Likewise.
 (lto_balanced_map): Likewise.
 * lto/lto-symtab.c (lto_cgraph_replace_node): Likewise.
 * symtab.c: Implementation of new IPA REF API.
 * trans-mem.c (ipa_tm_create_version_alias): New IPA REF function used.
 (ipa_tm_create_version): Likewise.
 (ipa_tm_execute): Likewise.
 * tree-emutls.c (gen_emutls_addr): Likewise.
 * tree-inline.c (copy_bb): Likewise.
 (delete_unreachable_blocks_update_callgraph): Likewise.
 * varpool.c (varpool_remove_unreferenced_decls): Likewise.
 (varpool_for_node_and_aliases): Likewise.

Patch is OK. Thanks a lot for working on it.
Note that I added the single_use pass that walks refs, so you need to update it
too before committing.

Thank you for the notice; the patch has just been committed.

Martin



[PATCH] Devirtualization dump functions fix

2014-06-26 Thread Martin Liška

Hello,
   I encountered an issue similar to PR ipa/61462, where location_t locus =
gimple_location (e->call_stmt) is called with e->call_stmt == NULL (Firefox with
-flto -fdump-ipa-devirt). Therefore, I decided to introduce a new function that is
called at all potentially unsafe locations. I am wondering whether the newly added
function can be added in a more seamless way (without playing with va_list and
ATTRIBUTE_PRINTF stuff)?

Bootstrapped and regtested on x86_64-unknown-linux-gnu.

Thanks,
Martin

ChangeLog:

2014-06-26  Martin Liska  mli...@suse.cz

* include/ansidecl.h: New collection of ATTRIBUTE_NULL_PRINTF_X_0
defined.

gcc/ChangeLog:

2014-06-26  Martin Liska  mli...@suse.cz

* dumpfile.h: New function dump_printf_loc_for_stmt.
* dumpfile.c: Implementation added.
(dump_vprintf): New function.
* cgraphunit.c: dump_printf_loc_for_stmt usage replaces
dump_printf_loc.
* gimple-fold.c: Likewise.
* ipa-devirt.c: Likewise.
* ipa-prop.c: Likewise.
* ipa.c: Likewise.
* tree-ssa-pre.c: Likewise.




diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index 76b2fda1..3b01718 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -905,12 +905,9 @@ walk_polymorphic_call_targets (pointer_set_t *reachable_call_targets,
  TDF_SLIM);
 	}
   if (dump_enabled_p ())
-{
-	  location_t locus = gimple_location (edge-call_stmt);
-	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, locus,
-			   devirtualizing call in %s to %s\n,
-			   edge-caller-name (), target-name ());
-	}
+	dump_printf_loc_for_stmt (MSG_OPTIMIZED_LOCATIONS, edge-call_stmt,
+  devirtualizing call in %s to %s\n,
+  edge-caller-name (), target-name ());
 
 	  cgraph_make_edge_direct (edge, target);
 	  cgraph_redirect_edge_call_stmt_to_callee (edge);
diff --git a/gcc/dumpfile.c b/gcc/dumpfile.c
index fd630a6..b7a791c 100644
--- a/gcc/dumpfile.c
+++ b/gcc/dumpfile.c
@@ -23,6 +23,12 @@ along with GCC; see the file COPYING3.  If not see
 #include diagnostic-core.h
 #include dumpfile.h
 #include tree.h
+#include basic-block.h
+#include tree-ssa-alias.h
+#include internal-fn.h
+#include gimple-expr.h
+#include is-a.h
+#include gimple.h
 #include gimple-pretty-print.h
 #include context.h
 
@@ -343,52 +349,80 @@ dump_generic_expr_loc (int dump_kind, source_location loc,
 }
 }
 
-/* Output a formatted message using FORMAT on appropriate dump streams.  */
+/* Output a formatted message using FORMAT on appropriate dump streams.
+   Accepts va_list AP as the last argument.  */
 
-void
-dump_printf (int dump_kind, const char *format, ...)
+ATTRIBUTE_NULL_PRINTF_2_0
+static void
+dump_vprintf (int dump_kind, const char *format, va_list ap)
 {
   if (dump_file  (dump_kind  pflags))
-{
-  va_list ap;
-  va_start (ap, format);
   vfprintf (dump_file, format, ap);
-  va_end (ap);
-}
 
   if (alt_dump_file  (dump_kind  alt_flags))
-{
-  va_list ap;
-  va_start (ap, format);
   vfprintf (alt_dump_file, format, ap);
-  va_end (ap);
-}
 }
 
-/* Similar to dump_printf, except source location is also printed.  */
+/* Output a formatted message using FORMAT on appropriate dump streams.  */
 
 void
-dump_printf_loc (int dump_kind, source_location loc, const char *format, ...)
+dump_printf (int dump_kind, const char *format, ...)
+{
+  va_list ap;
+  va_start (ap, format);
+  dump_vprintf (dump_kind, format, ap);
+  va_end (ap);
+}
+
+/* Similar to dump_printf, except source location is also printed.
+   Accepts va_list AP as the last argument.  */
+
+void
+dump_vprintf_loc (int dump_kind, source_location loc, const char *format,
+		  va_list ap)
 {
   if (dump_file  (dump_kind  pflags))
 {
-  va_list ap;
   dump_loc (dump_kind, dump_file, loc);
-  va_start (ap, format);
   vfprintf (dump_file, format, ap);
-  va_end (ap);
 }
 
   if (alt_dump_file  (dump_kind  alt_flags))
 {
-  va_list ap;
   dump_loc (dump_kind, alt_dump_file, loc);
-  va_start (ap, format);
   vfprintf (alt_dump_file, format, ap);
-  va_end (ap);
 }
 }
 
+/* Similar to dump_printf, except source location is also printed.  */
+
+void
+dump_printf_loc (int dump_kind, source_location loc, const char *format, ...)
+{
+  va_list ap;
+  va_start (ap, format);
+  dump_vprintf_loc (dump_kind, loc, format, ap);
+  va_end (ap);
+}
+
+/* Similar to dump_printf, except source location is also printed if STMT
+   is not null. Otherwise, fallback to dump_fprintf is called.  */
+
+void
+dump_printf_loc_for_stmt (int dump_kind, const_gimple stmt, const char *format,
+			  ...)
+{
+  va_list ap;
+  va_start (ap, format);
+
+  if (stmt)
+dump_vprintf_loc (dump_kind, gimple_location (stmt), format, ap);
+  else
+dump_vprintf (dump_kind, format, ap);
+
+  va_end (ap);
+}
+
 /* Start a dump for PHASE. Store user-supplied dump flags in
*FLAG_PTR.  Return 

Re: [PATCH] Devirtualization dump functions fix

2014-06-26 Thread Martin Liška


On 06/26/2014 03:20 PM, Richard Biener wrote:

On Thu, Jun 26, 2014 at 3:01 PM, Martin Liška mli...@suse.cz wrote:

Hello,
I encountered similar issue to PR ipa/61462 where location_t locus =
gimple_location (e-call_stmt) is called for e-call_stmt == NULL (Firefox
with -flto -fdump-ipa-devirt). So that, I decided to introduce new function
that is called for all potentially unsafe locations. I am wondering if a
newly added function can be added in more seamless way (without playing with
va_list and ATTRIBUTE_PRINTF stuff)?

Bootstrapped and regtested on x86_64-unknown-linux-gnu.

Hmm, I don't like that very much - dump_printf_loc_for_stmt still implies
stmt is not NULL.  So you could have fixed gimple_location as well.
I suppose dump_printf_loc already does sth sane with UNKNOWN_LOCATION.

Richard.

Hi,
   you are right that it is quite a complex change.

Do you mean that this one-line change would be sufficient?
diff --git a/gcc/gimple.h b/gcc/gimple.h
index ceefbc0..954195e 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -1498,7 +1498,7 @@ gimple_set_block (gimple g, tree block)
 static inline location_t
 gimple_location (const_gimple g)
 {
-  return g-location;
+  return g ? g-location : UNKNOWN_LOCATION;
 }

 /* Return pointer to location information for statement G.  */

I will double-check if it solves the problem ;)

Martin




Thanks,
Martin

 ChangeLog:

 2014-06-26  Martin Liska  mli...@suse.cz

 * include/ansidecl.h: New collection of ATTRIBUTE_NULL_PRINTF_X_0
 defined.

 gcc/ChangeLog:

 2014-06-26  Martin Liska  mli...@suse.cz

 * dumpfile.h: New function dump_printf_loc_for_stmt.
 * dumpfile.c: Implementation added.
 (dump_vprintf): New function.
 * cgraphunit.c: dump_printf_loc_for_stmt usage replaces
 dump_printf_loc.
 * gimple-fold.c: Likewise.
 * ipa-devirt.c: Likewise.
 * ipa-prop.c: Likewise.
 * ipa.c: Likewise.
 * tree-ssa-pre.c: Likewise.








Re: [PATCH] Devirtualization dump functions fix

2014-06-26 Thread Martin Liška


On 06/26/2014 04:18 PM, Jakub Jelinek wrote:

On Thu, Jun 26, 2014 at 04:10:03PM +0200, Richard Biener wrote:

On Thu, Jun 26, 2014 at 3:43 PM, Martin Liška mli...@suse.cz wrote:

On 06/26/2014 03:20 PM, Richard Biener wrote:

On Thu, Jun 26, 2014 at 3:01 PM, Martin Liška mli...@suse.cz wrote:

Hello,
 I encountered similar issue to PR ipa/61462 where location_t locus =
gimple_location (e-call_stmt) is called for e-call_stmt == NULL
(Firefox
with -flto -fdump-ipa-devirt). So that, I decided to introduce new
function
that is called for all potentially unsafe locations. I am wondering if a
newly added function can be added in more seamless way (without playing
with
va_list and ATTRIBUTE_PRINTF stuff)?

Bootstrapped and regtested on x86_64-unknown-linux-gnu.

Hmm, I don't like that very much - dump_printf_loc_for_stmt still implies
stmt is not NULL.  So you could have fixed gimple_location as well.
I suppose dump_printf_loc already does sth sane with UNKNOWN_LOCATION.

Richard.

Hi,
you are right that it is quite complex change.

Do you mean this one line change can be sufficient ?
diff --git a/gcc/gimple.h b/gcc/gimple.h
index ceefbc0..954195e 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -1498,7 +1498,7 @@ gimple_set_block (gimple g, tree block)
  static inline location_t
  gimple_location (const_gimple g)
  {
-  return g-location;
+  return g ? g-location : UNKNOWN_LOCATION;
  }

  /* Return pointer to location information for statement G.  */

I will double-check if it solves the problem ;)

Well yes - it is of course similar broken in spirit but at least a lot
simpler ;)  I'd put a comment there why we do check g for NULL.

But it increases overhead, there are hundreds of gimple_location calls
and most of them will never pass NULL.  Can't you simply
do what you do in the inline here in the couple of spots where
the stmt might be NULL?

Sure, do you have any suggestion for what such a function should be called?
Suggestion: gimple_location_or_unknown?

Thanks,
Martin



Jakub




Re: [PATCH] Devirtualization dump functions fix

2014-06-26 Thread Martin Liška


On 06/26/2014 04:29 PM, Jakub Jelinek wrote:

On Thu, Jun 26, 2014 at 04:27:49PM +0200, Martin Liška wrote:

Well yes - it is of course similar broken in spirit but at least a lot
simpler ;)  I'd put a comment there why we do check g for NULL.

But it increases overhead, there are hundreds of gimple_location calls
and most of them will never pass NULL.  Can't you simply
do what you do in the inline here in the couple of spots where
the stmt might be NULL?

Sure, do you have any suggestion how should be called such function?
Suggestion: gimple_location_or_unknown ?

gimple_location_safe or gimple_safe_location?

Jakub

Thanks, here is a new patch.

The patch has been tested on Firefox with -flto -fdump-ipa-devirt.
Bootstrap and regression tests are running.

Ready for trunk after the regression tests finish?

ChangeLog:

2014-06-26  Martin Liska  mli...@suse.cz

* gimple.h (gimple_safe_location): New function introduced.
* cgraphunit.c (walk_polymorphic_call_targets): Usage
of gimple_safe_location replaces gimple_location.
(gimple_fold_call): Likewise.
* ipa-devirt.c (ipa_devirt): Likewise.
* ipa-prop.c (ipa_make_edge_direct_to_target): Likewise.
* ipa.c (walk_polymorphic_call_targets): Likewise.
* tree-ssa-pre.c (eliminate_dom_walker::before_dom_children): Likewise.
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index 76b2fda1..2bf5216 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -906,7 +906,7 @@ walk_polymorphic_call_targets (pointer_set_t *reachable_call_targets,
 	}
   if (dump_enabled_p ())
 {
-	  location_t locus = gimple_location (edge-call_stmt);
+	  location_t locus = gimple_safe_location (edge-call_stmt);
 	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, locus,
 			   devirtualizing call in %s to %s\n,
 			   edge-caller-name (), target-name ());
diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index 403dee7..ad230be 100644
--- a/gcc/gimple-fold.c
+++ b/gcc/gimple-fold.c
@@ -387,7 +387,7 @@ fold_gimple_assign (gimple_stmt_iterator *si)
 		  fndecl = builtin_decl_implicit (BUILT_IN_UNREACHABLE);
 		if (dump_enabled_p ())
 		  {
-			location_t loc = gimple_location (stmt);
+			location_t loc = gimple_safe_location (stmt);
 			dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
 	 resolving virtual function address 
 	 reference to function %s\n,
@@ -1131,7 +1131,7 @@ gimple_fold_call (gimple_stmt_iterator *gsi, bool inplace)
 	  tree lhs = gimple_call_lhs (stmt);
 	  if (dump_enabled_p ())
 		{
-		  location_t loc = gimple_location (stmt);
+		  location_t loc = gimple_safe_location (stmt);
 		  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
    folding virtual function call to %s\n,
 		 		   targets.length () == 1
diff --git a/gcc/gimple.h b/gcc/gimple.h
index ceefbc0..d401d47 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -1501,6 +1501,15 @@ gimple_location (const_gimple g)
   return g-location;
 }
 
+/* Return location information for statement G if g is not NULL.
+   Otherwise, UNKNOWN_LOCATION is returned.  */
+
+static inline location_t
+gimple_safe_location (const_gimple g)
+{
+  return g ? gimple_location (g) : UNKNOWN_LOCATION;
+}
+
 /* Return pointer to location information for statement G.  */
 
 static inline const location_t *
diff --git a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c
index 21f4f11..4e5dae8 100644
--- a/gcc/ipa-devirt.c
+++ b/gcc/ipa-devirt.c
@@ -2080,7 +2080,7 @@ ipa_devirt (void)
 	  {
 		if (dump_enabled_p ())
   {
-location_t locus = gimple_location (e-call_stmt);
+location_t locus = gimple_safe_location (e-call_stmt);
 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, locus,
  speculatively devirtualizing call in %s/%i to %s/%i\n,
  n-name (), n-order,
diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c
index 1e10b53..c6967be 100644
--- a/gcc/ipa-prop.c
+++ b/gcc/ipa-prop.c
@@ -2673,17 +2673,11 @@ ipa_make_edge_direct_to_target (struct cgraph_edge *ie, tree target)
 
   if (dump_enabled_p ())
 	{
-	  const char *fmt = discovered direct call to non-function in %s/%i, 
-making it __builtin_unreachable\n;
-
-	  if (ie-call_stmt)
-		{
-		  location_t loc = gimple_location (ie-call_stmt);
-		  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, fmt,
-   ie-caller-name (), ie-caller-order);
-		}
-	  else if (dump_file)
-		fprintf (dump_file, fmt, ie-caller-name (), ie-caller-order);
+	  location_t loc = gimple_safe_location (ie-call_stmt);
+	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
+			   discovered direct call to non-function in %s/%i, 
+			   making it __builtin_unreachable\n,
+			   ie-caller-name (), ie-caller-order);
 	}
 
 	  target = builtin_decl_implicit (BUILT_IN_UNREACHABLE);
@@ -2745,18 +2739,11 @@ ipa_make_edge_direct_to_target (struct cgraph_edge *ie, tree target

Re: [PATCH 3/5] IPA ICF pass

2014-06-26 Thread Martin Liška


On 06/24/2014 10:31 PM, Jeff Law wrote:

On 06/13/14 04:44, mliska wrote:

Hello,
this is the core of the IPA ICF patchset. It adds a new pass and registers all the
needed stuff related to the newly introduced interprocedural optimization.

Algorithm description:
   In LGEN, we visit all read-only variables and functions. For each symbol, a
hash value is computed based on e.g. the number of arguments,
   the number of BBs and the GIMPLE codes (a similar hash is computed for
read-only variables). This kind of information is streamed
   for LTO.

   In WPA, we build congruence classes for all symbols having the same hash
value. For functions, these classes are subdivided in WPA by argument type
comparison. Each reference (a call or a variable reference) to another semantic
item candidate is marked and stored for further congruence class reduction
(an algorithm similar to Value Numbering:
www.cs.ucr.edu/~gupta/teaching/553-07/Papers/value.pdf).

   For every congruence class of functions with more than one semantic
function, we load the function bodies. With this information, we can
   check complete semantic function equality and subdivide such a congruence
class. Read-only variable class members are also deeply compared.

   After that, we run the Value Numbering algorithm to do a final subdivision.
Finally, all items belonging to a congruence class with more than one
   item are merged.

Martin

Changelog:

2014-06-13  Martin Liska  mli...@suse.cz
Jan Hubicka  hubi...@ucw.cz

* Makefile.in: New pass object file added.
* common.opt: New -fipa-icf flag introduced.
* doc/invoke.texi: Documentation enhanced for the pass.
* lto-section-in.c: New LTO section for a summary created by IPA-ICF.
* lto-streamer.h: New section name introduced.
* opts.c: Optimization is added to -O2.
* passes.def: New pass added.
* timevar.def: New time var for IPA-ICF.
* tree-pass.h: Pass construction function.
* ipa-icf.h: New pass header file added.
* ipa-icf.c: New pass source file added.

Hi Jeff,
   I must agree that the patch is quite big. The suggested split makes sense;
I'll do it.

You'll note many of my comments are "do you need to ...".  You may in fact be
handling that stuff correctly; they're just things I'd like you to verify are properly
handled.  If they're properly handled just say so :-)

At a high level, I think this needs to be broken down a bit more. We've got two 
high level concepts in ipa-icf.  One is all the equivalence testing the other 
is using that information for the icf optimization.

Splitting out the equivalence testing seems like a good thing to do as there's 
other contexts where it would be useful.

Overall I think you're on the right path and we just need to iterate a bit on 
this part of the patchset.



@@ -7862,6 +7863,14 @@ it may significantly increase code size
  (see @option{--param ipcp-unit-growth=@var{value}}).
  This flag is enabled by default at @option{-O3}.

+@item -fipa-icf
+@opindex fipa-icf
+Perform Identical Code Folding for functions and read-only variables.
+Behavior is similar to Gold Linker ICF optimization. Symbols proved
+as semantically equivalent are redirected to corresponding symbol. The pass
+sensitively decides for usage of alias, thunk or local redirection.
+This flag is enabled by default at @option{-O2}.

So you've added this at -O2, what is the general compile-time impact? Would it 
make more sense to instead have it be part of -O3, particularly since ICF is 
rarely going to improve performance (sans icache issues).

This was Honza's idea to put the optimization for -O2, I'll measure 
compile-time impact.




+
+/* Interprocedural Identical Code Folding for functions and
+   read-only variables.
+
+   The goal of this transformation is to discover functions and read-only
+   variables which do have exactly the same semantics.
+
+   In case of functions,
+   we could either create a virtual clone or do a simple function wrapper
+   that will call equivalent function. If the function is just locally visible,
+   all function calls can be redirected. For read-only variables, we create
+   aliases if possible.
+
+   Optimization pass arranges as follows:
+   1) All functions and read-only variables are visited and internal
+  data structure, either sem_function or sem_variables is created.
+   2) For every symbol from the previoues step, VAR_DECL and FUNCTION_DECL are
+  saved and matched to corresponding sem_items.

s/previoues/previous/


+   3) These declaration are ignored for equality check and are solved
+  by Value Numbering algorithm published by Alpert, Zadeck in 1992.
+   4) We compute hash value for each symbol.
+   5) Congruence classes are created based on hash value. If hash value are
+  equal, equals function is called and symbols are deeply compared.
+  We must prove that all SSA names, declarations and other items
+  correspond.
+   6) Value Numbering is executed for these classes. 

Re: [PATCH] Devirtualization dump functions fix

2014-06-27 Thread Martin Liška


On 06/27/2014 10:38 AM, Richard Biener wrote:

On Thu, Jun 26, 2014 at 5:58 PM, Martin Liška mli...@suse.cz wrote:

On 06/26/2014 04:29 PM, Jakub Jelinek wrote:

On Thu, Jun 26, 2014 at 04:27:49PM +0200, Martin Liška wrote:

Well yes - it is of course similar broken in spirit but at least a lot
simpler ;)  I'd put a comment there why we do check g for NULL.

But it increases overhead, there are hundreds of gimple_location calls
and most of them will never pass NULL.  Can't you simply
do what you do in the inline here in the couple of spots where
the stmt might be NULL?

Sure, do you have any suggestion for what such a function should be called?
Suggestion: gimple_location_or_unknown ?

gimple_location_safe or gimple_safe_location?

 Jakub

Thanks, there's a new patch.

Patch has been tested for Firefox with -flto -fdump-ipa-devirt.
Bootstrap and regression tests have been running.

Ready for trunk after regression tests?

Ok with s/gimple_safe_location/gimple_location_safe/ (I think that's
the more canonical naming - what's a safe location after all?)

Thanks,
Richard.

You are right, gimple_location_safe sounds better.
Patch has been just committed with your change.

Thanks,
Martin



ChangeLog:

2014-06-26  Martin Liska  mli...@suse.cz

 * gimple.h (gimple_safe_location): New function introduced.
 * cgraphunit.c (walk_polymorphic_call_targets): Usage
 of gimple_safe_location replaces gimple_location.
 (gimple_fold_call): Likewise.
 * ipa-devirt.c (ipa_devirt): Likewise.
 * ipa-prop.c (ipa_make_edge_direct_to_target): Likewise.
 * ipa.c (walk_polymorphic_call_targets): Likewise.
 * tree-ssa-pre.c (eliminate_dom_walker::before_dom_children): Likewise.




[PATCH] IPA REF: alias refactoring

2014-06-27 Thread Martin Liška

Hi,
this patch enhances alias manipulation for symtab_node. Honza suggested the 
following changes.

Patch is pre approved, will be committed if no comments and regressions.
Bootstrapped on x86_64-pc-linux-gnu, regression tests have been running.

Thanks,
Martin

gcc/ChangeLog:

* cgraph.h (iterate_direct_aliases): New function.
(FOR_EACH_ALIAS): New macro iterates all direct aliases for a node.
* cgraph.c (cgraph_for_node_thunks_and_aliases): Usage of
FOR_EACH_ALIAS added.
(cgraph_for_node_and_aliases): Likewise.
* cgraphunit.c (assemble_thunks_and_aliases): Likewise.
* ipa-inline.c (reset_edge_caches): Likewise.
(update_caller_keys): Likewise.
* trans-mem.c (ipa_tm_execute): Likewise.
* varpool.c (varpool_analyze_node): Likewise.
(varpool_for_node_and_aliases): Likewise.
* ipa-ref.h (first_referring_alias): New function.
(last_referring_alias): Likewise.
* ipa-ref.c (ipa_ref::remove_reference): Removal function
is sensitive to IPA_REF_ALIASes.
* symtab.c (symtab_node::add_reference): Node of IPA_REF_ALIAS type
are put at the beginning of the list.
(symtab_node::iterate_direct_aliases): New function.

gcc/lto/ChangeLog:

* lto-partition.c (add_symbol_to_partition_1): Usage of
FOR_EACH_ALIAS added.

diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 7360f77..568eb45 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -2194,8 +2194,7 @@ cgraph_for_node_thunks_and_aliases (struct cgraph_node *node,
 bool include_overwritable)
 {
   struct cgraph_edge *e;
-  int i;
-  struct ipa_ref *ref = NULL;
+  struct ipa_ref *ref;
 
   if (callback (node, data))
 return true;
@@ -2206,16 +2205,16 @@ cgraph_for_node_thunks_and_aliases (struct cgraph_node *node,
   if (cgraph_for_node_thunks_and_aliases (e-caller, callback, data,
 	  include_overwritable))
 	return true;
-  for (i = 0; node-iterate_referring (i, ref); i++)
-if (ref-use == IPA_REF_ALIAS)
-  {
-	struct cgraph_node *alias = dyn_cast cgraph_node * (ref-referring);
-	if (include_overwritable
-	|| cgraph_function_body_availability (alias)  AVAIL_OVERWRITABLE)
-	  if (cgraph_for_node_thunks_and_aliases (alias, callback, data,
-		  include_overwritable))
-	return true;
-  }
+
+  FOR_EACH_ALIAS (node, ref)
+{
+  struct cgraph_node *alias = dyn_cast cgraph_node * (ref-referring);
+  if (include_overwritable
+	  || cgraph_function_body_availability (alias)  AVAIL_OVERWRITABLE)
+	if (cgraph_for_node_thunks_and_aliases (alias, callback, data,
+		include_overwritable))
+	  return true;
+}
   return false;
 }
 
@@ -2229,21 +2228,20 @@ cgraph_for_node_and_aliases (struct cgraph_node *node,
 			 void *data,
 			 bool include_overwritable)
 {
-  int i;
-  struct ipa_ref *ref = NULL;
+  struct ipa_ref *ref;
 
   if (callback (node, data))
 return true;
-  for (i = 0; node-iterate_referring (i, ref); i++)
-if (ref-use == IPA_REF_ALIAS)
-  {
-	struct cgraph_node *alias = dyn_cast cgraph_node * (ref-referring);
-	if (include_overwritable
-	|| cgraph_function_body_availability (alias)  AVAIL_OVERWRITABLE)
-  if (cgraph_for_node_and_aliases (alias, callback, data,
-	   include_overwritable))
-	return true;
-  }
+
+  FOR_EACH_ALIAS (node, ref)
+{
+  struct cgraph_node *alias = dyn_cast cgraph_node * (ref-referring);
+  if (include_overwritable
+	  || cgraph_function_body_availability (alias)  AVAIL_OVERWRITABLE)
+	if (cgraph_for_node_and_aliases (alias, callback, data,
+	 include_overwritable))
+	  return true;
+}
   return false;
 }
 
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 0761e26..3ab0516 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -254,6 +254,9 @@ public:
   /* Iterates I-th referring item in the list, REF is also set.  */
   struct ipa_ref *iterate_referring (unsigned i, struct ipa_ref *ref);
 
+  /* Iterates I-th referring alias item in the list, REF is also set.  */
+  struct ipa_ref *iterate_direct_aliases (unsigned i, struct ipa_ref *ref);
+
   /* Vectors of referring and referenced entities.  */
   struct ipa_ref_list ref_list;
 
@@ -281,6 +284,10 @@ public:
   priority_type get_init_priority ();
 };
 
+/* Walk all aliases for NODE.  */
+#define FOR_EACH_ALIAS(node, alias) \
+   for (unsigned x_i = 0; node-iterate_direct_aliases (x_i, alias); x_i++)
+
 enum availability
 {
   /* Not yet set by cgraph_function_body_availability.  */
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index 76b2fda1..b0478cb 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -1711,8 +1711,7 @@ static void
 assemble_thunks_and_aliases (struct cgraph_node *node)
 {
   struct cgraph_edge *e;
-  int i;
-  struct ipa_ref *ref = NULL;
+  struct ipa_ref *ref;
 
   for (e = node-callers; e;)
 if (e-caller-thunk.thunk_p)
@@ -1725,20 +1724,20 @@ assemble_thunks_and_aliases (struct cgraph_node *node)
   }
 else
   e = e-next_caller;
-  for (i = 0; node-iterate_referring 

Re: [PATCH 2/5] Existing call graph infrastructure enhancement

2014-06-30 Thread Martin Liška


On 06/17/2014 10:00 PM, Jeff Law wrote:

On 06/13/14 04:26, mliska wrote:

Hi,
 this small patch prepares remaining needed infrastructure for the new pass.

Changelog:

2014-06-13  Martin Liska  mli...@suse.cz
Honza Hubicka  hubi...@ucw.cz

* ipa-utils.h (polymorphic_type_binfo_p): Function marked external
instead of static.
* ipa-devirt.c (polymorphic_type_binfo_p): Likewise.
* ipa-prop.h (count_formal_params): Likewise.
* ipa-prop.c (count_formal_params): Likewise.
* ipa-utils.c (ipa_merge_profiles): Be more tolerant if we merge
profiles for semantically equivalent functions.
* passes.c (do_per_function): If we load body of a function during WPA,
this condition should behave same.
* varpool.c (ctor_for_folding): More tolerant assert for variable
aliases created during WPA.

Presumably we don't have any useful way to merge the cases where we have profile ids 
for SRC & DST in ipa_merge_profiles or even to guess which is more useful when 
presented with both?  Does it make sense to log this into a debugging file when we 
drop one?

Hello,
   this merge function was written by Honza. Honza, what do you think about this 
note?


I think this patch is fine.  If adding logging makes sense, then feel free to 
do so and consider that trivial change pre-approved.

I made a small change to this patch, where I moved 'gsi_next_nonvirtual_phi' 
from the pass to gimple-iterator.h.

Ready for trunk with this change?
Thanks,
Martin

gcc/ChangeLog

2014-06-30  Martin Liska  mli...@suse.cz
Honza Hubicka  hubi...@ucw.cz

* gimple-iterator.h (gsi_next_nonvirtual_phi): New function.
* ipa-utils.h (polymorphic_type_binfo_p): Function marked external
instead of static.
* ipa-devirt.c (polymorphic_type_binfo_p): Likewise.
* ipa-prop.h (count_formal_params): Likewise.
* ipa-prop.c (count_formal_params): Likewise.
* ipa-utils.c (ipa_merge_profiles): Be more tolerant if we merge
profiles for semantically equivalent functions.
* passes.c (do_per_function): If we load body of a function during WPA,
this condition should behave same.
* varpool.c (ctor_for_folding): More tolerant assert for variable
aliases created during WPA.



Jeff



diff --git a/gcc/gimple-iterator.h b/gcc/gimple-iterator.h
index 909d58b..47168b9 100644
--- a/gcc/gimple-iterator.h
+++ b/gcc/gimple-iterator.h
@@ -281,6 +281,30 @@ gsi_last_nondebug_bb (basic_block bb)
   return i;
 }
 
+/* Iterates I statement iterator to the next non-virtual statement.  */
+
+static inline void
+gsi_next_nonvirtual_phi (gimple_stmt_iterator *i)
+{
+  gimple phi;
+
+  if (gsi_end_p (*i))
+return;
+
+  phi = gsi_stmt (*i);
+  gcc_assert (phi != NULL);
+
+  while (virtual_operand_p (gimple_phi_result (phi)))
+{
+  gsi_next (i);
+
+  if (gsi_end_p (*i))
+	return;
+
+  phi = gsi_stmt (*i);
+}
+}
+
 /* Return the basic block associated with this iterator.  */
 
 static inline basic_block
diff --git a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c
index d6b85bf..2f62323 100644
--- a/gcc/ipa-devirt.c
+++ b/gcc/ipa-devirt.c
@@ -176,7 +176,7 @@ struct GTY(()) odr_type_d
inheritance (because vtables are shared).  Look up the BINFO of type
and check presence of its vtable.  */
 
-static inline bool
+bool
 polymorphic_type_binfo_p (tree binfo)
 {
   /* See if BINFO's type has an virtual table associtated with it.  */
diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c
index 68efc77..bb2fbf3 100644
--- a/gcc/ipa-prop.c
+++ b/gcc/ipa-prop.c
@@ -210,7 +210,7 @@ ipa_populate_param_decls (struct cgraph_node *node,
 
 /* Return how many formal parameters FNDECL has.  */
 
-static inline int
+int
 count_formal_params (tree fndecl)
 {
   tree parm;
diff --git a/gcc/ipa-prop.h b/gcc/ipa-prop.h
index 8886e93..bc6249e 100644
--- a/gcc/ipa-prop.h
+++ b/gcc/ipa-prop.h
@@ -529,6 +529,7 @@ void ipa_free_all_edge_args (void);
 void ipa_free_all_structures_after_ipa_cp (void);
 void ipa_free_all_structures_after_iinln (void);
 void ipa_register_cgraph_hooks (void);
+int count_formal_params (tree fndecl);
 
 /* This function ensures the array of node param infos is big enough to
accommodate a structure for all nodes and reallocates it if not.  */
diff --git a/gcc/ipa-utils.c b/gcc/ipa-utils.c
index c191210..d58b170 100644
--- a/gcc/ipa-utils.c
+++ b/gcc/ipa-utils.c
@@ -660,13 +660,8 @@ ipa_merge_profiles (struct cgraph_node *dst,
   if (dst-tp_first_run  src-tp_first_run  src-tp_first_run)
 dst-tp_first_run = src-tp_first_run;
 
-  if (src-profile_id)
-{
-  if (!dst-profile_id)
-	dst-profile_id = src-profile_id;
-  else
-	gcc_assert (src-profile_id == dst-profile_id);
-}
+  if (src-profile_id  !dst-profile_id)
+dst-profile_id = src-profile_id;
 
   if (!dst-count)
 return;
diff --git a/gcc/ipa-utils.h b/gcc/ipa-utils.h
index a2c985a..996249a 100644
--- a/gcc/ipa-utils.h
+++ b/gcc/ipa-utils.h
@@ -72,6 +72,8 @@ struct odr_type_d;
 typedef odr_type_d *odr_type;

Re: [PATCH 4/5] Existing tests fix

2014-06-30 Thread Martin Liška


On 06/17/2014 09:52 PM, Jeff Law wrote:

On 06/13/14 04:48, mliska wrote:

Hi,
   many tests rely on a precise number of scanned functions in a dump file. If 
IPA ICF decides to merge some functions and/or read-only variables, the counts do 
not match.

Martin

Changelog:

2014-06-13  Martin Liska  mli...@suse.cz
Honza Hubicka  hubi...@ucw.cz

* c-c++-common/rotate-1.c: Text
* c-c++-common/rotate-2.c: New test.
* c-c++-common/rotate-3.c: Likewise.
* c-c++-common/rotate-4.c: Likewise.
* g++.dg/cpp0x/rv-return.C: Likewise.
* g++.dg/cpp0x/rv1n.C: Likewise.
* g++.dg/cpp0x/rv1p.C: Likewise.
* g++.dg/cpp0x/rv2n.C: Likewise.
* g++.dg/cpp0x/rv3n.C: Likewise.
* g++.dg/cpp0x/rv4n.C: Likewise.
* g++.dg/cpp0x/rv5n.C: Likewise.
* g++.dg/cpp0x/rv6n.C: Likewise.
* g++.dg/cpp0x/rv7n.C: Likewise.
* gcc.dg/ipa/ipacost-1.c: Likewise.
* gcc.dg/ipa/ipacost-2.c: Likewise.
* gcc.dg/ipa/ipcp-agg-6.c: Likewise.
* gcc.dg/ipa/remref-2a.c: Likewise.
* gcc.dg/ipa/remref-2b.c: Likewise.
* gcc.dg/pr46309-2.c: Likewise.
* gcc.dg/torture/ipa-pta-1.c: Likewise.
* gcc.dg/tree-ssa/andor-3.c: Likewise.
* gcc.dg/tree-ssa/andor-4.c: Likewise.
* gcc.dg/tree-ssa/andor-5.c: Likewise.
* gcc.dg/vect/no-vfa-pr29145.c: Likewise.
* gcc.dg/vect/vect-cond-10.c: Likewise.
* gcc.dg/vect/vect-cond-9.c: Likewise.
* gcc.dg/vect/vect-widen-mult-const-s16.c: Likewise.
* gcc.dg/vect/vect-widen-mult-const-u16.c: Likewise.
* gcc.dg/vect/vect-widen-mult-half-u8.c: Likewise.
* gcc.target/i386/bmi-1.c: Likewise.
* gcc.target/i386/bmi-2.c: Likewise.
* gcc.target/i386/pr56564-2.c: Likewise.
* g++.dg/opt/pr30965.C: Likewise.
* g++.dg/tree-ssa/pr19637.C: Likewise.
* gcc.dg/guality/csttest.c: Likewise.
* gcc.dg/ipa/iinline-4.c: Likewise.
* gcc.dg/ipa/iinline-7.c: Likewise.
* gcc.dg/ipa/ipa-pta-13.c: Likewise.

I know this is the least interesting part of your changes, but it's also simple 
and mechanical and thus trivial to review. Approved, but obviously don't 
install until the rest of your patch has been approved.

Similar changes for recently added tests or cases where you might improve ICF 
requiring similar tweaks to existing tests are pre-approved as well.

jeff


Hello,
   I fixed a few more tests and added a correct ChangeLog message.

gcc/testsuite/ChangeLog

2014-06-30  Martin Liska  mli...@suse.cz
Honza Hubicka  hubi...@ucw.cz

* c-c++-common/rotate-1.c: Test fixed.
* c-c++-common/rotate-2.c: Likewise.
* c-c++-common/rotate-3.c: Likewise.
* c-c++-common/rotate-4.c: Likewise.
* g++.dg/cpp0x/rv-return.C: Likewise.
* g++.dg/cpp0x/rv1n.C: Likewise.
* g++.dg/cpp0x/rv1p.C: Likewise.
* g++.dg/cpp0x/rv2n.C: Likewise.
* g++.dg/cpp0x/rv3n.C: Likewise.
* g++.dg/cpp0x/rv4n.C: Likewise.
* g++.dg/cpp0x/rv5n.C: Likewise.
* g++.dg/cpp0x/rv6n.C: Likewise.
* g++.dg/cpp0x/rv7n.C: Likewise.
* g++.dg/ipa/devirt-g-1.C: Likewise.
* g++.dg/ipa/inline-1.C: Likewise.
* g++.dg/ipa/inline-2.C: Likewise.
* g++.dg/ipa/inline-3.C: Likewise.
* g++.dg/opt/pr30965.C: Likewise.
* g++.dg/tree-ssa/pr19637.C: Likewise.
* gcc.dg/guality/csttest.c: Likewise.
* gcc.dg/ipa/iinline-4.c: Likewise.
* gcc.dg/ipa/iinline-7.c: Likewise.
* gcc.dg/ipa/ipa-pta-13.c: Likewise.
* gcc.dg/ipa/ipacost-1.c: Likewise.
* gcc.dg/ipa/ipacost-2.c: Likewise.
* gcc.dg/ipa/ipcp-agg-6.c: Likewise.
* gcc.dg/ipa/remref-2a.c: Likewise.
* gcc.dg/ipa/remref-2b.c: Likewise.
* gcc.dg/pr46309-2.c: Likewise.
* gcc.dg/torture/ipa-pta-1.c: Likewise.
* gcc.dg/tree-ssa/andor-3.c: Likewise.
* gcc.dg/tree-ssa/andor-4.c: Likewise.
* gcc.dg/tree-ssa/andor-5.c: Likewise.
* gcc.dg/vect/no-vfa-pr29145.c: Likewise.
* gcc.dg/vect/vect-cond-10.c: Likewise.
* gcc.dg/vect/vect-cond-9.c: Likewise.
* gcc.dg/vect/vect-widen-mult-const-s16.c: Likewise.
* gcc.dg/vect/vect-widen-mult-const-u16.c: Likewise.
* gcc.dg/vect/vect-widen-mult-half-u8.c: Likewise.
* gcc.target/i386/bmi-1.c: Likewise.
* gcc.target/i386/bmi-2.c: Likewise.
* gcc.target/i386/pr56564-2.c: Likewise.

Thank you,
Martin

diff --git a/gcc/testsuite/c-c++-common/rotate-1.c b/gcc/testsuite/c-c++-common/rotate-1.c
index afdaa28..bca9dd8 100644
--- a/gcc/testsuite/c-c++-common/rotate-1.c
+++ b/gcc/testsuite/c-c++-common/rotate-1.c
@@ -1,6 +1,6 @@
 /* Check rotate pattern detection.  */
 /* { dg-do compile } */
-/* { dg-options -O2 -fdump-tree-optimized } */
+/* { dg-options -O2 -fno-ipa-icf -fdump-tree-optimized } */
 /* { dg-final { scan-tree-dump-times r\[]\[] 96 optimized } } */
 /* { dg-final { cleanup-tree-dump optimized } } */
 
diff --git a/gcc/testsuite/c-c++-common/rotate-2.c b/gcc/testsuite/c-c++-common/rotate-2.c
index 109fd32..4ffa218 100644
--- a/gcc/testsuite/c-c++-common/rotate-2.c
+++ b/gcc/testsuite/c-c++-common/rotate-2.c
@@ -1,6 +1,6 @@
 /* Check 

Re: [PATCH 5/5] New tests introduction

2014-06-30 Thread Martin Liška


On 06/17/2014 09:53 PM, Jeff Law wrote:

On 06/13/14 05:16, mliska wrote:

Hi,
this is a new collection of tests for IPA ICF pass.

Martin

Changelog:

2014-06-13  Martin Liska  mli...@suse.cz
Honza Hubicka  hubi...@ucw.cz

* gcc/testsuite/g++.dg/ipa/ipa-se-1.C: New test.
* gcc/testsuite/g++.dg/ipa/ipa-se-2.C: Likewise.
* gcc/testsuite/g++.dg/ipa/ipa-se-3.C: Likewise.
* gcc/testsuite/g++.dg/ipa/ipa-se-4.C: Likewise.
* gcc/testsuite/g++.dg/ipa/ipa-se-5.C: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-1.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-10.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-11.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-12.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-13.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-14.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-15.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-16.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-17.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-18.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-19.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-2.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-20.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-21.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-22.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-23.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-24.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-25.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-26.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-27.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-28.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-3.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-4.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-5.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-6.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-7.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-8.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-se-9.c: Likewise.

Also approved, but please don't install until the entire kit is approved.

I'd like to applaud you and Jan for including a nice baseline of tests.

jeff


Hi,
   there's an updated baseline of tests.

Martin

gcc/testsuite/ChangeLog:

2014-06-30 Martin Liska  mli...@suse.cz
Honza Hubicka  hubi...@ucw.cz

* gcc/testsuite/g++.dg/ipa/ipa-icf-1.C: New test.
* gcc/testsuite/g++.dg/ipa/ipa-icf-2.C: Likewise.
* gcc/testsuite/g++.dg/ipa/ipa-icf-3.C: Likewise.
* gcc/testsuite/g++.dg/ipa/ipa-icf-4.C: Likewise.
* gcc/testsuite/g++.dg/ipa/ipa-icf-5.C: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-1.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-10.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-11.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-12.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-13.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-14.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-15.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-16.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-17.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-18.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-19.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-2.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-20.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-21.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-22.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-23.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-24.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-25.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-26.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-27.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-28.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-3.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-4.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-5.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-6.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-7.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-8.c: Likewise.
* gcc/testsuite/gcc.dg/ipa/ipa-icf-9.c: Likewise.

Martin
diff --git a/gcc/testsuite/g++.dg/ipa/ipa-icf-1.C b/gcc/testsuite/g++.dg/ipa/ipa-icf-1.C
new file mode 100644
index 000..d27abf4
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ipa/ipa-icf-1.C
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options -O2 -fdump-ipa-icf  } */
+
+class A
+{
+public:
+  __attribute__ ((noinline))
+  virtual int Foo2()
+  {
+return v;
+  }
+
+  float f;
+  int v;
+};
+
+class B
+{
+public:
+  __attribute__ ((noinline))
+  int Bar2()
+  {
+return v;
+  }
+
+  float f, aaa;
+  int v;
+};
+
+int main()
+{
+  A a;
+  B b;
+
+  a.Foo2();
+  b.Bar2();
+
+  return 12345;
+}
+
+/* { dg-final { scan-ipa-dump-not Semantic equality hit: icf  } } */
+/* { dg-final { scan-ipa-dump Equal symbols: 0 icf  } } */
+/* { dg-final { cleanup-ipa-dump icf } } */
diff --git a/gcc/testsuite/g++.dg/ipa/ipa-icf-2.C 

Re: [PATCH] IPA REF: alias refactoring

2014-06-30 Thread Martin Liška


On 06/28/2014 08:49 AM, Jan Hubicka wrote:

Hi,
 this patch enhances alias manipulation for symtab_node. Honza suggested the 
following changes.

Patch is pre approved, will be committed if no comments and regressions.
Bootstrapped on x86_64-pc-linux-gnu, regression tests have been running.

Thanks,
Martin

gcc/ChangeLog:

 * cgraph.h (iterate_direct_aliases): New function.
 (FOR_EACH_ALIAS): New macro iterates all direct aliases for a node.
 * cgraph.c (cgraph_for_node_thunks_and_aliases): Usage of
 FOR_EACH_ALIAS added.
 (cgraph_for_node_and_aliases): Likewise.
 * cgraphunit.c (assemble_thunks_and_aliases): Likewise.
 * ipa-inline.c (reset_edge_caches): Likewise.
 (update_caller_keys): Likewise.
 * trans-mem.c (ipa_tm_execute): Likewise.
 * varpool.c (varpool_analyze_node): Likewise.
 (varpool_for_node_and_aliases): Likewise.
 * ipa-ref.h (first_referring_alias): New function.
 (last_referring_alias): Likewise.

I missed it last time around, I think first_alias/last_alias are better names.
first_alias is unused. If you added it I guess FOR_EACH_ALIAS should use it.


Hello,
   I renamed these functions as you suggested, and a has_aliases_p 
predicate was also added.
The previous patch had an error in ipa_ref::remove_reference; this patch has 
been regtested and the problem is fixed.




We probably also can bring has_aliases_p inline and implement it using
first_referring_alias.


+  /* If deleted item is IPA_REF_ALIAS, we have to move last
+  item of IPA_REF_LIST type to the deleted position. After that
+  we replace last node with deletion slot.  */
+  struct ipa_ref *last_alias = list-last_referring_alias ();

You can avoid walking to last alias when the removed item is not IPA_REF_ALIAS.


+
+  /* IPA_REF_ALIAS is always put at the beginning of the list.   */

inserted?

Typo fixed.

If no other comments will come, I consider the patch as preapproved.

Thanks,
Martin

gcc/ChangeLog:

* cgraph.h (iterate_direct_aliases): New function.
(FOR_EACH_ALIAS): New macro iterates all direct aliases for a node.
* cgraph.c (cgraph_for_node_thunks_and_aliases): Usage of
FOR_EACH_ALIAS added.
(cgraph_for_node_and_aliases): Likewise.
* cgraphunit.c (assemble_thunks_and_aliases): Likewise.
* ipa-inline.c (reset_edge_caches): Likewise.
(update_caller_keys): Likewise.
* trans-mem.c (ipa_tm_execute): Likewise.
* varpool.c (varpool_analyze_node): Likewise.
(varpool_for_node_and_aliases): Likewise.
* ipa-ref.h (first_alias): New function.
(last_alias): Likewise.
(has_aliases_p): Likewise.
* ipa-ref.c (ipa_ref::remove_reference): Removal function
is sensitive to IPA_REF_ALIASes.
* symtab.c (symtab_node::add_reference): Node of IPA_REF_ALIAS type
are put at the beginning of the list.
(symtab_node::iterate_direct_aliases): New function.

gcc/lto/ChangeLog:

* lto-partition.c (add_symbol_to_partition_1): Usage of
FOR_EACH_ALIAS added.



OK with these changes (or if you already comitted, just do them incrementally)

Honza


diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 43428be..41dcaf9 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -2198,8 +2198,7 @@ cgraph_for_node_thunks_and_aliases (struct cgraph_node *node,
 bool include_overwritable)
 {
   struct cgraph_edge *e;
-  int i;
-  struct ipa_ref *ref = NULL;
+  struct ipa_ref *ref;
 
   if (callback (node, data))
 return true;
@@ -2210,16 +2209,16 @@ cgraph_for_node_thunks_and_aliases (struct cgraph_node *node,
   if (cgraph_for_node_thunks_and_aliases (e-caller, callback, data,
 	  include_overwritable))
 	return true;
-  for (i = 0; node-iterate_referring (i, ref); i++)
-if (ref-use == IPA_REF_ALIAS)
-  {
-	struct cgraph_node *alias = dyn_cast cgraph_node * (ref-referring);
-	if (include_overwritable
-	|| cgraph_function_body_availability (alias)  AVAIL_OVERWRITABLE)
-	  if (cgraph_for_node_thunks_and_aliases (alias, callback, data,
-		  include_overwritable))
-	return true;
-  }
+
+  FOR_EACH_ALIAS (node, ref)
+{
+  struct cgraph_node *alias = dyn_cast cgraph_node * (ref-referring);
+  if (include_overwritable
+	  || cgraph_function_body_availability (alias)  AVAIL_OVERWRITABLE)
+	if (cgraph_for_node_thunks_and_aliases (alias, callback, data,
+		include_overwritable))
+	  return true;
+}
   return false;
 }
 
@@ -2233,21 +2232,20 @@ cgraph_for_node_and_aliases (struct cgraph_node *node,
 			 void *data,
 			 bool include_overwritable)
 {
-  int i;
-  struct ipa_ref *ref = NULL;
+  struct ipa_ref *ref;
 
   if (callback (node, data))
 return true;
-  for (i = 0; node-iterate_referring (i, ref); i++)
-if (ref-use == IPA_REF_ALIAS)
-  {
-	struct cgraph_node *alias = dyn_cast cgraph_node * (ref-referring);
-	if (include_overwritable
-	|| cgraph_function_body_availability (alias)  AVAIL_OVERWRITABLE)
-  if 

Re: [PATCH] IPA REF: alias refactoring

2014-07-01 Thread Martin Liška


On 07/01/2014 12:21 AM, Jan Hubicka wrote:

gcc/ChangeLog:

 * cgraph.h (iterate_direct_aliases): New function.
 (FOR_EACH_ALIAS): New macro iterates all direct aliases for a node.
 * cgraph.c (cgraph_for_node_thunks_and_aliases): Usage of
 FOR_EACH_ALIAS added.
 (cgraph_for_node_and_aliases): Likewise.
 * cgraphunit.c (assemble_thunks_and_aliases): Likewise.
 * ipa-inline.c (reset_edge_caches): Likewise.
 (update_caller_keys): Likewise.
 * trans-mem.c (ipa_tm_execute): Likewise.
 * varpool.c (varpool_analyze_node): Likewise.
 (varpool_for_node_and_aliases): Likewise.
 * ipa-ref.h (first_alias): New function.
 (last_alias): Likewise.
 (has_aliases_p): Likewise.
 * ipa-ref.c (ipa_ref::remove_reference): Removal function
 is sensitive to IPA_REF_ALIASes.
 * symtab.c (symtab_node::add_reference): Node of IPA_REF_ALIAS type
 are put at the beginning of the list.
 (symtab_node::iterate_direct_aliases): New function.

gcc/lto/ChangeLog:

 * lto-partition.c (add_symbol_to_partition_1): Usage of
 FOR_EACH_ALIAS added.

OK, thanks!

Honza

Thanks,
the patch has just been committed.

Martin


[PATCH, DOC]: Fix for Options That Control Optimization section

2014-07-11 Thread Martin Liška

Hello,
   I fixed the "Options That Control Optimization" section according to 'gcc -Q 
--help=optimizers' and after consultation with Jakub, I added missing 
-foptimize-strlen option.

Ready for trunk?
Martin

ChangeLog:

2014-07-11  Martin Liska  mli...@suse.cz

* doc/invoke.texi: Added missing options to options
that control optimization. Missing -foptimize-strlen option
introduced.

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index a83f6c6..8fa63ff 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -6921,25 +6921,31 @@ compilation time.
 @option{-O} turns on the following optimization flags:
 @gccoptlist{
 -fauto-inc-dec @gol
+-fbranch-count-reg @gol
+-fcombine-stack-adjustments @gol
 -fcompare-elim @gol
 -fcprop-registers @gol
 -fdce @gol
 -fdefer-pop @gol
 -fdelayed-branch @gol
 -fdse @gol
+-fforward-propagate @gol
 -fguess-branch-probability @gol
 -fif-conversion2 @gol
 -fif-conversion @gol
+-finline-functions-called-once @gol
 -fipa-pure-const @gol
 -fipa-profile @gol
 -fipa-reference @gol
--fmerge-constants
+-fmerge-constants @gol
+-fmove-loop-invariants @gol
+-fshrink-wrap @gol
 -fsplit-wide-types @gol
 -ftree-bit-ccp @gol
--ftree-builtin-call-dce @gol
 -ftree-ccp @gol
 -fssa-phiopt @gol
 -ftree-ch @gol
+-ftree-copy-prop @gol
 -ftree-copyrename @gol
 -ftree-dce @gol
 -ftree-dominator-opts @gol
@@ -6947,6 +6953,7 @@ compilation time.
 -ftree-forwprop @gol
 -ftree-fre @gol
 -ftree-phiprop @gol
+-ftree-sink @gol
 -ftree-slsr @gol
 -ftree-sra @gol
 -ftree-pta @gol
@@ -6978,19 +6985,23 @@ also turns on the following optimization flags:
 -fhoist-adjacent-loads @gol
 -finline-small-functions @gol
 -findirect-inlining @gol
+-fipa-cp @gol
 -fipa-sra @gol
 -fisolate-erroneous-paths-dereference @gol
 -foptimize-sibling-calls @gol
+-foptimize-strlen @gol
 -fpartial-inlining @gol
 -fpeephole2 @gol
--freorder-blocks  -freorder-functions @gol
+-freorder-blocks -freorder-blocks-and-partition -freorder-functions @gol
 -frerun-cse-after-loop  @gol
 -fsched-interblock  -fsched-spec @gol
 -fschedule-insns  -fschedule-insns2 @gol
 -fstrict-aliasing -fstrict-overflow @gol
+-ftree-builtin-call-dce @gol
 -ftree-switch-conversion -ftree-tail-merge @gol
 -ftree-pre @gol
--ftree-vrp}
+-ftree-vrp @gol
+-fuse-caller-save}
 
 Please note the warning under @option{-fgcse} about
 invoking @option{-O2} on programs that use computed gotos.
@@ -7000,9 +7011,10 @@ invoking @option{-O2} on programs that use computed gotos.
 Optimize yet more.  @option{-O3} turns on all optimizations specified
 by @option{-O2} and also turns on the @option{-finline-functions},
 @option{-funswitch-loops}, @option{-fpredictive-commoning},
-@option{-fgcse-after-reload}, @option{-ftree-loop-vectorize},
-@option{-ftree-slp-vectorize}, @option{-fvect-cost-model},
-@option{-ftree-partial-pre} and @option{-fipa-cp-clone} options.
+@option{-fgcse-after-reload}, @option{-ftree-loop-distribute-patterns},
+@option{-ftree-loop-vectorize}, @option{-ftree-slp-vectorize},
+@option{-fvect-cost-model}, @option{-ftree-partial-pre}
+and @option{-fipa-cp-clone} options.
 
 @item -O0
 @opindex O0
@@ -7113,6 +7125,14 @@ Optimize sibling and tail recursive calls.
 
 Enabled at levels @option{-O2}, @option{-O3}, @option{-Os}.
 
+@item -foptimize-strlen
+@opindex foptimize-strlen
+Optimize various standard C string functions (e.g. @code{strlen},
+@code{strchr} or @code{strcpy}) and
+their _FORTIFY_SOURCE counterparts into faster alternatives.
+
+Enabled at levels @option{-O2}, @option{-O3}.
+
 @item -fno-inline
 @opindex fno-inline
 Do not expand any functions inline apart from those marked with
@@ -7278,6 +7298,8 @@ register, compare it against zero, then branch based upon the result.
 This option is only meaningful on architectures that support such
 instructions, which include x86, PowerPC, IA-64 and S/390.
 
+Enabled by default at -O1 and higher.
+
 The default is @option{-fbranch-count-reg}.
 
 @item -fno-function-cse


Re: [RFC, PATCH 1/n] IPA C++ refactoring

2014-07-14 Thread Martin Liška

Hello

On 07/11/2014 12:07 PM, Jan Hubicka wrote:

Hi,
this first patch continues with the refactoring of the IPA infrastructure so that
we will have a C++ API. In the patch, I transformed many global functions into
members of symtab_node and cgraph_node.

Example:
cgraph_remove_node (struct cgraph_node *node) - cgraph_node::remove (void)
symtab_unregister_node (symtab_node *node) - symtab_node::unregister (void)

The patch is being discussed with Honza and we will iterate on it. We want to inform
folks that we plan to make the following changes.

After the patch is applied, I would like to transform varpool_node and 
cgraph_edge in the following patch.

Thank you for your comments,
Martin
  
  /* Remove the node from cgraph.  */


Perhaps "Remove function from symbol table."
(Similarly for varpool; perhaps a few other block comments need revisiting.
We may do that incrementally.)
  
+  /* Add node into symbol table.  This function is not used directly, but via

+ cgraph/varpool node creation routines.  */
+  void register_symbol (void);
+
+  /* Remove symtab node from the symbol table.  */
+  void remove (void);
+
+  /* Dump symtab node to F.  */
+  void dump (FILE *f);
+
+  /* Dump symtab node to stderr.  */
+  void DEBUG_FUNCTION debug (void);
+
+  /* Verify consistency of node.  */
+  void DEBUG_FUNCTION verify (void);
+
+  /* Return ipa reference from this symtab_node to
+ REFERED_NODE or REFERED_VARPOOL_NODE. USE_TYPE specify type
+ of the use and STMT the statement (if it exists).  */
+  struct ipa_ref *add_reference (symtab_node *referred_node,
+   enum ipa_ref_use use_type);
+
+  /* Return ipa reference from this symtab_node to
+ REFERED_NODE or REFERED_VARPOOL_NODE. USE_TYPE specify type
+ of the use and STMT the statement (if it exists).  */
+  struct ipa_ref *add_reference (symtab_node *referred_node,
+enum ipa_ref_use use_type, gimple stmt);
+
+  /* If VAL is a reference to a function or a variable, add a reference from
+ this symtab_node to the corresponding symbol table node.  USE_TYPE specify
+ type of the use and STMT the statement (if it exists).  Return the new
+ reference or NULL if none was created.  */
+  struct ipa_ref *maybe_add_reference (tree val, enum ipa_ref_use use_type,
+  gimple stmt);
+
+  /* Clone all references from symtab NODE to this symtab_node.  */
+  void clone_references (symtab_node *node);
+
+  /* Remove all stmt references in non-speculative references.
+ Those are not maintained during inlining  clonning.
+ The exception are speculative references that are updated along
+ with callgraph edges associated with them.  */
+  void clone_referring (symtab_node *node);
+
+  /* Clone reference REF to this symtab_node and set its stmt to STMT.  */
+  struct ipa_ref *clone_reference (struct ipa_ref *ref, gimple stmt);
+
+  /* Find the structure describing a reference to REFERRED_NODE
+ and associated with statement STMT.  */
+  struct ipa_ref *find_reference (symtab_node *, gimple, unsigned int);
+
+  /* Remove all references that are associated with statement STMT.  */
+  void remove_stmt_references (gimple stmt);
+
+  /* Remove all stmt references in non-speculative references.
+ Those are not maintained during inlining  clonning.
+ The exception are speculative references that are updated along
+ with callgraph edges associated with them.  */
+  void clear_stmts_in_references (void);
+
+  /* Remove all references in ref list.  */
+  void remove_all_references (void);
+
+  /* Remove all referring items in ref list.  */
+  void remove_all_referring (void);
+
+  /* Dump references in ref list to FILE.  */
+  void dump_references (FILE *file);
+
+  /* Dump referring in list to FILE.  */
+  void dump_referring (FILE *);
+
+  /* Return true if symtab node and TARGET represents
+ semantically equivalent symbols.  */
+  bool semantically_equivalent_p (symtab_node *target);
+
+  /* Classify symbol symtab node for partitioning.  */
+  enum symbol_partitioning_class get_partitioning_class (void);
+
+  /* Return comdat group.  */
+  tree get_comdat_group ()
+{
+  return x_comdat_group;
+}
+
+  /* Return comdat group as identifier_node.  */
+  tree get_comdat_group_id ()
+{
+  if (x_comdat_group  TREE_CODE (x_comdat_group) != IDENTIFIER_NODE)
+   x_comdat_group = DECL_ASSEMBLER_NAME (x_comdat_group);
+  return x_comdat_group;
+}
+
+  /* Set comdat group.  */
+  void set_comdat_group (tree group)
+{
+  gcc_checking_assert (!group || TREE_CODE (group) == IDENTIFIER_NODE
+  || DECL_P (group));
+  x_comdat_group = group;
+}
+
+  /* Return section as string.  */
+  const char * get_section ()
+{
+  if (!x_section)
+   return NULL;
+  return x_section-name;
+}
+
+  /* Remove node from same comdat group.   */
+  void remove_from_same_comdat_group (void);
+
+  /* 

Re: [PATCH 2/5] Existing call graph infrastructure enhancement

2014-07-17 Thread Martin Liška


On 06/30/2014 08:54 PM, Jeff Law wrote:

On 06/30/14 05:49, Martin Liška wrote:


On 06/17/2014 10:00 PM, Jeff Law wrote:

On 06/13/14 04:26, mliska wrote:

Hi,
 this small patch prepares remaining needed infrastructure for
the new pass.

Changelog:

2014-06-13  Martin Liska  mli...@suse.cz
Honza Hubicka  hubi...@ucw.cz

* ipa-utils.h (polymorphic_type_binfo_p): Function marked external
instead of static.
* ipa-devirt.c (polymorphic_type_binfo_p): Likewise.
* ipa-prop.h (count_formal_params): Likewise.
* ipa-prop.c (count_formal_params): Likewise.
* ipa-utils.c (ipa_merge_profiles): Be more tolerant if we merge
profiles for semantically equivalent functions.
* passes.c (do_per_function): If we load body of a function
during WPA,
this condition should behave same.
* varpool.c (ctor_for_folding): More tolerant assert for variable
aliases created during WPA.

Presumably we don't have any useful way to merge the cases where we
have profiles for SRC & DST in ipa_merge_profiles or even to guess
which is more useful when presented with both?  Does it make sense to
log this into a debugging file when we drop one?

Hello,
this merge function was written by Honza, what do you think Honza
about this note?


I think this patch is fine.  If adding logging makes sense, then feel
free to do so and consider that trivial change pre-approved.

I made a small change to this patch, where I moved
'gsi_next_nonvirtual_phi' from the pass to gimple-iterator.h.

Ready for trunk with this change?

Yes.  I think with the exception of patch #3/5 everything looks good. I'll try 
to get another pass over #3 this week.  What I looked at last week was pretty 
good; I'm pretty confident this will be wrapped up shortly.

If #1/#2 make sense to install independent of #3, go ahead.  #4/#5 are 
obviously dependent on #3.

Jeff


Hello,
   thank you for the approval; this final version removes a few hunks that are no
longer needed. The changes are just cosmetic and I will commit the patch at the
beginning of next week.

Thanks,
Martin

diff --git a/gcc/gimple-iterator.h b/gcc/gimple-iterator.h
index 909d58b..47168b9 100644
--- a/gcc/gimple-iterator.h
+++ b/gcc/gimple-iterator.h
@@ -281,6 +281,30 @@ gsi_last_nondebug_bb (basic_block bb)
   return i;
 }
 
+/* Iterates I statement iterator to the next non-virtual statement.  */
+
+static inline void
+gsi_next_nonvirtual_phi (gimple_stmt_iterator *i)
+{
+  gimple phi;
+
+  if (gsi_end_p (*i))
+return;
+
+  phi = gsi_stmt (*i);
+  gcc_assert (phi != NULL);
+
+  while (virtual_operand_p (gimple_phi_result (phi)))
+{
+  gsi_next (i);
+
+  if (gsi_end_p (*i))
+	return;
+
+  phi = gsi_stmt (*i);
+}
+}
+
 /* Return the basic block associated with this iterator.  */
 
 static inline basic_block
diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c
index 40f696b..aecba07 100644
--- a/gcc/ipa-prop.c
+++ b/gcc/ipa-prop.c
@@ -211,7 +211,7 @@ ipa_populate_param_decls (struct cgraph_node *node,
 
 /* Return how many formal parameters FNDECL has.  */
 
-static inline int
+int
 count_formal_params (tree fndecl)
 {
   tree parm;
diff --git a/gcc/ipa-prop.h b/gcc/ipa-prop.h
index 8886e93..bc6249e 100644
--- a/gcc/ipa-prop.h
+++ b/gcc/ipa-prop.h
@@ -529,6 +529,7 @@ void ipa_free_all_edge_args (void);
 void ipa_free_all_structures_after_ipa_cp (void);
 void ipa_free_all_structures_after_iinln (void);
 void ipa_register_cgraph_hooks (void);
+int count_formal_params (tree fndecl);
 
 /* This function ensures the array of node param infos is big enough to
accommodate a structure for all nodes and reallocates it if not.  */
diff --git a/gcc/ipa-utils.c b/gcc/ipa-utils.c
index c191210..d58b170 100644
--- a/gcc/ipa-utils.c
+++ b/gcc/ipa-utils.c
@@ -660,13 +660,8 @@ ipa_merge_profiles (struct cgraph_node *dst,
   if (dst-tp_first_run  src-tp_first_run  src-tp_first_run)
 dst-tp_first_run = src-tp_first_run;
 
-  if (src-profile_id)
-{
-  if (!dst-profile_id)
-	dst-profile_id = src-profile_id;
-  else
-	gcc_assert (src-profile_id == dst-profile_id);
-}
+  if (src-profile_id  !dst-profile_id)
+dst-profile_id = src-profile_id;
 
   if (!dst-count)
 return;
diff --git a/gcc/passes.c b/gcc/passes.c
index 61b4c12..bae302b 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -1478,7 +1478,7 @@ do_per_function (void (*callback) (function *, void *data), void *data)
 {
   struct cgraph_node *node;
   FOR_EACH_DEFINED_FUNCTION (node)
-	if (node-analyzed  gimple_has_body_p (node-decl)
+	if (node-analyzed  (gimple_has_body_p (node-decl)  !in_lto_p)
 	 (!node-clone_of || node-decl != node-clone_of-decl))
 	  callback (DECL_STRUCT_FUNCTION (node-decl), data);
 }
diff --git a/gcc/varpool.c b/gcc/varpool.c
index 04ce714..5662985 100644
--- a/gcc/varpool.c
+++ b/gcc/varpool.c
@@ -397,6 +397,7 @@ ctor_for_folding (tree decl)
   if (decl != real_decl)
 {
   gcc_assert (!DECL_INITIAL (decl)
+		  || (node-alias

Re: [PATCH, go]: Restore bootstrap

2014-07-24 Thread Martin Liška

On 07/24/2014 07:57 PM, Uros Bizjak wrote:

Hello!

2014-07-24  Uros Bizjak  ubiz...@gmail.com

 * go/go-gcc.cc (Gcc_backend::global_variable_set_init): Rename
 symtab_get_node to symtab_node::get.

Tested on x86_64-linux-gnu and committed to mainline SVN.

Uros.

Hello,
thank you for your fix. As I see my configure output:

$ ../configure --disable-bootstrap --enable-checking=release 
--enable-languages=all --disable-multilib


The following languages will be built: c,c++,fortran,java,lto,objc
*** This configuration is not supported in the following subdirectories:
 gnattools target-libada target-libgo target-libbacktrace
(Any other directories should still work fine.)

I thought that 'all' would cover all front ends; is it really the intended
behavior that Go is disabled by 'all'?


Thank you,
Martin


Re: [PATCH, go]: Restore bootstrap

2014-07-25 Thread Martin Liška


On 07/25/2014 03:51 AM, Ian Lance Taylor wrote:

On Thu, Jul 24, 2014 at 12:21 PM, Martin Liška mli...@suse.cz wrote:

On 07/24/2014 07:57 PM, Uros Bizjak wrote:

Hello!

2014-07-24  Uros Bizjak  ubiz...@gmail.com

  * go/go-gcc.cc (Gcc_backend::global_variable_set_init): Rename
  symtab_get_node to symtab_node::get.

Tested on x86_64-linux-gnu and committed to mainline SVN.

Uros.

Hello,
 thank you for your fix. As I see my configure output:

$ ../configure --disable-bootstrap --enable-checking=release
--enable-languages=all --disable-multilib

The following languages will be built: c,c++,fortran,java,lto,objc
*** This configuration is not supported in the following subdirectories:
  gnattools target-libada target-libgo target-libbacktrace
 (Any other directories should still work fine.)

I thought that 'all' would cover all front ends; is it really the intended behavior
that Go is disabled by 'all'?

Yes, --enable-languages=all is the default, so it only builds the
frontends that are enabled by default, so it does not build the Go or
Ada frontends.  I know it doesn't make much sense.

Ian

I would suggest renaming the currently used 'all' to 'default'. Then 'all' can
really be used for all possible front ends we have :) I know it changes the
behavior, but I hope it makes sense?

Thank you,
Martin



Re: [PATCH, go]: Restore bootstrap

2014-07-29 Thread Martin Liška


On 07/28/2014 05:24 PM, Mike Stump wrote:

On Jul 28, 2014, at 3:29 AM, Gerald Pfeifer ger...@pfeifer.com wrote:

On Fri, 25 Jul 2014, Martin Liška wrote:

Yes, --enable-languages=all is the default, so it only builds the
frontends that are enabled by default, so it does not build the Go or
Ada frontends.  I know it doesn't make much sense.

I would suggest renaming the currently used 'all' to 'default'. Then 'all'
can really be used for all possible front ends we have :) I know it
changes the behavior, but I hope it makes sense?

I'd be in favor of that (assuming you mean all that we have and
that are supported for the host/target combination in questions).

When I want to do all, I actually do want to do all.  I think as long as we 
retain a spelling for the trimmed list, (aka default or most or some spelling), 
I think it would be an improvement.  A second option would be a 
noimeanreallyall spelling to get all and leave all alone.  I don’t favor that.

Hello,
   I am not a configure script guru, but what do you think about the suggested
patch?

Thanks,
Martin
diff --git a/configure b/configure
index 353730b..e5eb557 100755
--- a/configure
+++ b/configure
@@ -6298,7 +6298,7 @@ if test -d ${srcdir}/gcc; then
   enable_languages=${LANGUAGES}
 echo configure.in: warning: setting LANGUAGES is deprecated, use --enable-languages instead 12
 else
-  enable_languages=all
+  enable_languages=default
 fi
   else
 if test x${enable_languages} = x ||
@@ -6350,6 +6350,7 @@ if test -d ${srcdir}/gcc; then
 for other in ${lang_requires} ${lang_requires_boot_languages}; do
   case ,${enable_languages}, in
 	*,$other,*) ;;
+	*,default,*) ;;
 	*,all,*) ;;
 	*,$language,*)
 	  echo  \`$other' language required by \`$language'; enabling 12
@@ -6361,6 +6362,7 @@ if test -d ${srcdir}/gcc; then
 	  if test $other != c; then
 	case ,${enable_stage1_languages}, in
 	  *,$other,*) ;;
+	  *,default,*) ;;
 	  *,all,*) ;;
 	  *)
 		case ,${enable_languages}, in
@@ -6393,7 +6395,7 @@ if test -d ${srcdir}/gcc; then
   fi
 
 
-  missing_languages=`echo ,$enable_languages, | sed -e s/,all,/,/ -e s/,c,/,/ `
+  missing_languages=`echo ,$enable_languages, | sed -e s/,all,/,/ -e s/,default,/,/ -e s/,c,/,/ `
   potential_languages=,c,
 
   enabled_target_libs=
@@ -6433,12 +6435,18 @@ if test -d ${srcdir}/gcc; then
 	  add_this_lang=yes
 	fi
 ;;
-  *,all,*)
-# 'all' was selected, select it if it is a default language
+  *,default,*)
+# 'default' was selected, select it if it is a default language
 	if test $language != c; then
 	  add_this_lang=${build_by_default}
 	fi
 ;;
+  *,all,*)
+# 'all' was selected, add the language
+	if test $language != c; then
+	  add_this_lang=yes
+	fi
+;;
 esac
 
 # Disable languages that need other directories if these aren't available.
diff --git a/configure.ac b/configure.ac
index d0f7471..2d99dc5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1759,7 +1759,7 @@ if test -d ${srcdir}/gcc; then
   enable_languages=${LANGUAGES}
 echo configure.in: warning: setting LANGUAGES is deprecated, use --enable-languages instead 12
 else
-  enable_languages=all
+  enable_languages=default
 fi
   else
 if test x${enable_languages} = x ||
@@ -1811,6 +1811,7 @@ if test -d ${srcdir}/gcc; then
 for other in ${lang_requires} ${lang_requires_boot_languages}; do
   case ,${enable_languages}, in
 	*,$other,*) ;;
+	*,default,*) ;;
 	*,all,*) ;;
 	*,$language,*)
 	  echo  \`$other' language required by \`$language'; enabling 12
@@ -1822,6 +1823,7 @@ if test -d ${srcdir}/gcc; then
 	  if test $other != c; then
 	case ,${enable_stage1_languages}, in
 	  *,$other,*) ;;
+	  *,default,*) ;;
 	  *,all,*) ;;
 	  *)
 		case ,${enable_languages}, in
@@ -1854,7 +1856,7 @@ if test -d ${srcdir}/gcc; then
   fi
   AC_SUBST(extra_host_libiberty_configure_flags)
 
-  missing_languages=`echo ,$enable_languages, | sed -e s/,all,/,/ -e s/,c,/,/ `
+  missing_languages=`echo ,$enable_languages, | sed -e s/,all,/,/ -e s/,default,/,/ -e s/,c,/,/ `
   potential_languages=,c,
 
   enabled_target_libs=
@@ -1894,12 +1896,18 @@ if test -d ${srcdir}/gcc; then
 	  add_this_lang=yes
 	fi
 ;;
-  *,all,*)
-# 'all' was selected, select it if it is a default language
+  *,default,*)
+# 'default' was selected, select it if it is a default language
 	if test $language != c; then
 	  add_this_lang=${build_by_default}
 	fi
 ;;
+  *,all,*)
+# 'all' was selected, add the language
+	if test $language != c; then
+	  add_this_lang=yes
+	fi
+;;
 esac
 
 # Disable languages that need other directories if these aren't available.


Re: [PATCH, go]: Restore bootstrap

2014-07-29 Thread Martin Liška


On 07/29/2014 07:01 PM, Mike Stump wrote:

On Jul 29, 2014, at 2:10 AM, Martin Liška mli...@suse.cz wrote:

   I am not a configure script guru, but what do you think about suggested 
patch?

Looks exactly like what I would do.
All right, I'll send the patch, as well as a documentation enhancement, in
a separate mailing list thread.


Thanks,
Martin


Re: [PATCH] LTO streamer reorg - try to reduce WPA memory use

2014-07-30 Thread Martin Liška


On 07/30/2014 11:41 AM, Richard Biener wrote:

On Wed, 30 Jul 2014, Richard Biener wrote:


On Wed, Jul 30, 2014 at 7:51 AM, Markus Trippelsdorf
mar...@trippelsdorf.de wrote:

On 2014.07.29 at 15:10 +0200, Richard Biener wrote:

On Tue, 29 Jul 2014, Richard Biener wrote:


This re-organizes the LTO streamer to do compression transparently
in the data-streamer routines (and disables section compression
by defaulting to -flto-compression-level=0).  This avoids
keeping the whole uncompressed sections in memory, only retaining
the compressed ones.

The downside is that we lose compression of at least the string
parts (they are abusing the streaming interface quite awkwardly
and doing random-accesses with offsets into the uncompressed
section).  With a little bit of surgery we can get that back I
think (but we'd have to keep the uncompressed piece in memory
somewhere which means losing the memory use advantage).

Very lightly tested sofar (running lto.exp).  I'll try a LTO
bootstrap now.

I wonder what the change is on WPA memory use for larger
projects and what the effect on object file size is.

Updated patch passing LTO bootstrap (one warning fix) and
with a memory leak fixed.

Testing with Firefox is impossible at the moment because of PR61885.
One thing I've noticed (before the ICE) is that virtual memory usage is
very high:

AddressKbytes  RSSDirty  Mode  Mapping
004016344 90840  r-x-- lto1
013f6000   36   36   28  rw--- lto1
013ff000 1072  276  276  rw---   [ anon ]
034aa000 10154940  1540384  1540384  rw---   [ anon ]
2acf04af2000  136  1360  r-x-- ld-2.19.90.so
2acf04b14000   88   88   88  rw---   [ anon ]
...
  ---  ---  ---
total kB 12022060  3388396  3377708

Maybe there is still a memleak (just checked that LTOing int main() {}
doesn't leak).

Found it:

Index: gcc/lto-section-in.c
===
--- gcc/lto-section-in.c.orig   2014-07-30 12:40:27.950225826 +0200
+++ gcc/lto-section-in.c2014-07-30 12:37:44.179237102 +0200
@@ -249,7 +249,7 @@ lto_destroy_simple_input_block (struct l
 struct lto_input_block *ib,
 const char *data, size_t len)
  {
-  free (ib);
+  delete ib;
lto_free_section_data (file_data, section_type, NULL, data, len);
  }
  
Richard.

Hello,
   here is the memory/CPU usage for the patch. For both measurements, I used sync and
drop_caches.

Url: 
https://drive.google.com/file/d/0B0pisUJ80pO1andOX19JMHV3LVE/edit?usp=sharing

Martin



[PATCH] Fix for ipa/63795, ipa/63622

2014-11-11 Thread Martin Liška

Hello.

The following patch adds a check for aliasing support. The patch bootstraps on
x86_64-apple-darwin1 and is one of the patches needed to restore bootstrap on the
target. I plan to introduce an additional patch that will cover testsuite failures
for the target.

Ready for trunk?
Thanks,
Martin
gcc/ChangeLog:

2014-11-11  Martin Liska  mli...@suse.cz

* ipa-icf.c (sem_function::merge): Add new target aliasing
support guide. 
(sem_variable::merge): Likewise.
* ipa-icf.h (target_supports_aliasing_p): New function.

gcc/testsuite/ChangeLog:

2014-11-11  Martin Liska  mli...@suse.cz

* g++.dg/ipa/ipa-icf-4.C: Add more precise dump scan.
* g++.dg/ipa/ipa-icf-5.C: Add condition for targets with aliasing 
support.
diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c
index 84cc0ca..f19c3c1 100644
--- a/gcc/ipa-icf.c
+++ b/gcc/ipa-icf.c
@@ -191,6 +191,18 @@ sem_item::dump (void)
 }
 }
 
+/* Return true if target supports aliasing.  */
+
+bool
+sem_item::target_supports_aliasing_p (void)
+{
+#if !defined (ASM_OUTPUT_DEF) || (!defined(ASM_OUTPUT_WEAK_ALIAS)  !defined (ASM_WEAKEN_DECL))
+  return false;
+#else
+  return true;
+#endif
+}
+
 /* Semantic function constructor that uses STACK as bitmap memory stack.  */
 
 sem_function::sem_function (bitmap_obstack *stack): sem_item (FUNC, stack),
@@ -589,7 +601,8 @@ sem_function::merge (sem_item *alias_item)
   redirect_callers = false;
 }
 
-  if (create_alias  DECL_COMDAT_GROUP (alias-decl))
+  if (create_alias  (DECL_COMDAT_GROUP (alias-decl)
+		   || !sem_item::target_supports_aliasing_p ()))
 {
   create_alias = false;
   create_thunk = true;
@@ -605,6 +618,14 @@ sem_function::merge (sem_item *alias_item)
 local_original
   = dyn_cast cgraph_node * (original-noninterposable_alias ());
 
+if (!local_original)
+  {
+	if (dump_file)
+	  fprintf (dump_file, Noninterposable alias cannot be created.\n\n);
+
+	return false;
+  }
+
   if (redirect_callers)
 {
   /* If alias is non-overwritable then
@@ -649,7 +670,7 @@ sem_function::merge (sem_item *alias_item)
   alias-resolve_alias (original);
 
   /* Workaround for PR63566 that forces equal calling convention
-	 to be used.  */
+   to be used.  */
   alias-local.local = false;
   original-local.local = false;
 
@@ -1155,6 +1176,13 @@ sem_variable::merge (sem_item *alias_item)
 {
   gcc_assert (alias_item-type == VAR);
 
+  if (!sem_item::target_supports_aliasing_p ())
+{
+  if (dump_file)
+	fprintf (dump_file, Aliasing is not supported by target\n\n);
+  return false;
+}
+
   sem_variable *alias_var = static_castsem_variable * (alias_item);
 
   varpool_node *original = get_node ();
diff --git a/gcc/ipa-icf.h b/gcc/ipa-icf.h
index d8e7b16..6e15166 100644
--- a/gcc/ipa-icf.h
+++ b/gcc/ipa-icf.h
@@ -138,9 +138,11 @@ public:
 
   /* Return base tree that can be used for compatible_types_p and
  contains_polymorphic_type_p comparison.  */
-
   static bool get_base_types (tree *t1, tree *t2);
 
+  /* Return true if target supports aliasing.  */
+  static bool target_supports_aliasing_p (void);
+
   /* Item type.  */
   sem_item_type type;
 
diff --git a/gcc/testsuite/g++.dg/ipa/ipa-icf-4.C b/gcc/testsuite/g++.dg/ipa/ipa-icf-4.C
index 9434289..67f2744 100644
--- a/gcc/testsuite/g++.dg/ipa/ipa-icf-4.C
+++ b/gcc/testsuite/g++.dg/ipa/ipa-icf-4.C
@@ -43,6 +43,6 @@ int main()
   return 123;
 }
 
-/* { dg-final { scan-ipa-dump Varpool alias has been created icf  } } */
+/* { dg-final { scan-ipa-dump \(Varpool alias has been created\)|\(Aliasing is not supported by target\) icf  } } */
 /* { dg-final { scan-ipa-dump Equal symbols: 6 icf  } } */
 /* { dg-final { cleanup-ipa-dump icf } } */
diff --git a/gcc/testsuite/g++.dg/ipa/ipa-icf-5.C b/gcc/testsuite/g++.dg/ipa/ipa-icf-5.C
index f835814..57dcb78 100644
--- a/gcc/testsuite/g++.dg/ipa/ipa-icf-5.C
+++ b/gcc/testsuite/g++.dg/ipa/ipa-icf-5.C
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-visibility  } */
+/* { dg-require-alias  } */
 /* { dg-options -O2 -fdump-ipa-icf } */
 
 struct test


Re: [PATCH] Fix some ICF gimple_call handling issues

2014-11-11 Thread Martin Liška

On 11/11/2014 12:11 AM, Jakub Jelinek wrote:

On Mon, Nov 10, 2014 at 10:08:54PM +0100, Richard Biener wrote:

@@ -662,9 +662,49 @@ func_checker::compare_gimple_call (gimpl
   t1 = gimple_call_fndecl (s1);
   t2 = gimple_call_fndecl (s2);


Just drop these and compare gimple_call_fn only.


+  tree chain1 = gimple_call_chain (s1);
+  tree chain2 = gimple_call_chain (s2);
+
+  if ((chain1  !chain2) || (!chain1  chain2))
+return return_false_with_msg (Tree call chains are different);


I miss a compare_operands for the call chain.

Otherwise OK.


Here is what I've committed after another bootstrap/regtest.
Note, I've tried:
__attribute__ ((noinline, noclone))
int f1 (int x)
{
   int y = 3, z = 4;
   __attribute__ ((noinline, noclone)) int
   f2 (int a) { return a + x + y + z; }
   return f2 (5);
}

__attribute__ ((noinline, noclone))
int f3 (int x)
{
   int y = 3, z = 4;
   __attribute__ ((noinline, noclone)) int
   f4 (int a) { return a + x + y + z; }
   return f4 (5);
}

int
main ()
{
   if (f1 (9) != 21 || f3 (9) != 21)
 __builtin_abort ();
   return 0;
}
but ICF doesn't optimize this with or without the patch,
as the structs aren't the same type (supposedly different alias set?),
even when they have the same members laid out the same.


Hello Jakub.

You are right; more precisely, types_compatible_p returns false for these
two structures. I'll add this case to my TODO list.

Thank you for sending the patch.
Martin



2014-11-11  Jakub Jelinek  ja...@redhat.com
Martin Liska  mli...@suse.cz

* ipa-icf-gimple.c (func_checker::compare_bb): Fix comment typo.
(func_checker::compare_gimple_call): Compare gimple_call_fn,
gimple_call_chain, gimple_call_fntype and call flags.
testsuite/
* gcc.dg/ubsan/ipa-icf-1.c: New test.
* gcc.dg/ipa/ipa-icf-31.c: New test.

--- gcc/ipa-icf-gimple.c.jj 2014-10-30 14:42:20.0 +0100
+++ gcc/ipa-icf-gimple.c2014-11-10 19:08:38.339986360 +0100
@@ -554,7 +554,7 @@ func_checker::parse_labels (sem_bb *bb)

 In general, a collection of equivalence dictionaries is built for types
 like SSA names, declarations (VAR_DECL, PARM_DECL, ..). This infrastructure
-   is utilized by every statement-by-stament comparison function.  */
+   is utilized by every statement-by-statement comparison function.  */

  bool
  func_checker::compare_bb (sem_bb *bb1, sem_bb *bb2)
@@ -659,12 +659,39 @@ func_checker::compare_gimple_call (gimpl
if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
  return false;

-  t1 = gimple_call_fndecl (s1);
-  t2 = gimple_call_fndecl (s2);
-
-  /* Function pointer variables are not supported yet.  */
+  t1 = gimple_call_fn (s1);
+  t2 = gimple_call_fn (s2);
if (!compare_operand (t1, t2))
-return return_false();
+return return_false ();
+
+  /* Compare flags.  */
+  if (gimple_call_internal_p (s1) != gimple_call_internal_p (s2)
+  || gimple_call_ctrl_altering_p (s1) != gimple_call_ctrl_altering_p (s2)
+  || gimple_call_tail_p (s1) != gimple_call_tail_p (s2)
+  || gimple_call_return_slot_opt_p (s1) != gimple_call_return_slot_opt_p 
(s2)
+  || gimple_call_from_thunk_p (s1) != gimple_call_from_thunk_p (s2)
+  || gimple_call_va_arg_pack_p (s1) != gimple_call_va_arg_pack_p (s2)
+  || gimple_call_alloca_for_var_p (s1) != gimple_call_alloca_for_var_p (s2)
+  || gimple_call_with_bounds_p (s1) != gimple_call_with_bounds_p (s2))
+return false;
+
+  if (gimple_call_internal_p (s1)
+   gimple_call_internal_fn (s1) != gimple_call_internal_fn (s2))
+return false;
+
+  tree fntype1 = gimple_call_fntype (s1);
+  tree fntype2 = gimple_call_fntype (s2);
+  if ((fntype1  !fntype2)
+  || (!fntype1  fntype2)
+  || (fntype1  !types_compatible_p (fntype1, fntype2)))
+return return_false_with_msg (call function types are not compatible);
+
+  tree chain1 = gimple_call_chain (s1);
+  tree chain2 = gimple_call_chain (s2);
+  if ((chain1  !chain2)
+  || (!chain1  chain2)
+  || !compare_operand (chain1, chain2))
+return return_false_with_msg (static call chains are different);

/* Checking of argument.  */
for (i = 0; i  gimple_call_num_args (s1); ++i)
--- gcc/testsuite/gcc.dg/ubsan/ipa-icf-1.c.jj   2014-11-10 19:00:53.509525071 
+0100
+++ gcc/testsuite/gcc.dg/ubsan/ipa-icf-1.c  2014-11-10 19:02:21.836925806 
+0100
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-skip-if  { *-*-* } { * } { -O2 } } */
+/* { dg-options -fsanitize=undefined -fipa-icf } */
+
+__attribute__ ((noinline, noclone))
+int f1 (int x, int y)
+{
+  return x + y;
+}
+
+__attribute__ ((noinline, noclone))
+int f2 (int x, int y)
+{
+  return x - y;
+}
+
+int
+main ()
+{
+  if (f1 (5, 6) != 11 || f2 (5, 6) != -1)
+__builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.dg/ipa/ipa-icf-31.c.jj2014-11-10 18:59:16.604294652 
+0100
+++ gcc/testsuite/gcc.dg/ipa/ipa-icf-31.c   2014-11-10 18:59:59.690519616 
+0100
@@ -0,0 +1,41 @@
+/* { 

Re: [PATCH 1/4] cgraph_summary data structure introduction.

2014-11-13 Thread Martin Liška

On 11/13/2014 03:33 PM, Richard Biener wrote:

On Thu, Nov 13, 2014 at 3:10 PM, mliska mli...@suse.cz wrote:

The following patchset introduces a cgraph_summary template class that
should replace the custom implementations of cgraph-related summaries.

Idea behind the patch is to provide a generic interface one can use
to register custom summary related to a cgraph_node. As you know,
symbol_table provides hooks for creation, deletion and duplication
of a cgraph_node. If you have a pass, you need to handle all these
hooks and store custom data in your data structure.

The patchset bootstraps on x86_64-linux-pc and no regressions were
observed.

Ready for trunk?
Thank you,
Martin
---
  gcc/ChangeLog | 1 +
  1 file changed, 1 insertion(+)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2eb6f35..ad3c2bf 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,4 @@
+
  2014-11-12  Alan Lawrence  alan.lawre...@arm.com

 * config/aarch64/aarch64.c (aarch64_simd_lane_bounds): Display indices.


huh?


Please ignore this modification; I forgot to remove it from the emailed patchset.
I only introduced the change to be able to create a git commit.

Thank you for understanding,
Martin




--
2.1.2






Re: [PATCH 2/4] New data structure for cgraph_summary introduced.

2014-11-13 Thread Martin Liška

On 11/13/2014 03:48 PM, Markus Trippelsdorf wrote:

On 2014.11.13 at 15:11 +0100, mliska wrote:

Just two remarks:


+template class T
+class GTY((user)) cgraph_summary T *
+{
+public:
+  /* Default construction takes SYMTAB as an argument.  */
+  cgraph_summary (symbol_table *symtab, bool ggc = false): m_ggc (ggc),
+m_insertion_enabled (true), m_symtab (symtab)
+  {
+cgraph_node *node;
+
+FOR_EACH_FUNCTION (node)
+{
+  gcc_assert (node-summary_uid  0);
+}
+
+m_map = new hash_mapint, T*, summary_hashmap_traits(13, m_ggc);
+
+m_symtab_insertion_hook =
+  symtab-add_cgraph_insertion_hook
+  (cgraph_summary::symtab_insertion, this);
+
+m_symtab_removal_hook =
+  symtab-add_cgraph_removal_hook
+  (cgraph_summary::symtab_removal, this);
+m_symtab_duplication_hook =
+  symtab-add_cgraph_duplication_hook
+  (cgraph_summary::symtab_duplication, this);
+  }
+
+  /* Destructor.  */
+  virtual ~cgraph_summary ()
+  {
+destroy ();
+  }


 From https://gcc.gnu.org/wiki/CppConventions:

Constructors and destructors are often much larger than programmers
expect. Prefer non-inline versions unless you have evidence that the
inline version is needed.


...
+  inline T* operator[] (int uid)
+  {
+T **v = m_map-get (uid);


The inline keyword is redundant for members inside a class definition.
Please drop it.



Hi.

Thank you for the remarks. Here is a new version of the patch.

Thanks,
Martin
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 3d671c2..bf11277 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1175,6 +1175,7 @@ OBJS = \
 	cfgrtl.o \
 	symtab.o \
 	cgraph.o \
+	cgraph_summary.o \
 	cgraphbuild.o \
 	cgraphunit.o \
 	cgraphclones.o \
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index e2becb9..588b6d5 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -1225,6 +1225,8 @@ public:
   int count_materialization_scale;
   /* Unique id of the node.  */
   int uid;
+  /* Summary unique id of the node.  */
+  int summary_uid;
   /* ID assigned by the profiling.  */
   unsigned int profile_id;
   /* Time profiler: first run of function.  */
@@ -1786,6 +1788,10 @@ public:
   friend class cgraph_node;
   friend class cgraph_edge;
 
+  symbol_table (): cgraph_max_summary_uid (1)
+  {
+  }
+
   /* Initialize callgraph dump file.  */
   void initialize (void);
 
@@ -1982,6 +1988,7 @@ public:
 
   int cgraph_count;
   int cgraph_max_uid;
+  int cgraph_max_summary_uid;
 
   int edges_count;
   int edges_max_uid;
@@ -2310,6 +2317,7 @@ symbol_table::allocate_cgraph_symbol (void)
   node-uid = cgraph_max_uid++;
 }
 
+  node-summary_uid = cgraph_max_summary_uid++;
   return node;
 }
 
diff --git a/gcc/cgraph_summary.c b/gcc/cgraph_summary.c
new file mode 100644
index 000..9af1d7e
--- /dev/null
+++ b/gcc/cgraph_summary.c
@@ -0,0 +1,34 @@
+#include config.h
+#include system.h
+#include coretypes.h
+#include tm.h
+#include tree.h
+#include predict.h
+#include vec.h
+#include hashtab.h
+#include hash-set.h
+#include machmode.h
+#include hard-reg-set.h
+#include input.h
+#include function.h
+#include dominance.h
+#include cfg.h
+#include basic-block.h
+#include tree-ssa-alias.h
+#include internal-fn.h
+#include gimple-expr.h
+#include is-a.h
+#include gimple.h
+#include tree-inline.h
+#include dumpfile.h
+#include langhooks.h
+#include splay-tree.h
+#include hash-map.h
+#include plugin-api.h
+#include ipa-ref.h
+#include cgraph.h
+#include ipa-utils.h
+#include alloc-pool.h
+#include cgraph_summary.h
+#include ipa-prop.h
+#include hash-map.h
diff --git a/gcc/cgraph_summary.h b/gcc/cgraph_summary.h
new file mode 100644
index 000..d89b679
--- /dev/null
+++ b/gcc/cgraph_summary.h
@@ -0,0 +1,301 @@
+/* Callgraph summary data structure.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   Contributed by Martin Liska
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+http://www.gnu.org/licenses/.  */
+
+#ifndef GCC_CGRAPH_SUMMARY_H
+#define GCC_CGRAPH_SUMMARY_H
+
+#define CGRAPH_SUMMARY_DELETED_VALUE -1
+#define CGRAPH_SUMMARY_EMPTY_VALUE 0
+
+template class T
+class cgraph_summary
+{
+  private:
+cgraph_summary();
+};
+
+template class T
+class GTY((user)) cgraph_summary T *
+{
+public:
+  /* Default construction takes SYMTAB as an argument.  */
+  cgraph_summary (symbol_table *symtab, bool ggc = false): m_ggc (ggc),
+m_insertion_enabled (true), m_symtab (symtab)
+  {
+cgraph_node *node;
+
+

Re: [PATCH 2/4] New data structure for cgraph_summary introduced.

2014-11-14 Thread Martin Liška

On 11/13/2014 04:50 PM, Jan Hubicka wrote:

gcc/ChangeLog:

2014-11-12  Martin Liska  mli...@suse.cz

* Makefile.in: New object file is added.
* cgraph.h (symbol_table::allocate_cgraph_symbol): Summary UID
is filled up.
* cgraph_summary.c: New file.
* cgraph_summary.h: New file.


I am trying to get rid of the cgraph prefixes for symbols (keeping them for
the graph only), and summaries can be attached to variables too. Even if that
is not necessarily supported by your current implementation, let's keep the API
prepared for it. So I would call it symtab-summary.* for source files and
symtab_summary for the base type (probably function_summary for annotating
functions and cgraph_edge_summary for annotating edges?)


Hello.

I followed your remarks, new class is called function_summary and is located
in symbol-summary.h.




diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index e2becb9..588b6d5 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -1225,6 +1225,8 @@ public:
int count_materialization_scale;
/* Unique id of the node.  */
int uid;
+  /* Summary unique id of the node.  */
+  int summary_uid;


What makes summary_uid better than uid?


Because cgraph_node::uid is not a unique ID; it is recycled. As far as I can see,
there are two remaining places that rely on cgraph_node::uid values being fairly
consecutive:

a) node_growth_cache vector is resized according to cgraph_max_uid
b) lto-partition.c: lto_balanced_map

If we change the IPA-related stuff to annotations and lto_balanced_map is
rewritten, we can finally unify uid and summary_uid. As Martin correctly pointed
out, we should also unify the cgraph_node dumps, where we combine uid and order.




diff --git a/gcc/cgraph_summary.c b/gcc/cgraph_summary.c
new file mode 100644
index 000..9af1d7e
--- /dev/null
+++ b/gcc/cgraph_summary.c


And why do we need this file?  It will need license header if really needed.


Sure, the file can be removed.

Martin



The implementation seems sane - I will check the actual uses :)
Please send the updated patch though.

Honza



From d7c149edea20850e95fde2e2e332895f5b5a8594 Mon Sep 17 00:00:00 2001
From: mliska mli...@suse.cz
Date: Thu, 13 Nov 2014 15:11:05 +0100
Subject: [PATCH 1/3] New data structure for function_summary introduced.

gcc/ChangeLog:

2014-11-12  Martin Liska  mli...@suse.cz

	* cgraph.h (symbol_table::allocate_cgraph_symbol): Summary UID
	is filled up.
	* symbol-summary.h: New file.
	* gengtype.c (open_base_files): Add symbol-summary.h.
	* toplev.c (general_init): Call constructor of symbol_table.
---
 gcc/cgraph.h |   8 ++
 gcc/gengtype.c   |   4 +-
 gcc/symbol-summary.h | 313 +++
 gcc/toplev.c |   3 +-
 4 files changed, 325 insertions(+), 3 deletions(-)
 create mode 100644 gcc/symbol-summary.h

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index e2becb9..588b6d5 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -1225,6 +1225,8 @@ public:
   int count_materialization_scale;
   /* Unique id of the node.  */
   int uid;
+  /* Summary unique id of the node.  */
+  int summary_uid;
   /* ID assigned by the profiling.  */
   unsigned int profile_id;
   /* Time profiler: first run of function.  */
@@ -1786,6 +1788,10 @@ public:
   friend class cgraph_node;
   friend class cgraph_edge;
 
+  symbol_table (): cgraph_max_summary_uid (1)
+  {
+  }
+
   /* Initialize callgraph dump file.  */
   void initialize (void);
 
@@ -1982,6 +1988,7 @@ public:
 
   int cgraph_count;
   int cgraph_max_uid;
+  int cgraph_max_summary_uid;
 
   int edges_count;
   int edges_max_uid;
@@ -2310,6 +2317,7 @@ symbol_table::allocate_cgraph_symbol (void)
   node-uid = cgraph_max_uid++;
 }
 
+  node-summary_uid = cgraph_max_summary_uid++;
   return node;
 }
 
diff --git a/gcc/gengtype.c b/gcc/gengtype.c
index fac83ee..1e2db27 100644
--- a/gcc/gengtype.c
+++ b/gcc/gengtype.c
@@ -1842,8 +1842,8 @@ open_base_files (void)
   tree-ssa-loop-niter.h, tree-into-ssa.h, tree-dfa.h, 
   tree-ssa.h, reload.h, cpp-id-data.h, tree-chrec.h,
   except.h, output.h,  cfgloop.h, target.h, lto-streamer.h,
-  target-globals.h, ipa-ref.h, cgraph.h, ipa-prop.h, 
-  ipa-inline.h, dwarf2out.h, NULL
+  target-globals.h, ipa-ref.h, cgraph.h, function-summary.h,
+  ipa-prop.h, ipa-inline.h, dwarf2out.h, NULL
 };
 const char *const *ifp;
 outf_p gtype_desc_c;
diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h
new file mode 100644
index 000..893f065
--- /dev/null
+++ b/gcc/symbol-summary.h
@@ -0,0 +1,313 @@
+/* Callgraph summary data structure.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   Contributed by Martin Liska
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but 

Re: [PATCH 4/4] Data structure is used for inline_summary struct.

2014-11-14 Thread Martin Liška

On 11/13/2014 05:04 PM, Jan Hubicka wrote:

+  if (!inline_summary_summary)
+inline_summary_summary = (inline_summary_cgraph_summary *) 
inline_summary_cgraph_summary::create_ggc (symtab);


Hehe, this is a funny naming scheme.
Perhaps inline_summary_d and inline_summary_t for the data and type?


Hello.

I adopted the suggested naming scheme.


-
-static void
-inline_node_duplication_hook (struct cgraph_node *src,
- struct cgraph_node *dst,
- ATTRIBUTE_UNUSED void *data)
+void
+inline_summary_cgraph_summary::duplication_hook (cgraph_node *src,
+ cgraph_node *dst,
+ inline_summary *,
+ inline_summary *info)


Because those are no longer hooks but virtual functions, I guess we could
simply call them duplicate/insert/remove.


Agree with the change.



In a way I would like to see these be methods of the underlying type rather
than virtual methods of the summary, because these are operations on the data
themselves.
I was thinking of modelling these with a special constructor and copy constructor
(taking the extra node pointer parameters) and a standard destructor.  I am not
sure whether it would be more understandable this way?


Motivation for this implementation is:
a) it's useful to have access to the cgraph_node that is associated with a summary
b) with GTY, we cannot call destructors


-/* Need a typedef for inline_summary because of inline function
-   'inline_summary' below.  */
-typedef struct inline_summary inline_summary_t;
-extern GTY(()) vecinline_summary_t, va_gc *inline_summary_vec;
+class GTY((user)) inline_summary_cgraph_summary: public cgraph_summary 
inline_summary *
+{
+public:
+  inline_summary_cgraph_summary (symbol_table *symtab, bool ggc):
+cgraph_summary inline_summary * (symtab, ggc) {}
+
+  static inline_summary_cgraph_summary *create_ggc (symbol_table *symtab)
+  {
+inline_summary_cgraph_summary *summary = new (ggc_cleared_alloc 
inline_summary_cgraph_summary ()) inline_summary_cgraph_summary(symtab, true);
+summary-disable_insertion_hook ();
+return summary;
+  }
+
+
+  virtual void insertion_hook (cgraph_node *, inline_summary *);
+  virtual void removal_hook (cgraph_node *node, inline_summary *);
+  virtual void duplication_hook (cgraph_node *src, cgraph_node *dst, 
inline_summary *src_data, inline_summary *dst_data);
+};
+
+extern GTY(()) cgraph_summary inline_summary * *inline_summary_summary;


All in all it looks better than original code.  If we moved insert/


  /* Information kept about parameter of call site.  */
  struct inline_param_summary
@@ -249,10 +265,10 @@ void clone_inlined_nodes (struct cgraph_edge *e, bool, 
bool, int *,
  extern int ncalls_inlined;
  extern int nfunctions_inlined;

-static inline struct inline_summary *
-inline_summary (struct cgraph_node *node)
+static inline inline_summary *
+get_inline_summary (const struct cgraph_node *node)
  {
-  return (*inline_summary_vec)[node-uid];
+  return (*inline_summary_summary)[node-summary_uid];


Hmm, I guess there is no way to avoid the (*...)? Otherwise it would be cleaner
to use inline_summary[...] instead of get_inline_summary, IMO.


I added function_summary::get method, where the usage looks cleaner:
inline_summary_d-get (node).

Thanks,
Martin
 

Thanks for working on this!
Honza



From 6e8531d8d3659524e337c7c1d96596952c3ff0e8 Mon Sep 17 00:00:00 2001
From: mliska mli...@suse.cz
Date: Fri, 14 Nov 2014 14:54:12 +0100
Subject: [PATCH 3/3] Data structure is used for inline_summary struct.

gcc/ChangeLog:

2014-11-12  Martin Liska  mli...@suse.cz

	* cgraphunit.c (symbol_table::process_new_functions):
	inline_summary_vec is replaced with inline_summary_t.
	* ipa-cp.c (ipcp_cloning_candidate_p): Usage of inline_summary_d::get.
	(devirtualization_time_bonus): Likewise.
	(estimate_local_effects): Likewise.
	(ipcp_propagate_stage): Likewise.
	* ipa-inline-analysis.c (evaluate_conditions_for_known_args): Likewise.
	(evaluate_properties_for_edge): Likewise.
	(inline_summary_alloc): Deletion of old hook holders.
	(reset_inline_summary): inline_summary is added as argument.
	(inline_summary_cgraph_summary::removal_hook): New function.
	(inline_summary_cgraph_summary::duplication_hook): Likewise.
	(dump_inline_edge_summary): Struct keyword removed.
	(dump_inline_summary): Likewise.
	(estimate_function_body_sizes): Usage of inline_summary_d::get.
	(compute_inline_parameters): Likewise.
	(estimate_edge_devirt_benefit): Struct keyword removed.
	(estimate_node_size_and_time): Likewise.
	(inline_update_callee_summaries): Likewise.
	(inline_merge_summary): Usage of inline_summary_d::get.
	(inline_update_overall_summary): Likewise.
	(simple_edge_hints): Likewise.
	(do_estimate_edge_time): Likewise.
	(estimate_time_after_inlining): Likewise.
	(estimate_size_after_inlining): Likewise.
	(do_estimate_growth): Likewise.
	(growth_likely_positive): Likewise.
	(inline_generate_summary): 

Re: [PATCH 2/4] New data structure for cgraph_summary introduced.

2014-11-14 Thread Martin Liška

On 11/14/2014 03:04 PM, Martin Liška wrote:

On 11/13/2014 04:50 PM, Jan Hubicka wrote:

gcc/ChangeLog:

2014-11-12  Martin Liska  mli...@suse.cz

* Makefile.in: New object file is added.
* cgraph.h (symbol_table::allocate_cgraph_symbol): Summary UID
is filled up.
* cgraph_summary.c: New file.
* cgraph_summary.h: New file.


I am trying to get rid of the cgraph prefixes for symbols (keeping them for
the graph only), and summaries can be attached to variables too. Even if that
is not necessarily supported by your current implementation, let's keep the API
prepared for it. So I would call it symtab-summary.* for source files and
symtab_summary for the base type (probably function_summary for annotating
functions and cgraph_edge_summary for annotating edges?)


Hello.

I followed your remarks, new class is called function_summary and is located
in symbol-summary.h.




diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index e2becb9..588b6d5 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -1225,6 +1225,8 @@ public:
int count_materialization_scale;
/* Unique id of the node.  */
int uid;
+  /* Summary unique id of the node.  */
+  int summary_uid;


What makes summary_uid better than uid?


Because cgraph_node::uid is not a unique ID; it is recycled. As far as I can see,
there are two remaining places that rely on cgraph_node::uid values being fairly
consecutive:

a) node_growth_cache vector is resized according to cgraph_max_uid
b) lto-partition.c: lto_balanced_map

If we change the IPA-related stuff to annotations and lto_balanced_map is
rewritten, we can finally unify uid and summary_uid. As Martin correctly pointed
out, we should also unify the cgraph_node dumps, where we combine uid and order.




diff --git a/gcc/cgraph_summary.c b/gcc/cgraph_summary.c
new file mode 100644
index 000..9af1d7e
--- /dev/null
+++ b/gcc/cgraph_summary.c


And why do we need this file?  It will need license header if really needed.


Sure, the file can be removed.

Martin



The implementation seems sane - I will check the actual uses :)
Please send the updated patch though.

Honza





Hello.

There's v3 of the patch.

Martin
From 89b6fb0f599944b564726947b33a7be214dd0f74 Mon Sep 17 00:00:00 2001
From: mliska mli...@suse.cz
Date: Thu, 13 Nov 2014 15:11:05 +0100
Subject: [PATCH 1/3] New data structure for function_summary introduced.

gcc/ChangeLog:

2014-11-12  Martin Liska  mli...@suse.cz

	* cgraph.h (symbol_table::allocate_cgraph_symbol): Summary UID
	is filled up.
	* symbol-summary.h: New file.
	* gengtype.c (open_base_files): Add symbol-summary.h.
	* toplev.c (general_init): Call constructor of symbol_table.
---
 gcc/cgraph.h |   8 ++
 gcc/gengtype.c   |   4 +-
 gcc/symbol-summary.h | 317 +++
 gcc/toplev.c |   3 +-
 4 files changed, 329 insertions(+), 3 deletions(-)
 create mode 100644 gcc/symbol-summary.h

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index e2becb9..588b6d5 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -1225,6 +1225,8 @@ public:
   int count_materialization_scale;
   /* Unique id of the node.  */
   int uid;
+  /* Summary unique id of the node.  */
+  int summary_uid;
   /* ID assigned by the profiling.  */
   unsigned int profile_id;
   /* Time profiler: first run of function.  */
@@ -1786,6 +1788,10 @@ public:
   friend class cgraph_node;
   friend class cgraph_edge;
 
+  symbol_table (): cgraph_max_summary_uid (1)
+  {
+  }
+
   /* Initialize callgraph dump file.  */
   void initialize (void);
 
@@ -1982,6 +1988,7 @@ public:
 
   int cgraph_count;
   int cgraph_max_uid;
+  int cgraph_max_summary_uid;
 
   int edges_count;
   int edges_max_uid;
@@ -2310,6 +2317,7 @@ symbol_table::allocate_cgraph_symbol (void)
   node-uid = cgraph_max_uid++;
 }
 
+  node-summary_uid = cgraph_max_summary_uid++;
   return node;
 }
 
diff --git a/gcc/gengtype.c b/gcc/gengtype.c
index fac83ee..0161004 100644
--- a/gcc/gengtype.c
+++ b/gcc/gengtype.c
@@ -1842,8 +1842,8 @@ open_base_files (void)
   tree-ssa-loop-niter.h, tree-into-ssa.h, tree-dfa.h, 
   tree-ssa.h, reload.h, cpp-id-data.h, tree-chrec.h,
   except.h, output.h,  cfgloop.h, target.h, lto-streamer.h,
-  target-globals.h, ipa-ref.h, cgraph.h, ipa-prop.h, 
-  ipa-inline.h, dwarf2out.h, NULL
+  target-globals.h, ipa-ref.h, cgraph.h, symbol-summary.h,
+  ipa-prop.h, ipa-inline.h, dwarf2out.h, NULL
 };
 const char *const *ifp;
 outf_p gtype_desc_c;
diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h
new file mode 100644
index 000..039d052
--- /dev/null
+++ b/gcc/symbol-summary.h
@@ -0,0 +1,317 @@
+/* Callgraph summary data structure.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   Contributed by Martin Liska
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version

Re: [PATCH 4/4] Data structure is used for inline_summary struct.

2014-11-14 Thread Martin Liška

On 11/14/2014 03:09 PM, Martin Liška wrote:

On 11/13/2014 05:04 PM, Jan Hubicka wrote:

+  if (!inline_summary_summary)
+inline_summary_summary = (inline_summary_cgraph_summary *) 
inline_summary_cgraph_summary::create_ggc (symtab);


Hehe, this is a funny naming scheme.
Perhaps inline_summary_d and inline_summary_t for the data and type?


Hello.

I adopted the suggested naming scheme.


-
-static void
-inline_node_duplication_hook (struct cgraph_node *src,
-  struct cgraph_node *dst,
-  ATTRIBUTE_UNUSED void *data)
+void
+inline_summary_cgraph_summary::duplication_hook (cgraph_node *src,
+  cgraph_node *dst,
+  inline_summary *,
+  inline_summary *info)


Because those are no longer hooks but virtual functions, I guess we could
simply call them duplicate/insert/remove.


Agree with the change.



In a way I would like to see these be methods of the underlying type rather
than virtual methods of the summary, because these are operations on the data
themselves.
I was thinking of modelling these with a special constructor and copy constructor
(taking the extra node pointer parameters) and a standard destructor.  I am not
sure whether it would be more understandable this way?


Motivation for this implementation is:
a) it's useful to have access to the cgraph_node that is associated with a summary
b) with GTY, we cannot call destructors


-/* Need a typedef for inline_summary because of inline function
-   'inline_summary' below.  */
-typedef struct inline_summary inline_summary_t;
-extern GTY(()) vecinline_summary_t, va_gc *inline_summary_vec;
+class GTY((user)) inline_summary_cgraph_summary: public cgraph_summary 
inline_summary *
+{
+public:
+  inline_summary_cgraph_summary (symbol_table *symtab, bool ggc):
+cgraph_summary inline_summary * (symtab, ggc) {}
+
+  static inline_summary_cgraph_summary *create_ggc (symbol_table *symtab)
+  {
+inline_summary_cgraph_summary *summary = new (ggc_cleared_alloc 
inline_summary_cgraph_summary ()) inline_summary_cgraph_summary(symtab, true);
+summary-disable_insertion_hook ();
+return summary;
+  }
+
+
+  virtual void insertion_hook (cgraph_node *, inline_summary *);
+  virtual void removal_hook (cgraph_node *node, inline_summary *);
+  virtual void duplication_hook (cgraph_node *src, cgraph_node *dst, 
inline_summary *src_data, inline_summary *dst_data);
+};
+
+extern GTY(()) cgraph_summary inline_summary * *inline_summary_summary;


All in all it looks better than original code.  If we moved insert/


  /* Information kept about parameter of call site.  */
  struct inline_param_summary
@@ -249,10 +265,10 @@ void clone_inlined_nodes (struct cgraph_edge *e, bool, 
bool, int *,
  extern int ncalls_inlined;
  extern int nfunctions_inlined;

-static inline struct inline_summary *
-inline_summary (struct cgraph_node *node)
+static inline inline_summary *
+get_inline_summary (const struct cgraph_node *node)
  {
-  return (*inline_summary_vec)[node-uid];
+  return (*inline_summary_summary)[node-summary_uid];


Hmm, I guess there is no way to avoid the (*...)? Otherwise it would be cleaner
to use inline_summary[...] instead of get_inline_summary, IMO.


I added function_summary::get method, where the usage looks cleaner:
inline_summary_d-get (node).

Thanks,
Martin


Thanks for working on this!
Honza





Patch v3.

Martin
From 7f57a3a762fecea9a20e307f06e868a73da98000 Mon Sep 17 00:00:00 2001
From: mliska mli...@suse.cz
Date: Fri, 14 Nov 2014 14:54:12 +0100
Subject: [PATCH 3/3] Data structure is used for inline_summary struct.

gcc/ChangeLog:

2014-11-12  Martin Liska  mli...@suse.cz

	* cgraphunit.c (symbol_table::process_new_functions):
	inline_summary_vec is replaced with inline_summary_t.
	* ipa-cp.c (ipcp_cloning_candidate_p): Usage of inline_summary_t::get.
	(devirtualization_time_bonus): Likewise.
	(estimate_local_effects): Likewise.
	(ipcp_propagate_stage): Likewise.
	* ipa-inline-analysis.c (evaluate_conditions_for_known_args): Likewise.
	(evaluate_properties_for_edge): Likewise.
	(inline_summary_alloc): Deletion of old hook holders.
	(reset_inline_summary): inline_summary is added as argument.
	(inline_summary_cgraph_summary::removal_hook): New function.
	(inline_summary_cgraph_summary::duplication_hook): Likewise.
	(dump_inline_edge_summary): Struct keyword removed.
	(dump_inline_summary): Likewise.
	(estimate_function_body_sizes): Usage of inline_summary_t::get.
	(compute_inline_parameters): Likewise.
	(estimate_edge_devirt_benefit): Struct keyword removed.
	(estimate_node_size_and_time): Likewise.
	(inline_update_callee_summaries): Likewise.
	(inline_merge_summary): Usage of inline_summary_t::get.
	(inline_update_overall_summary): Likewise.
	(simple_edge_hints): Likewise.
	(do_estimate_edge_time): Likewise.
	(estimate_time_after_inlining): Likewise.
	(estimate_size_after_inlining): Likewise.
	(do_estimate_growth): Likewise.
	(growth_likely_positive): Likewise

Re: [PATCH 4/4] Data structure is used for inline_summary struct.

2014-11-18 Thread Martin Liška

On 11/14/2014 05:06 PM, Jan Hubicka wrote:


In a way I would like to see these be methods of the underlying type rather
than virtual methods of the summary, because these are operations on the data
themselves.
I was thinking of modelling these with a special constructor and copy constructor
(taking the extra node pointer parameters) and a standard destructor.  I am not
sure whether it would be more understandable this way?


Motivation for this implementation is:
a) it's useful to have access to the cgraph_node that is associated with a summary


Yep, one would have node addition
  ctor (symtab_node *); (or cgraph/varpool nodes for cgraph/varpool annotations)
that would default to ctor for implementations that do not care about node.
And node duplication ctor
  ctor (summary , symtab_node *, symtab_node *)
that would default to copy constructor for data that do not need to be copied.


Hello.

I have no problem with such construction and destruction, we can also provide
base implementation.


I would say that the main advantage (in addition to having a way to provide
reasonable defaults) is to make the ctors/dtors of the embedded classes work
well, so one can, for example, embed a pointer_map and not care about its
construction/destruction.


b) with GTY, we cannot call destructor


Everything in the symbol table is explicitly memory managed (i.e. never left
to be freed by the garbage collector). It resides in GTY memory only to allow
linking garbage-collected objects from it and to get PCH working.


However, GTY types need to be allocated by ggc_alloc and one can't call the dtor.
This was the main motivation for providing hooks instead of a ctor/dtor API.
Maybe I am missing something?

Thanks,
Martin



This is, however, quite a cosmetic issue that I would prefer our C++ guys to
comment on.  We can tweak this incrementally.

+void
+inline_summary_t::duplicate (cgraph_node *src,
+cgraph_node *dst,
+inline_summary *,
+inline_summary *info)


Also, we should have a way to say that the annotation does not need to be
duplicated (for example when we do not want to annotate inline clones).
Probably by adding a duplicate_p predicate that is called before the actual
duplication happens?

The updated patch is OK, I will take a look on the main patch.

Honza

  {
-  struct inline_summary *info;
inline_summary_alloc ();
-  info = inline_summary (dst);
-  memcpy (info, inline_summary (src), sizeof (struct inline_summary));
+  memcpy (info, inline_summary_d-get (src), sizeof (inline_summary));
/* TODO: as an optimization, we may avoid copying conditions
   that are known to be false or true.  */
info-conds = vec_safe_copy (info-conds);
@@ -1328,7 +1309,7 @@ free_growth_caches (void)

  static void
  dump_inline_edge_summary (FILE *f, int indent, struct cgraph_node *node,
- struct inline_summary *info)
+ inline_summary *info)
  {
struct cgraph_edge *edge;
for (edge = node-callees; edge; edge = edge-next_callee)
@@ -1345,8 +1326,8 @@ dump_inline_edge_summary (FILE *f, int indent, struct 
cgraph_node *node,
   ? inlined : cgraph_inline_failed_string (edge- inline_failed),
   indent, , es-loop_depth, edge-frequency,
   es-call_stmt_size, es-call_stmt_time,
-  (int) inline_summary (callee)-size / INLINE_SIZE_SCALE,
-  (int) inline_summary (callee)-estimated_stack_size);
+  (int) inline_summary_d-get (callee)-size / INLINE_SIZE_SCALE,
+  (int) inline_summary_d-get (callee)-estimated_stack_size);

if (es-predicate)
{
@@ -1372,9 +1353,9 @@ dump_inline_edge_summary (FILE *f, int indent, struct 
cgraph_node *node,
  fprintf (f, %*sStack frame offset %i, callee self size %i,
callee size %i\n,
   indent + 2, ,
-  (int) inline_summary (callee)-stack_frame_offset,
-  (int) inline_summary (callee)-estimated_self_stack_size,
-  (int) inline_summary (callee)-estimated_stack_size);
+  (int) inline_summary_d-get (callee)-stack_frame_offset,
+  (int) inline_summary_d-get 
(callee)-estimated_self_stack_size,
+  (int) inline_summary_d-get (callee)-estimated_stack_size);
  dump_inline_edge_summary (f, indent + 2, callee, info);
}
  }
@@ -1402,7 +1383,7 @@ dump_inline_summary (FILE *f, struct cgraph_node *node)
  {
if (node-definition)
  {
-  struct inline_summary *s = inline_summary (node);
+  inline_summary *s = inline_summary_d-get (node);
size_time_entry *e;
int i;
fprintf (f, Inline summary for %s/%i, node-name (),
@@ -1725,7 +1706,7 @@ eliminated_by_inlining_prob (gimple stmt)

  static void
  set_cond_stmt_execution_predicate (struct ipa_node_params *info,
-  struct inline_summary *summary,
+

Re: [PATCH 4/4] Data structure is used for inline_summary struct.

2014-11-19 Thread Martin Liška

On 11/18/2014 11:25 PM, Martin Jambor wrote:

On Tue, Nov 18, 2014 at 07:59:26PM +0100, Jan Hubicka wrote:

Hi,

On Tue, Nov 18, 2014 at 04:39:00PM +0100, Jan Hubicka wrote:

On Fri, Nov 14, 2014 at 08:59:10PM +0100, Jan Hubicka wrote:



b) with GTY, we cannot call destructor


Everything in the symbol table is explicitly memory managed (i.e. never left
to be freed by the garbage collector). It resides in GTY memory only to allow
linking garbage-collected objects from it and to get PCH working.



Well, if I understand the intent correctly, summaries are for stuff
that is not in the symbol table.  For example jump functions are a

Correct.

vector of structures possibly containing trees, so everything has to
be in garbage collected memory.

When an edge is removed, it is necessary to be notified about it
immediately, for example to decrement rdesc_refcount (you might argue
that that should be done in a separate hook and not from within a
summary class but then you start to rely on hook invocation ordering
so I think it is better to eventually use the summaries for it too).


I do not see why ctors/dtors can not do the reference counting. In fact
this is how refcounting is done usually anyway?



Well, when there is no garbage collection involved then yes, that is
how you normally do it but in the GC case, there is the question of
what is the appropriate time to call destructor on garbage collected
data (like jump functions)?


I still fail to see the problem here.  Summaries are explicitly managed - they are
constructed at summary construction time or when a new callgraph node is
introduced/duplicated.  They are destroyed when the callgraph node is destroyed or
the whole summary is destroyed.  It is the job of the summary data structure to call
the proper ctors/dtors, not the job of the garbage collector that provides the
underlying memory management.


I do not think that all summaries (in the meaning of a description of
one particular symbol table node or call graph edge) are explicitly
managed.  For example ipa_edge_args or ipa_agg_replacement_value
(which my alignment patch changes to ipcp_transformation_summary) are
allocated in GC memory because they contain trees.



If you have datastructure that points to something that is not
explicitly managed (i.e. tree expression), you just can not have
non-trivial constructor on that datastructure, because that is freed
transparently by gty that don't do destruction...


I admit to not being particularly bright today but that seems to be
exactly my point.


Well, in your case you have a data structure, jump_function, that contains a pointer
to a tree (EXPR).  What I am trying to explain is that I see no reason why
jump_function needs to be POD.


I never said that the summary object needs to be a POD, I only said I
liked the possibility of storing very simple objects (without wrapping
them in classes with constructors and destructors).  That is of course
nothing more than my personal preference.


The tree pointed to by EXPR pointer can not
have a dtor by itself because GGC will not call it upon freeing.

It is true that jump_function lives in GGC memory (to make pointer to expr
work) but it never gets removed by ggc_collect because it is always pointed to
by the summary datastructure.  There are two ways to free the jump_function
datastructure.
   1) removing the symbol node it is attached to.
  Here the symtab code will call the removal hook that was registered by the
  container template. The container will call the destructor of jump_function
  and then ggc_free its memory
   2) removing the summary.  In this case I would again expect the container
  template to walk all summaries and free them.

So even if your structure lives in GGC memory it is not really garbage
collected and thus the lack of machinery to call dtors at a time ggc decides to
free something is not a problem?

In fact looking at struct default_hashmap_traits, I see:

   /* Called to dispose of the key and value before marking the entry as
  deleted.  */

   template<typename T> static void remove (T &v) { v.~T (); }


Now I see, I should have read your previous email more carefully: by
explicitly managed you mean that destructors will be called
explicitly by the summary infrastructure.  I was wondering how you
wanted to rip the summaries out of GGC memory.

Well, I suppose that would work, and since explicit calls to
destructors are basically the counterpart of placement new that we
already plan to use, it might be actually be the proper C++ thing to
do.

(I am not sure I like it though, for all other purposes the summary
objects will look like managed by the garbage collector and only we
who read this thread will know that the lifetime of the object would
be decoupled from the allocation-span of its memory).

Thanks for the clarification,

Martin


Hello.

I tried to come up with a ctor/dtor solution for the types passed to the
symbol_summary template class.

Example:
struct inline_summary
{
  inline_summary (cgraph_node *node);
  

[PATCH] PR lto/63968: 175.vpr from cpu2000 fails to build with LTO

2014-11-20 Thread Martin Liška

Hello.

As I reimplemented fibheap as a C++ template, Honza told me that the replace_key
method actually
supports just the decrement operation. The old implementation suppresses any
feedback if we try to increase a key:

fibheap.c:
...
  /* If we wanted to, we could actually do a real increase by redeleting and
 inserting. However, this would require O (log n) time. So just bail out
 for now.  */
  if (fibheap_comp_data (heap, key, data, node) > 0)
    return NULL;
...

My reimplementation added an assert for this kind of operation; as this PR
shows, we try to do an increment in bb-reorder.
Thus, I added a fibonacci_heap::replace_key method that can increment a key
(it deletes the node, and the new key
is then associated with the node).

The patch can bootstrap on x86_64-linux-pc and no new regression was introduced.
I would like to ask someone if the increase operation for bb-reorder is valid 
or not?

Thanks,
Martin
gcc/ChangeLog:

2014-11-20  Martin Liska  mli...@suse.cz

* bb-reorder.c (find_traces_1_round): decrease_key is replaced
with replace_key method.
* fibonacci_heap.h (fibonacci_heap::insert): New argument.
(fibonacci_heap::replace_key_data): Likewise.
(fibonacci_heap::replace_key): New method that can even increment key,
this operation costs O(log N).
(fibonacci_heap::extract_min): New argument.
(fibonacci_heap::delete_node): Likewise.
diff --git a/gcc/bb-reorder.c b/gcc/bb-reorder.c
index 689d7b6..b568114 100644
--- a/gcc/bb-reorder.c
+++ b/gcc/bb-reorder.c
@@ -644,7 +644,7 @@ find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
    (long) bbd[e-dest-index].node-get_key (),
    key);
 			}
-		  bbd[e-dest-index].heap-decrease_key
+		  bbd[e-dest-index].heap-replace_key
 		(bbd[e-dest-index].node, key);
 		}
 		}
@@ -812,7 +812,7 @@ find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
 			   e-dest-index,
 			   (long) bbd[e-dest-index].node-get_key (), key);
 		}
-		  bbd[e-dest-index].heap-decrease_key
+		  bbd[e-dest-index].heap-replace_key
 		(bbd[e-dest-index].node, key);
 		}
 	}
diff --git a/gcc/fibonacci_heap.h b/gcc/fibonacci_heap.h
index ecb92f8..3fce370 100644
--- a/gcc/fibonacci_heap.h
+++ b/gcc/fibonacci_heap.h
@@ -183,20 +183,27 @@ public:
   }
 
   /* For given NODE, set new KEY value.  */
-  K decrease_key (fibonacci_node_t *node, K key)
+  K replace_key (fibonacci_node_t *node, K key)
   {
 K okey = node-m_key;
-gcc_assert (key = okey);
 
 replace_key_data (node, key, node-m_data);
 return okey;
   }
 
+  /* For given NODE, decrease value to new KEY.  */
+  K decrease_key (fibonacci_node_t *node, K key)
+  {
+gcc_assert (key = node-m_key);
+return replace_key (node, key);
+  }
+
   /* For given NODE, set new KEY and DATA value.  */
   V *replace_key_data (fibonacci_node_t *node, K key, V *data);
 
-  /* Extract minimum node in the heap. */
-  V *extract_min ();
+  /* Extract minimum node in the heap. If RELEASE is specified,
+ memory is released.  */
+  V *extract_min (bool release = true);
 
   /* Return value associated with minimum node in the heap.  */
   V *min ()
@@ -214,12 +221,15 @@ public:
   }
 
   /* Delete NODE in the heap.  */
-  V *delete_node (fibonacci_node_t *node);
+  V *delete_node (fibonacci_node_t *node, bool release = true);
 
   /* Union the heap with HEAPB.  */
   fibonacci_heap *union_with (fibonacci_heap *heapb);
 
 private:
+  /* Insert new NODE given by KEY and DATA associated with the key.  */
+  fibonacci_node_t *insert (fibonacci_node_t *node, K key, V *data);
+
   /* Insert it into the root list.  */
   void insert_root (fibonacci_node_t *node);
 
@@ -322,6 +332,15 @@ fibonacci_heapK,V::insert (K key, V *data)
   /* Create the new node.  */
   fibonacci_nodeK,V *node = new fibonacci_node_t ();
 
+  return insert (node, key, data);
+}
+
+/* Insert new NODE given by KEY and DATA associated with the key.  */
+
+templateclass K, class V
+fibonacci_nodeK,V*
+fibonacci_heapK,V::insert (fibonacci_node_t *node, K key, V *data)
+{
   /* Set the node's data.  */
   node-m_data = data;
   node-m_key = key;
@@ -345,17 +364,22 @@ V*
 fibonacci_heapK,V::replace_key_data (fibonacci_nodeK,V *node, K key,
    V *data)
 {
-  V *odata;
   K okey;
   fibonacci_nodeK,V *y;
+  V *odata = node-m_data;
 
-  /* If we wanted to, we could actually do a real increase by redeleting and
- inserting. However, this would require O (log n) time. So just bail out
- for now.  */
+  /* If we wanted to, we do a real increase by redeleting and
+ inserting.  */
   if (node-compare_data (key)  0)
-return NULL;
+{
+  delete_node (node, false);
+
+  node = new (node) fibonacci_node_t ();
+  insert (node, key, data);
+
+  return odata;
+}
 
-  odata = node-m_data;
   okey = node-m_key;
   node-m_data = data;
   node-m_key = key;
@@ -385,7 +409,7 @@ fibonacci_heapK,V::replace_key_data 

[PATCH] PR ipa/63909 ICE: SIGSEGV in ipa_icf_gimple::func_checker::compare_bb()

2014-11-20 Thread Martin Liška

Hello.

The following patch fixes an ICE in IPA ICF. The problem was that the number of
non-debug statements in a BB can
change (for instance due to IPA split), so the number is now recomputed.

Patch can bootstrap on x86_64-linux-pc and no regression has been seen.
Ready for trunk?

Thanks,
Martin
gcc/ChangeLog:

2014-11-20  Martin Liska  mli...@suse.cz

* gimple-iterator.h (gsi_nondebug_stmt_count): New function.
* ipa-icf-gimple.c (func_checker::compare_bb): Number of BB
is recomputed because it can be split.

gcc/testsuite/ChangeLog:

2014-11-20  Martin Liska  mli...@suse.cz

* gcc.dg/ipa/pr63909.c: New test.
diff --git a/gcc/gimple-iterator.h b/gcc/gimple-iterator.h
index fb6cc07..f73b1f6 100644
--- a/gcc/gimple-iterator.h
+++ b/gcc/gimple-iterator.h
@@ -331,4 +331,18 @@ gsi_seq (gimple_stmt_iterator i)
   return *i.seq;
 }
 
+/* Return number of nondebug statements in basic block BB.  */
+
+static inline unsigned
+gsi_nondebug_stmt_count (basic_block bb)
+{
+  unsigned c = 0;
+  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+       gsi_next (&gsi))
+    if (!is_gimple_debug (gsi_stmt (gsi)))
+      c++;
+
+  return c;
+}
+
 #endif /* GCC_GIMPLE_ITERATOR_H */
diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c
index 8f2a438..83661ac 100644
--- a/gcc/ipa-icf-gimple.c
+++ b/gcc/ipa-icf-gimple.c
@@ -563,6 +563,9 @@ func_checker::compare_bb (sem_bb *bb1, sem_bb *bb2)
   gimple_stmt_iterator gsi1, gsi2;
   gimple s1, s2;
 
+  bb1-nondbg_stmt_count = gsi_nondebug_stmt_count (bb1-bb);
+  bb2-nondbg_stmt_count = gsi_nondebug_stmt_count (bb2-bb);
+
   if (bb1-nondbg_stmt_count != bb2-nondbg_stmt_count
   || bb1-edge_count != bb2-edge_count)
 return return_false ();
diff --git a/gcc/testsuite/gcc.dg/ipa/pr63909.c b/gcc/testsuite/gcc.dg/ipa/pr63909.c
new file mode 100644
index 000..8538e21
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/pr63909.c
@@ -0,0 +1,27 @@
+/* { dg-options -O2 -fno-guess-branch-probability } */
+
+int z;
+
+__attribute__((noinline))
+void g ()
+{
+  if (++z)
+__builtin_exit (0);
+  g ();
+}
+
+__attribute__((noinline))
+void f ()
+{
+  if (++z)
+__builtin_exit (0);
+  f ();
+}
+
+int main()
+{
+  f ();
+  g ();
+
+  return 0;
+}


Re: [PATCH] PR lto/63968: 175.vpr from cpu2000 fails to build with LTO

2014-11-21 Thread Martin Liška

On 11/20/2014 10:13 PM, Jan Hubicka wrote:

Hello.

As I reimplemented fibheap to C++ template, Honza told me that replace_key 
method actually
supports just decrement operation. Old implementation suppress any feedback if 
we try to increase key:

fibheap.c:
...
   /* If we wanted to, we could actually do a real increase by redeleting and
  inserting. However, this would require O (log n) time. So just bail out
  for now.  */
   if (fibheap_comp_data (heap, key, data, node) > 0)
     return NULL;
...

My reimplementation added assert for such kind operation, as this PR shows we 
try to do increment in reorder-bb.
Thus, I added fibonacci_heap::replace_key method that can increment key (it 
deletes the node and new key
is associated with the node).

The patch can bootstrap on x86_64-linux-pc and no new regression was introduced.
I would like to ask someone if the increase operation for bb-reorder is valid 
or not?


Can you verify that the implementation is correct? I tend to remember that I
introduced the
lazy incrementation to the inliner both for performance and correctness reasons. I
used to get
odd orders when keys were increased.

Honza


Hello.

What kind of correctness do you mean? The old implementation didn't support
the increment operation, and that fact was silently ignored.


Martin



Thanks,
Martin



gcc/ChangeLog:

2014-11-20  Martin Liska  mli...@suse.cz

* bb-reorder.c (find_traces_1_round): decrease_key is replaced
with replace_key method.
* fibonacci_heap.h (fibonacci_heap::insert): New argument.
(fibonacci_heap::replace_key_data): Likewise.
(fibonacci_heap::replace_key): New method that can even increment key,
this operation costs O(log N).
(fibonacci_heap::extract_min): New argument.
(fibonacci_heap::delete_node): Likewise.



diff --git a/gcc/bb-reorder.c b/gcc/bb-reorder.c
index 689d7b6..b568114 100644
--- a/gcc/bb-reorder.c
+++ b/gcc/bb-reorder.c
@@ -644,7 +644,7 @@ find_traces_1_round (int branch_th, int exec_th, gcov_type 
count_th,
   (long) bbd[e-dest-index].node-get_key (),
   key);
}
- bbd[e-dest-index].heap-decrease_key
+ bbd[e-dest-index].heap-replace_key
(bbd[e-dest-index].node, key);
}
}
@@ -812,7 +812,7 @@ find_traces_1_round (int branch_th, int exec_th, gcov_type 
count_th,
   e-dest-index,
   (long) bbd[e-dest-index].node-get_key (), 
key);
}
- bbd[e-dest-index].heap-decrease_key
+ bbd[e-dest-index].heap-replace_key
(bbd[e-dest-index].node, key);
}
}
diff --git a/gcc/fibonacci_heap.h b/gcc/fibonacci_heap.h
index ecb92f8..3fce370 100644
--- a/gcc/fibonacci_heap.h
+++ b/gcc/fibonacci_heap.h
@@ -183,20 +183,27 @@ public:
}

/* For given NODE, set new KEY value.  */
-  K decrease_key (fibonacci_node_t *node, K key)
+  K replace_key (fibonacci_node_t *node, K key)
{
  K okey = node-m_key;
-gcc_assert (key = okey);

  replace_key_data (node, key, node-m_data);
  return okey;
}

+  /* For given NODE, decrease value to new KEY.  */
+  K decrease_key (fibonacci_node_t *node, K key)
+  {
+gcc_assert (key = node-m_key);
+return replace_key (node, key);
+  }
+
/* For given NODE, set new KEY and DATA value.  */
V *replace_key_data (fibonacci_node_t *node, K key, V *data);

-  /* Extract minimum node in the heap. */
-  V *extract_min ();
+  /* Extract minimum node in the heap. If RELEASE is specified,
+ memory is released.  */
+  V *extract_min (bool release = true);

/* Return value associated with minimum node in the heap.  */
V *min ()
@@ -214,12 +221,15 @@ public:
}

/* Delete NODE in the heap.  */
-  V *delete_node (fibonacci_node_t *node);
+  V *delete_node (fibonacci_node_t *node, bool release = true);

/* Union the heap with HEAPB.  */
fibonacci_heap *union_with (fibonacci_heap *heapb);

  private:
+  /* Insert new NODE given by KEY and DATA associated with the key.  */
+  fibonacci_node_t *insert (fibonacci_node_t *node, K key, V *data);
+
/* Insert it into the root list.  */
void insert_root (fibonacci_node_t *node);

@@ -322,6 +332,15 @@ fibonacci_heapK,V::insert (K key, V *data)
/* Create the new node.  */
fibonacci_nodeK,V *node = new fibonacci_node_t ();

+  return insert (node, key, data);
+}
+
+/* Insert new NODE given by KEY and DATA associated with the key.  */
+
+templateclass K, class V
+fibonacci_nodeK,V*
+fibonacci_heapK,V::insert (fibonacci_node_t *node, K key, V *data)
+{
/* Set the node's data.  */
node-m_data = data;
node-m_key = key;
@@ -345,17 +364,22 @@ V*
  fibonacci_heapK,V::replace_key_data (fibonacci_nodeK,V *node, K key,

Re: [PATCH 8/9] Negative numbers added for sreal class.

2014-11-21 Thread Martin Liška

On 11/14/2014 11:48 AM, Richard Biener wrote:

On Thu, Nov 13, 2014 at 1:35 PM, mliska mli...@suse.cz wrote:

gcc/ChangeLog:

2014-11-13  Martin Liska  mli...@suse.cz

 * predict.c (propagate_freq): More elegant sreal API is used.
 (estimate_bb_frequencies): New static constants defined by sreal
 replace precomputed ones.
 * sreal.c (sreal::normalize): New function.
 (sreal::to_int): Likewise.
 (sreal::operator+): Likewise.
 (sreal::operator-): Likewise.
 * sreal.h: Definition of new functions added.


Please use gcc_checking_assert()s everywhere.  sreal is supposed
to be fast... (I see it has current uses of gcc_assert - you may want
to mass-convert them as a followup).


---
  gcc/predict.c | 30 +++-
  gcc/sreal.c   | 56 
  gcc/sreal.h   | 75 ---
  3 files changed, 126 insertions(+), 35 deletions(-)

diff --git a/gcc/predict.c b/gcc/predict.c
index 0215e91..0f640f5 100644
--- a/gcc/predict.c
+++ b/gcc/predict.c
@@ -82,7 +82,7 @@ along with GCC; see the file COPYING3.  If not see

  /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
-static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
+static sreal real_almost_one, real_br_prob_base,
  real_inv_br_prob_base, real_one_half, real_bb_freq_max;

  static void combine_predictions_for_insn (rtx_insn *, basic_block);
@@ -2528,13 +2528,13 @@ propagate_freq (basic_block head, bitmap tovisit)
 bb-count = bb-frequency = 0;
  }

-  BLOCK_INFO (head)-frequency = real_one;
+  BLOCK_INFO (head)-frequency = sreal::one ();
last = head;
for (bb = head; bb; bb = nextbb)
  {
edge_iterator ei;
-  sreal cyclic_probability = real_zero;
-  sreal frequency = real_zero;
+  sreal cyclic_probability = sreal::zero ();
+  sreal frequency = sreal::zero ();

nextbb = BLOCK_INFO (bb)-next;
BLOCK_INFO (bb)-next = NULL;
@@ -2559,13 +2559,13 @@ propagate_freq (basic_block head, bitmap tovisit)
   * BLOCK_INFO (e-src)-frequency /
   REG_BR_PROB_BASE);  */

-   sreal tmp (e-probability, 0);
+   sreal tmp = e-probability;
 tmp *= BLOCK_INFO (e-src)-frequency;
 tmp *= real_inv_br_prob_base;
 frequency += tmp;
   }

- if (cyclic_probability == real_zero)
+ if (cyclic_probability == sreal::zero ())
 {
   BLOCK_INFO (bb)-frequency = frequency;
 }
@@ -2577,7 +2577,7 @@ propagate_freq (basic_block head, bitmap tovisit)
   /* BLOCK_INFO (bb)-frequency = frequency
   / (1 - cyclic_probability) */

- cyclic_probability = real_one - cyclic_probability;
+ cyclic_probability = sreal::one () - cyclic_probability;
   BLOCK_INFO (bb)-frequency = frequency / cyclic_probability;
 }
 }
@@ -2591,7 +2591,7 @@ propagate_freq (basic_block head, bitmap tovisit)
  = ((e-probability * BLOCK_INFO (bb)-frequency)
  / REG_BR_PROB_BASE); */

- sreal tmp (e-probability, 0);
+ sreal tmp = e-probability;
   tmp *= BLOCK_INFO (bb)-frequency;
   EDGE_INFO (e)-back_edge_prob = tmp * real_inv_br_prob_base;
 }
@@ -2873,13 +2873,11 @@ estimate_bb_frequencies (bool force)
if (!real_values_initialized)
  {
   real_values_initialized = 1;
- real_zero = sreal (0, 0);
- real_one = sreal (1, 0);
- real_br_prob_base = sreal (REG_BR_PROB_BASE, 0);
- real_bb_freq_max = sreal (BB_FREQ_MAX, 0);
+ real_br_prob_base = REG_BR_PROB_BASE;
+ real_bb_freq_max = BB_FREQ_MAX;
   real_one_half = sreal (1, -1);
- real_inv_br_prob_base = real_one / real_br_prob_base;
- real_almost_one = real_one - real_inv_br_prob_base;
+ real_inv_br_prob_base = sreal::one () / real_br_prob_base;
+ real_almost_one = sreal::one () - real_inv_br_prob_base;
 }

mark_dfs_back_edges ();
@@ -2897,7 +2895,7 @@ estimate_bb_frequencies (bool force)

   FOR_EACH_EDGE (e, ei, bb-succs)
 {
- EDGE_INFO (e)-back_edge_prob = sreal (e-probability, 0);
+ EDGE_INFO (e)-back_edge_prob = e-probability;
   EDGE_INFO (e)-back_edge_prob *= real_inv_br_prob_base;
 }
 }
@@ -2906,7 +2904,7 @@ estimate_bb_frequencies (bool force)
   to outermost to examine frequencies for back edges.  */
estimate_loops ();

-  freq_max = real_zero;
+  freq_max = sreal::zero ();
FOR_EACH_BB_FN (bb, cfun)
 if (freq_max  BLOCK_INFO (bb)-frequency)
   

Re: [PATCH] PR ipa/63909 ICE: SIGSEGV in ipa_icf_gimple::func_checker::compare_bb()

2014-11-21 Thread Martin Liška

On 11/20/2014 05:41 PM, Richard Biener wrote:

On Thu, Nov 20, 2014 at 5:30 PM, Martin Liška mli...@suse.cz wrote:

Hello.

Following patch fixes ICE in IPA ICF. Problem was that number of non-debug
statements in a BB can
change (for instance by IPA split), so that the number is recomputed.


Huh, so can it get different for both candidates?  I think the stmt compare
loop should be terminated on gsi_end_p of either iterator and return
false for any remaining non-debug-stmts on the other.

Thus, not walk all stmts twice here.


Hello.

Sorry for the previous patch; you are right, it can be fixed in a cleaner
way. Please take a look at the attached patch.




As IPA split is run early I don't see how it should affect a real IPA
pass though?




Sorry for the imprecise information; the problematic BB is changed here:
#0  gsi_split_seq_before (i=0x7fffd550, pnew_seq=0x7fffd528) at 
../../gcc/gimple-iterator.c:429
#1  0x00b95a2a in gimple_split_block (bb=0x76c41548, 
stmt=0x0) at ../../gcc/tree-cfg.c:5707
#2  0x007563cf in split_block (bb=0x76c41548, i=i@entry=0x0) 
at ../../gcc/cfghooks.c:508
#3  0x00756b44 in split_block_after_labels (bb=optimized out) 
at ../../gcc/cfghooks.c:549
#4  make_forwarder_block (bb=optimized out, 
redirect_edge_p=redirect_edge_p@entry=0x75d4e0 
mfb_keep_just(edge_def*), new_bb_cbk=new_bb_cbk@entry=0x0) at 
../../gcc/cfghooks.c:842
#5  0x0076085a in create_preheader (loop=0x76d56948, 
flags=optimized out) at ../../gcc/cfgloopmanip.c:1563
#6  0x00760aea in create_preheaders (flags=1) at 
../../gcc/cfgloopmanip.c:1613
#7  0x009bc6b0 in apply_loop_flags (flags=15) at 
../../gcc/loop-init.c:75
#8  0x009bc7d3 in loop_optimizer_init (flags=15) at 
../../gcc/loop-init.c:136
#9  0x00957914 in estimate_function_body_sizes 
(node=0x76c47620, early=false) at ../../gcc/ipa-inline-analysis.c:2480
#10 0x0095948b in compute_inline_parameters 
(node=0x76c47620, early=false) at ../../gcc/ipa-inline-analysis.c:2907
#11 0x0095bd88 in inline_analyze_function (node=0x76c47620) 
at ../../gcc/ipa-inline-analysis.c:3994
#12 0x0095bed3 in inline_generate_summary () at 
../../gcc/ipa-inline-analysis.c:4045
#13 0x00a70b71 in execute_ipa_summary_passes 
(ipa_pass=0x1dcb9e0) at ../../gcc/passes.c:2137

#14 0x00777a15 in ipa_passes () at ../../gcc/cgraphunit.c:2074
#15 symbol_table::compile (this=this@entry=0x76c3a000) at 
../../gcc/cgraphunit.c:2187
#16 0x00778bcd in symbol_table::finalize_compilation_unit 
(this=0x76c3a000) at ../../gcc/cgraphunit.c:2340
#17 0x006580ee in c_write_global_declarations () at 
../../gcc/c/c-decl.c:10777

#18 0x00b5bb8b in compile_file () at ../../gcc/toplev.c:584
#19 0x00b5def1 in do_compile () at ../../gcc/toplev.c:2041
#20 0x00b5e0fa in toplev::main (this=0x7fffdc9f, argc=20, 
argv=0x7fffdd98) at ../../gcc/toplev.c:2138
#21 0x0063f1d9 in main (argc=20, argv=0x7fffdd98) at 
../../gcc/main.c:38


Patch can bootstrap on x86_64-linux-pc and no regression has been seen.
Ready for trunk?


Thanks,
Martin



Thanks,
Richard.


Patch can bootstrap on x86_64-linux-pc and no regression has been seen.
Ready for trunk?

Thanks,
Martin


From 09b90f6a5ec1e49464f57c333af43574ad8c1375 Mon Sep 17 00:00:00 2001
From: mliska mli...@suse.cz
Date: Thu, 20 Nov 2014 16:28:54 +0100
Subject: [PATCH] Fix and new test.

gcc/ChangeLog:

2014-11-21  Martin Liska  mli...@suse.cz

	* gimple-iterator.h (gsi_start_bb_nondebug): New function.
	* ipa-icf-gimple.c (func_checker::compare_bb): Correct iteration
	replaces loop based on precomputed number of non-debug statements.

gcc/testsuite/ChangeLog:

2014-11-21  Martin Liska  mli...@suse.cz

	* gcc.dg/ipa/pr63909.c: New test.
---
 gcc/gimple-iterator.h  | 13 +
 gcc/ipa-icf-gimple.c   | 25 ++---
 gcc/testsuite/gcc.dg/ipa/pr63909.c | 27 +++
 3 files changed, 50 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/ipa/pr63909.c

diff --git a/gcc/gimple-iterator.h b/gcc/gimple-iterator.h
index fb6cc07..e9602b3 100644
--- a/gcc/gimple-iterator.h
+++ b/gcc/gimple-iterator.h
@@ -211,6 +211,19 @@ gsi_stmt (gimple_stmt_iterator i)
   return i.ptr;
 }
 
+/* Return a new iterator pointing to the first non-debug statement
+   in basic block BB.  */
+
+static inline gimple_stmt_iterator
+gsi_start_bb_nondebug (basic_block bb)
+{
+  gimple_stmt_iterator gsi = gsi_start_bb (bb);
+  while (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi)))
+    gsi_next (&gsi);
+
+  return gsi;
+}
+
 /* Return a block statement iterator that points to the first non-label
statement in block BB.  */
 
diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c
index 8f2a438..ec0290a 100644
--- a/gcc/ipa-icf-gimple.c
+++ b/gcc/ipa-icf-gimple.c
@@ -559,24 +559,16 @@ func_checker::parse_labels (sem_bb *bb

Re: [PATCH 8/9] Negative numbers added for sreal class.

2014-11-21 Thread Martin Liška

On 11/21/2014 01:03 PM, Richard Biener wrote:

On Fri, Nov 21, 2014 at 12:21 PM, Martin Liška mli...@suse.cz wrote:

On 11/14/2014 11:48 AM, Richard Biener wrote:


On Thu, Nov 13, 2014 at 1:35 PM, mliska mli...@suse.cz wrote:


gcc/ChangeLog:

2014-11-13  Martin Liska  mli...@suse.cz

  * predict.c (propagate_freq): More elegant sreal API is used.
  (estimate_bb_frequencies): New static constants defined by sreal
  replace precomputed ones.
  * sreal.c (sreal::normalize): New function.
  (sreal::to_int): Likewise.
  (sreal::operator+): Likewise.
  (sreal::operator-): Likewise.
  * sreal.h: Definition of new functions added.



Please use gcc_checking_assert()s everywhere.  sreal is supposed
to be fast... (I see it has current uses of gcc_assert - you may want
to mass-convert them as a followup).


---
   gcc/predict.c | 30 +++-
   gcc/sreal.c   | 56 
   gcc/sreal.h   | 75
---
   3 files changed, 126 insertions(+), 35 deletions(-)

diff --git a/gcc/predict.c b/gcc/predict.c
index 0215e91..0f640f5 100644
--- a/gcc/predict.c
+++ b/gcc/predict.c
@@ -82,7 +82,7 @@ along with GCC; see the file COPYING3.  If not see

   /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
 1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
-static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
+static sreal real_almost_one, real_br_prob_base,
   real_inv_br_prob_base, real_one_half, real_bb_freq_max;

   static void combine_predictions_for_insn (rtx_insn *, basic_block);
@@ -2528,13 +2528,13 @@ propagate_freq (basic_block head, bitmap tovisit)
  bb-count = bb-frequency = 0;
   }

-  BLOCK_INFO (head)-frequency = real_one;
+  BLOCK_INFO (head)-frequency = sreal::one ();
 last = head;
 for (bb = head; bb; bb = nextbb)
   {
 edge_iterator ei;
-  sreal cyclic_probability = real_zero;
-  sreal frequency = real_zero;
+  sreal cyclic_probability = sreal::zero ();
+  sreal frequency = sreal::zero ();

 nextbb = BLOCK_INFO (bb)-next;
 BLOCK_INFO (bb)-next = NULL;
@@ -2559,13 +2559,13 @@ propagate_freq (basic_block head, bitmap tovisit)
* BLOCK_INFO (e-src)-frequency /
REG_BR_PROB_BASE);  */

-   sreal tmp (e-probability, 0);
+   sreal tmp = e-probability;
  tmp *= BLOCK_INFO (e-src)-frequency;
  tmp *= real_inv_br_prob_base;
  frequency += tmp;
}

- if (cyclic_probability == real_zero)
+ if (cyclic_probability == sreal::zero ())
  {
BLOCK_INFO (bb)-frequency = frequency;
  }
@@ -2577,7 +2577,7 @@ propagate_freq (basic_block head, bitmap tovisit)
/* BLOCK_INFO (bb)-frequency = frequency
/ (1 - cyclic_probability)
*/

- cyclic_probability = real_one - cyclic_probability;
+ cyclic_probability = sreal::one () - cyclic_probability;
BLOCK_INFO (bb)-frequency = frequency /
cyclic_probability;
  }
  }
@@ -2591,7 +2591,7 @@ propagate_freq (basic_block head, bitmap tovisit)
   = ((e-probability * BLOCK_INFO (bb)-frequency)
   / REG_BR_PROB_BASE); */

- sreal tmp (e-probability, 0);
+ sreal tmp = e-probability;
tmp *= BLOCK_INFO (bb)-frequency;
EDGE_INFO (e)-back_edge_prob = tmp * real_inv_br_prob_base;
  }
@@ -2873,13 +2873,11 @@ estimate_bb_frequencies (bool force)
 if (!real_values_initialized)
   {
real_values_initialized = 1;
- real_zero = sreal (0, 0);
- real_one = sreal (1, 0);
- real_br_prob_base = sreal (REG_BR_PROB_BASE, 0);
- real_bb_freq_max = sreal (BB_FREQ_MAX, 0);
+ real_br_prob_base = REG_BR_PROB_BASE;
+ real_bb_freq_max = BB_FREQ_MAX;
real_one_half = sreal (1, -1);
- real_inv_br_prob_base = real_one / real_br_prob_base;
- real_almost_one = real_one - real_inv_br_prob_base;
+ real_inv_br_prob_base = sreal::one () / real_br_prob_base;
+ real_almost_one = sreal::one () - real_inv_br_prob_base;
  }

 mark_dfs_back_edges ();
@@ -2897,7 +2895,7 @@ estimate_bb_frequencies (bool force)

FOR_EACH_EDGE (e, ei, bb-succs)
  {
- EDGE_INFO (e)-back_edge_prob = sreal (e-probability, 0);
+ EDGE_INFO (e)-back_edge_prob = e-probability;
EDGE_INFO (e)-back_edge_prob *= real_inv_br_prob_base;
  }
  }
@@ -2906,7 +2904,7 @@ estimate_bb_frequencies (bool force)
to outermost to examine frequencies for back edges

Re: [PATCH 8/9] Negative numbers added for sreal class.

2014-11-21 Thread Martin Liška

On 11/21/2014 04:02 PM, Richard Biener wrote:

On Fri, Nov 21, 2014 at 3:39 PM, Martin Liška mli...@suse.cz wrote:


Hello.

Ok, this is simplified, one can use sreal a = 12345 and it works ;)


that's a  new API, right?  There is no max () and I think that using
LONG_MIN here is asking for trouble (host dependence).  The
comment in the file says the max should be
sreal (SREAL_MAX_SIG, SREAL_MAX_EXP) and the min
sreal (-SREAL_MAX_SIG, SREAL_MAX_EXP)?



Sure, sreal can store much bigger(smaller) numbers :)


Where do you need sreal::to_double?  The host shouldn't perform
double calculations so it can be only for dumping?  In which case
the user should have used sreal::dump (), maybe with extra
arguments.



That new function was requested by Honza, only for debugging purposes.
I agree that dump should do this kind of job.

If no other problem, I will run tests once more and commit it.
Thanks,
Martin


-#define SREAL_MAX_EXP (INT_MAX / 4)
+#define SREAL_MAX_EXP (INT_MAX / 8)

this change doesn't look necessary anymore?

Btw, it's also odd that...

  #define SREAL_PART_BITS 32
...
  #define SREAL_MIN_SIG ((uint64_t) 1  (SREAL_PART_BITS - 1))
  #define SREAL_MAX_SIG (((uint64_t) 1  SREAL_PART_BITS) - 1)

thus all m_sig values fit in 32bits but we still use a uint64_t m_sig ...
(the implementation uses 64bit for internal computations, but still
the storage is wasteful?)

Of course the way normalize() works requires that storage to be
64bits to store unnormalized values.

I'd say ok with the SREAL_MAX_EXP change reverted.



Hi.

You are right, this change was made because I used one bit for
m_negative (bitfield); it is not needed any more.


Final version attached.

Thank you,
Martin


Thanks,
Richard.





Otherwise looks good to me and sorry for not noticing the above
earlier.

Thanks,
Richard.


Thanks,
Martin



};

extern void debug (sreal ref);
@@ -76,12 +133,12 @@ inline sreal operator+= (sreal a, const sreal
b)

inline sreal operator-= (sreal a, const sreal b)
{
-return a = a - b;
+  return a = a - b;
}

inline sreal operator/= (sreal a, const sreal b)
{
-return a = a / b;
+  return a = a / b;
}

inline sreal operator*= (sreal a, const sreal b)
--
2.1.2








From b28e4264b5f9965ca5ab4f52ce6f4c9df00d4800 Mon Sep 17 00:00:00 2001
From: mliska mli...@suse.cz
Date: Fri, 21 Nov 2014 12:07:40 +0100
Subject: [PATCH 1/2] Negative numbers added for sreal class.

gcc/ChangeLog:

2014-11-13  Martin Liska  mli...@suse.cz

	* predict.c (propagate_freq): More elegant sreal API is used.
	(estimate_bb_frequencies): Precomputed constants replaced by integer
	constants.
	* sreal.c (sreal::normalize): New function.
	(sreal::to_int): Likewise.
	(sreal::operator+): Likewise.
	(sreal::operator-): Likewise.
	* sreal.h: Definition of new functions added.
---
 gcc/predict.c |  30 
 gcc/sreal.c   | 114 --
 gcc/sreal.h   |  82 +-
 3 files changed, 174 insertions(+), 52 deletions(-)

diff --git a/gcc/predict.c b/gcc/predict.c
index 779af11..0cfe4a9 100644
--- a/gcc/predict.c
+++ b/gcc/predict.c
@@ -82,7 +82,7 @@ along with GCC; see the file COPYING3.  If not see
 
 /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
 		   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
-static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
+static sreal real_almost_one, real_br_prob_base,
 	 real_inv_br_prob_base, real_one_half, real_bb_freq_max;
 
 static void combine_predictions_for_insn (rtx_insn *, basic_block);
@@ -2541,13 +2541,13 @@ propagate_freq (basic_block head, bitmap tovisit)
 	bb-count = bb-frequency = 0;
 }
 
-  BLOCK_INFO (head)-frequency = real_one;
+  BLOCK_INFO (head)-frequency = 1;
   last = head;
   for (bb = head; bb; bb = nextbb)
 {
   edge_iterator ei;
-  sreal cyclic_probability = real_zero;
-  sreal frequency = real_zero;
+  sreal cyclic_probability = 0;
+  sreal frequency = 0;
 
   nextbb = BLOCK_INFO (bb)-next;
   BLOCK_INFO (bb)-next = NULL;
@@ -2572,13 +2572,13 @@ propagate_freq (basic_block head, bitmap tovisit)
   * BLOCK_INFO (e-src)-frequency /
   REG_BR_PROB_BASE);  */
 
-		sreal tmp (e-probability, 0);
+		sreal tmp = e-probability;
 		tmp *= BLOCK_INFO (e-src)-frequency;
 		tmp *= real_inv_br_prob_base;
 		frequency += tmp;
 	  }
 
-	  if (cyclic_probability == real_zero)
+	  if (cyclic_probability == 0)
 	{
 	  BLOCK_INFO (bb)-frequency = frequency;
 	}
@@ -2590,7 +2590,7 @@ propagate_freq (basic_block head, bitmap tovisit)
 	  /* BLOCK_INFO (bb)-frequency = frequency
 	  / (1 - cyclic_probability) */
 
-	  cyclic_probability = real_one - cyclic_probability;
+	  cyclic_probability = sreal (1) - cyclic_probability;
 	  BLOCK_INFO (bb)-frequency = frequency / cyclic_probability;
 	}
 	}
@@ -2604,7 +2604,7 @@ propagate_freq (basic_block head, bitmap tovisit

[PATCH] IPA ICF: memory leak fix

2014-11-22 Thread Martin Liška
Hello.

The following patch removes a memory leak that was introduced by the very first
IPA ICF patch.
I would like to thank David for hunting the leak.

The patch can bootstrap on x86_64-linux-pc and no regression is introduced.

Thanks,
Martin
From f959905e984a84d0353fb1e32ba83db2b6dfe4d2 Mon Sep 17 00:00:00 2001
From: mliska mli...@suse.cz
Date: Fri, 21 Nov 2014 16:04:06 +0100
Subject: [PATCH] IPA ICF: memory leak fix

gcc/ChangeLog:

2014-11-21  David Malcolm  dmalc...@redhat.com
	Martin Liska  mli...@suse.cz

	* ipa-icf.c (sem_function::equals_private): auto_vec<int> replaces
	int* allocated with XNEWVEC.
	(sem_function::bb_dict_test): Likewise.
	* ipa-icf.h: Likewise.
---
 gcc/ipa-icf.c | 15 ++-
 gcc/ipa-icf.h |  2 +-
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c
index e0633e7..4a0fcfb 100644
--- a/gcc/ipa-icf.c
+++ b/gcc/ipa-icf.c
@@ -410,7 +410,6 @@ sem_function::equals_private (sem_item *item,
   basic_block bb1, bb2;
   edge e1, e2;
   edge_iterator ei1, ei2;
-  int *bb_dict = NULL;
   bool result = true;
   tree arg1, arg2;
 
@@ -489,8 +488,8 @@ sem_function::equals_private (sem_item *item,
   /* Basic block edges check.  */
   for (unsigned i = 0; i  bb_sorted.length (); ++i)
 {
-  bb_dict = XNEWVEC (int, bb_sorted.length () + 2);
-  memset (bb_dict, -1, (bb_sorted.length () + 2) * sizeof (int));
+  auto_vecint bb_dict;
+  bb_dict.safe_grow_cleared (bb_sorted.length () + 2);
 
   bb1 = bb_sorted[i]-bb;
   bb2 = m_compared_func-bb_sorted[i]-bb;
@@ -957,9 +956,15 @@ sem_function::icf_handled_component_p (tree t)
corresponds to TARGET.  */
 
 bool
-sem_function::bb_dict_test (int* bb_dict, int source, int target)
+sem_function::bb_dict_test (auto_vecint bb_dict, int source, int target)
 {
-  if (bb_dict[source] == -1)
+  /* bb_dict is cleared with zeros, so that source and target are
+ incremented. bb_dist is used to verify that edges in source and
+ target function correspond. */
+
+  source++;
+  target++;
+  if (bb_dict[source] == 0)
 {
   bb_dict[source] = target;
   return true;
diff --git a/gcc/ipa-icf.h b/gcc/ipa-icf.h
index 046e858..75db93a 100644
--- a/gcc/ipa-icf.h
+++ b/gcc/ipa-icf.h
@@ -275,7 +275,7 @@ private:
 
   /* Basic blocks dictionary BB_DICT returns true if SOURCE index BB
  corresponds to TARGET.  */
-  bool bb_dict_test (int* bb_dict, int source, int target);
+  bool bb_dict_test (auto_vecint bb_dict, int source, int target);
 
   /* Iterates all tree types in T1 and T2 and returns true if all types
  are compatible. If COMPARE_POLYMORPHIC is set to true,
-- 
2.1.2



Re: [PATCH] IPA ICF: memory leak fix

2014-11-22 Thread Martin Liška
On 11/22/2014 10:09 AM, Markus Trippelsdorf wrote:
 On 2014.11.22 at 09:05 +0100, Martin Liška wrote:
 Hello.

 Following patch removes memory leak that was introduced by very first IPA 
 ICF patch.
 I would like to thank David for hunting the leak.

 Patch can bootstrap on x86_64-linux-pc and no regression is introduced.
 
 I gave the patch a quick spin on gcc112:
 
 *** Error in `/home/trippels/gcc_build_dir/./prev-gcc/lto1': free(): invalid 
 next size (fast): 0x01000a5fc160 ***
 === Backtrace: =
 /lib64/libc.so.6(+0xa3d9c)[0x3fff7b6b3d9c]
 /lib64/libc.so.6(+0xaf0b4)[0x3fff7b6bf0b4]
 /home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN3vecIi7va_heap6vl_ptrE7releaseEv-0x1d4bc00)[0x1025dd88]
 /home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN7ipa_icf12sem_function14equals_privateEPNS_8sem_itemER8hash_mapIP11symtab_nodeS2_22default_hashmap_traitsE-0x9c083c)[0x116586bc]
 /home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN7ipa_icf12sem_function6equalsEPNS_8sem_itemER8hash_mapIP11symtab_nodeS2_22default_hashmap_traitsE-0x9c0578)[0x11658998]
 /home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN7ipa_icf18sem_item_optimizer7executeEv-0x9b8774)[0x11660a84]
 /home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN7ipa_icf12pass_ipa_icf7executeEP8function-0x9b0314)[0x11668efc]
 /home/trippels/gcc_build_dir/./prev-gcc/lto1(_Z16execute_one_passP8opt_pass-0x1647588)[0x1098a0a8]
 /home/trippels/gcc_build_dir/./prev-gcc/lto1(_Z21execute_ipa_pass_listP8opt_pass-0x1644c2c)[0x1098ca7c]
 /home/trippels/gcc_build_dir/./prev-gcc/lto1(_Z8lto_mainv-0x1df20e4)[0x101b494c]
 /home/trippels/gcc_build_dir/./prev-gcc/lto1[0x10b599b8]
 /home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN6toplev4mainEiPPc-0x1e8be70)[0x101507b8]
 /home/trippels/gcc_build_dir/./prev-gcc/lto1(main-0x1ec8d8c)[0x1015493c]
 /lib64/libc.so.6(+0x447ac)[0x3fff7b6547ac]
 /lib64/libc.so.6(__libc_start_main-0x19cbf4)[0x3fff7b6549d4]
 === Memory map: 
 ...
 

Hello.

Thank you for testing. The problem is that I should grow the vector by one more
element, because '0' is used as the NULL value.

Please try my fixed patch.

Thanks,
Martin
From 7280e2c8de246c72d2608b5c58590f4fabaf6234 Mon Sep 17 00:00:00 2001
From: mliska mli...@suse.cz
Date: Fri, 21 Nov 2014 16:04:06 +0100
Subject: [PATCH] IPA ICF: memory leak fix

gcc/ChangeLog:

2014-11-21  David Malcolm  dmalc...@redhat.com
	Martin Liska  mli...@suse.cz

	* ipa-icf.c (sem_function::equals_private): auto_vecint replaces
	int* allocated with XNEWVEC.
	(sem_function::bb_dict_test): Likewise.
	* ipa-icf.h: Likewise.
---
 gcc/ipa-icf.c | 18 +-
 gcc/ipa-icf.h |  2 +-
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c
index e0633e7..c8060bf 100644
--- a/gcc/ipa-icf.c
+++ b/gcc/ipa-icf.c
@@ -410,7 +410,6 @@ sem_function::equals_private (sem_item *item,
   basic_block bb1, bb2;
   edge e1, e2;
   edge_iterator ei1, ei2;
-  int *bb_dict = NULL;
   bool result = true;
   tree arg1, arg2;
 
@@ -489,8 +488,11 @@ sem_function::equals_private (sem_item *item,
   /* Basic block edges check.  */
   for (unsigned i = 0; i  bb_sorted.length (); ++i)
 {
-  bb_dict = XNEWVEC (int, bb_sorted.length () + 2);
-  memset (bb_dict, -1, (bb_sorted.length () + 2) * sizeof (int));
+  auto_vecint bb_dict;
+  /* Size of bb_dict is number of basic blocks plus
+ 2 for entry and exit block and plus one because
+	 '0' is used as NULL value.  */
+  bb_dict.safe_grow_cleared (bb_sorted.length () + 3);
 
   bb1 = bb_sorted[i]-bb;
   bb2 = m_compared_func-bb_sorted[i]-bb;
@@ -957,9 +959,15 @@ sem_function::icf_handled_component_p (tree t)
corresponds to TARGET.  */
 
 bool
-sem_function::bb_dict_test (int* bb_dict, int source, int target)
+sem_function::bb_dict_test (auto_vecint bb_dict, int source, int target)
 {
-  if (bb_dict[source] == -1)
+  /* bb_dict is cleared with zeros, so that source and target are
+ incremented. bb_dist is used to verify that edges in source and
+ target function correspond. */
+
+  source++;
+  target++;
+  if (bb_dict[source] == 0)
 {
   bb_dict[source] = target;
   return true;
diff --git a/gcc/ipa-icf.h b/gcc/ipa-icf.h
index 046e858..75db93a 100644
--- a/gcc/ipa-icf.h
+++ b/gcc/ipa-icf.h
@@ -275,7 +275,7 @@ private:
 
   /* Basic blocks dictionary BB_DICT returns true if SOURCE index BB
  corresponds to TARGET.  */
-  bool bb_dict_test (int* bb_dict, int source, int target);
+  bool bb_dict_test (auto_vecint bb_dict, int source, int target);
 
   /* Iterates all tree types in T1 and T2 and returns true if all types
  are compatible. If COMPARE_POLYMORPHIC is set to true,
-- 
2.1.2



[PATCH] sreal class fix for PR64050 and PR64060

2014-11-25 Thread Martin Liška

Hello.

The following patch fixes the sreal problems mentioned in PR64050 and PR64060.
I added a new GCC plugin test in which I test sreal arithmetic and number
comparison.

The patch can bootstrap on ppc64-linux-pc and x86_64-linux-pc and passes the
regression
tests.

Thanks,
Martin
gcc/ChangeLog:

2014-11-25  Martin Liska  Martin li...@suse.cz

PR bootstrap/64050
PR ipa/64060
* sreal.c (sreal::operator+): Addition fixed.
(sreal::signedless_plus): Negative numbers are
handled correctly.
(sreal::operator-): Subtraction is fixed.
(sreal::signedless_minus): Negative numbers are
handled correctly.
* sreal.h (sreal::operator): Equal negative numbers
are compared correctly.
(sreal::shift): New checking asserts are introduced.
Operation is fixed.

gcc/testsuite/ChangeLog:

2014-11-25  Martin Liska  Martin li...@suse.cz

PR bootstrap/64050
PR ipa/64060
* gcc.dg/plugin/plugin.exp: New plugin.
* gcc.dg/plugin/sreal-test-1.c: New test.
* gcc.dg/plugin/sreal_plugin.c: New test.
diff --git a/gcc/sreal.c b/gcc/sreal.c
index 0337f9e..2b5e3ae 100644
--- a/gcc/sreal.c
+++ b/gcc/sreal.c
@@ -182,9 +182,9 @@ sreal::operator+ (const sreal other) const
 {
   sreal tmp = -(*b_p);
   if (*a_p  tmp)
-	return signedless_minus (tmp, *a_p, false);
+	return signedless_minus (tmp, *a_p, true);
   else
-	return signedless_minus (*a_p, tmp, true);
+	return signedless_minus (*a_p, tmp, false);
 }
 
   gcc_checking_assert (a_p-m_negative == b_p-m_negative);
@@ -203,7 +203,7 @@ sreal::signedless_plus (const sreal a, const sreal b, bool negative)
   const sreal *a_p = a;
   const sreal *b_p = b;
 
-  if (*a_p  *b_p)
+  if (a_p-m_exp  b_p-m_exp)
 std::swap (a_p, b_p);
 
   dexp = a_p-m_exp - b_p-m_exp;
@@ -211,6 +211,7 @@ sreal::signedless_plus (const sreal a, const sreal b, bool negative)
   if (dexp  SREAL_BITS)
 {
   r.m_sig = a_p-m_sig;
+  r.m_negative = negative;
   return r;
 }
 
@@ -248,11 +249,11 @@ sreal::operator- (const sreal other) const
   /* We want to substract a smaller number from bigger
 for nonegative numbers.  */
   if (!m_negative  *this  other)
-return -signedless_minus (other, *this, true);
+return signedless_minus (other, *this, true);
 
   /* Example: -2 - (-3) = 3 - 2 */
   if (m_negative  *this  other)
-return signedless_minus (-other, -(*this), true);
+return signedless_minus (-other, -(*this), false);
 
   sreal r = signedless_minus (*this, other, m_negative);
 
@@ -274,6 +275,7 @@ sreal::signedless_minus (const sreal a, const sreal b, bool negative)
   if (dexp  SREAL_BITS)
 {
   r.m_sig = a_p-m_sig;
+  r.m_negative = negative;
   return r;
 }
   if (dexp == 0)
diff --git a/gcc/sreal.h b/gcc/sreal.h
index 1362bf6..3938c6e 100644
--- a/gcc/sreal.h
+++ b/gcc/sreal.h
@@ -60,6 +60,11 @@ public:
 
   bool operator (const sreal other) const
   {
+/* We negate result in case of negative numbers and
+   it would return true for equal negative numbers.  */
+if (*this == other)
+  return false;
+
 if (m_negative != other.m_negative)
   return m_negative  other.m_negative;
 
@@ -86,10 +91,19 @@ public:
 return tmp;
   }
 
-  sreal shift (int sig) const
+  sreal shift (int s) const
   {
+gcc_checking_assert (s = SREAL_BITS);
+gcc_checking_assert (s = -SREAL_BITS);
+
+/* Exponent should never be so large because shift_right is used only by
+ sreal_add and sreal_sub ant thus the number cannot be shifted out from
+ exponent range.  */
+gcc_checking_assert (m_exp + s = SREAL_MAX_EXP);
+gcc_checking_assert (m_exp + s = -SREAL_MAX_EXP);
+
 sreal tmp = *this;
-tmp.m_sig += sig;
+tmp.m_exp += s;
 
 return tmp;
   }
diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp b/gcc/testsuite/gcc.dg/plugin/plugin.exp
index e4b5f54..c12b3da 100644
--- a/gcc/testsuite/gcc.dg/plugin/plugin.exp
+++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp
@@ -59,6 +59,7 @@ set plugin_test_list [list \
 { selfassign.c self-assign-test-1.c self-assign-test-2.c } \
 { ggcplug.c ggcplug-test-1.c } \
 { one_time_plugin.c one_time-test-1.c } \
+{ sreal_plugin.c sreal-test-1.c } \
 { start_unit_plugin.c start_unit-test-1.c } \
 { finish_unit_plugin.c finish_unit-test-1.c } \
 ]
diff --git a/gcc/testsuite/gcc.dg/plugin/sreal-test-1.c b/gcc/testsuite/gcc.dg/plugin/sreal-test-1.c
new file mode 100644
index 000..1bce2cc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/sreal-test-1.c
@@ -0,0 +1,8 @@
+/* Test that pass is inserted and invoked once. */
+/* { dg-do compile } */
+/* { dg-options -O } */
+
+int main (int argc, char **argv)
+{
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/sreal_plugin.c b/gcc/testsuite/gcc.dg/plugin/sreal_plugin.c
new file mode 100644
index 000..f113816
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/sreal_plugin.c
@@ -0,0 

Re: [PATCH] IPA ICF: memory leak fix

2014-11-27 Thread Martin Liška

On 11/23/2014 10:01 AM, Markus Trippelsdorf wrote:

On 2014.11.22 at 17:46 +0100, Markus Trippelsdorf wrote:

On 2014.11.22 at 16:04 +0100, Martin Liška wrote:

On 11/22/2014 10:09 AM, Markus Trippelsdorf wrote:

On 2014.11.22 at 09:05 +0100, Martin Liška wrote:


Following patch removes memory leak that was introduced by very first IPA ICF 
patch.
I would like to thank David for hunting the leak.

Patch can bootstrap on x86_64-linux-pc and no regression is introduced.


I gave the patch a quick spin on gcc112:

*** Error in `/home/trippels/gcc_build_dir/./prev-gcc/lto1': free(): invalid 
next size (fast): 0x01000a5fc160 ***
=== Backtrace: =
/lib64/libc.so.6(+0xa3d9c)[0x3fff7b6b3d9c]
/lib64/libc.so.6(+0xaf0b4)[0x3fff7b6bf0b4]
/home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN3vecIi7va_heap6vl_ptrE7releaseEv-0x1d4bc00)[0x1025dd88]
/home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN7ipa_icf12sem_function14equals_privateEPNS_8sem_itemER8hash_mapIP11symtab_nodeS2_22default_hashmap_traitsE-0x9c083c)[0x116586bc]
/home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN7ipa_icf12sem_function6equalsEPNS_8sem_itemER8hash_mapIP11symtab_nodeS2_22default_hashmap_traitsE-0x9c0578)[0x11658998]
/home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN7ipa_icf18sem_item_optimizer7executeEv-0x9b8774)[0x11660a84]
/home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN7ipa_icf12pass_ipa_icf7executeEP8function-0x9b0314)[0x11668efc]
/home/trippels/gcc_build_dir/./prev-gcc/lto1(_Z16execute_one_passP8opt_pass-0x1647588)[0x1098a0a8]
/home/trippels/gcc_build_dir/./prev-gcc/lto1(_Z21execute_ipa_pass_listP8opt_pass-0x1644c2c)[0x1098ca7c]
/home/trippels/gcc_build_dir/./prev-gcc/lto1(_Z8lto_mainv-0x1df20e4)[0x101b494c]
/home/trippels/gcc_build_dir/./prev-gcc/lto1[0x10b599b8]
/home/trippels/gcc_build_dir/./prev-gcc/lto1(_ZN6toplev4mainEiPPc-0x1e8be70)[0x101507b8]
/home/trippels/gcc_build_dir/./prev-gcc/lto1(main-0x1ec8d8c)[0x1015493c]
/lib64/libc.so.6(+0x447ac)[0x3fff7b6547ac]
/lib64/libc.so.6(__libc_start_main-0x19cbf4)[0x3fff7b6549d4]
=== Memory map: 
...



Thank you for testing. The problem is that I should grow the vector by one more
element, because '0' is used as the NULL value.

Please try my fixed patch.


This one survives bootstrap-lto. Thanks.


But Firefox doesn't build:

/home/trippels/gcc_test/usr/local/bin/c++ -fPIC -Wall -Wempty-body 
-Woverloaded-virtual -Wsign-compare -Wwrite-strings -Werror=endif-labels 
-Werror=int-to-pointer-cast -Werror=missing-braces -Werror=pointer-arith 
-Werror=return-type -Werror=sequence-point -Werror=unused-label 
-Werror=trigraphs -Werror=type-limits -Wno-invalid-offsetof -Wcast-align 
-flto=160 --param lto-partitions=160 -mcpu=power8 -ffunction-sections 
-fdata-sections -fno-exceptions -fno-strict-aliasing -frtti -fno-exceptions 
-fno-math-errno -std=gnu++0x -pthread -pipe -UDEBUG -DNDEBUG -O3 
-DU_STATIC_IMPLEMENTATION -fvisibility=hidden -W -Wall -pedantic 
-Wpointer-arith -Wwrite-strings -Wno-long-long -Wno-unused 
-Wno-unused-parameter   -lpthread 
-Wl,--hash-style=gnu,--as-needed,--gc-sections,--icf=all -Wl,-z,noexecstack 
-Wl,-z,text -Wl,--build-id -Wl,--gc-sections  -o ../../bin/makeconv makeconv.o 
ucnvstat.o genmbcs.o gencnvex.o -L../../lib -licutu -L../../lib -licui18n 
-L../../lib -licuuc -L../../stubdata -licudata -

l
pthread -ldl -lm

lto1: internal compiler error: in operator[], at vec.h:736
0x10122377 vecint, va_heap, vl_embed::operator[](unsigned int)
 ../../gcc/gcc/vec.h:736
0x10d1a0f3 vecint, va_heap, vl_embed::operator[](unsigned int)
 ../../gcc/gcc/ipa-icf.c:963
0x10d1a0f3 vecint, va_heap, vl_ptr::operator[](unsigned int)
 ../../gcc/gcc/vec.h:1202
0x10d1a0f3 ipa_icf::sem_function::bb_dict_test(auto_vecint, 0ul, int, int)
 ../../gcc/gcc/ipa-icf.c:970
0x10d1aa73 ipa_icf::sem_function::equals_private(ipa_icf::sem_item*, 
hash_mapsymtab_node*, ipa_icf::sem_item*, default_hashmap_traits)
 ../../gcc/gcc/ipa-icf.c:512
0x10d1afb3 ipa_icf::sem_function::equals(ipa_icf::sem_item*, hash_mapsymtab_node*, 
ipa_icf::sem_item*, default_hashmap_traits)
 ../../gcc/gcc/ipa-icf.c:384
0x10d1dba3 ipa_icf::sem_item_optimizer::subdivide_classes_by_equality(bool)
 ../../gcc/gcc/ipa-icf.c:1833
0x10d2397f ipa_icf::sem_item_optimizer::execute()
 ../../gcc/gcc/ipa-icf.c:1652
0x10d24a93 ipa_icf_driver
 ../../gcc/gcc/ipa-icf.c:2382
0x10d24a93 ipa_icf::pass_ipa_icf::execute(function*)
 ../../gcc/gcc/ipa-icf.c:2430
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See http://gcc.gnu.org/bugs.html for instructions.
lto-wrapper: fatal error: /home/trippels/gcc_test/usr/local/bin/c++ returned 1 
exit status
compilation terminated.
/home/trippels/bin/ld: fatal error: lto-wrapper failed
collect2: error: ld returned 1 exit status
make[8]: *** [../../bin/makeconv] Error 1



Hi.

Well, this final version can run profiled-bootstrap and I am capable of building
FF

Re: [PATCH 8/9] Negative numbers added for sreal class.

2014-11-27 Thread Martin Liška

On 11/21/2014 04:21 PM, Martin Liška wrote:

On 11/21/2014 04:02 PM, Richard Biener wrote:

On Fri, Nov 21, 2014 at 3:39 PM, Martin Liška mli...@suse.cz wrote:


Hello.

Ok, this is simplified, one can use sreal a = 12345 and it works ;)


that's a  new API, right?  There is no max () and I think that using
LONG_MIN here is asking for trouble (host dependence).  The
comment in the file says the max should be
sreal (SREAL_MAX_SIG, SREAL_MAX_EXP) and the min
sreal (-SREAL_MAX_SIG, SREAL_MAX_EXP)?



Sure, sreal can store much bigger(smaller) numbers :)


Where do you need sreal::to_double?  The host shouldn't perform
double calculations so it can be only for dumping?  In which case
the user should have used sreal::dump (), maybe with extra
arguments.



That new function was requested by Honza, only for debugging purposes.
I agree that dump should do this kind of job.

If no other problem, I will run tests once more and commit it.
Thanks,
Martin


-#define SREAL_MAX_EXP (INT_MAX / 4)
+#define SREAL_MAX_EXP (INT_MAX / 8)

this change doesn't look necessary anymore?

Btw, it's also odd that...

  #define SREAL_PART_BITS 32
...
  #define SREAL_MIN_SIG ((uint64_t) 1  (SREAL_PART_BITS - 1))
  #define SREAL_MAX_SIG (((uint64_t) 1  SREAL_PART_BITS) - 1)

thus all m_sig values fit in 32bits but we still use a uint64_t m_sig ...
(the implementation uses 64bit for internal computations, but still
the storage is wasteful?)

Of course the way normalize() works requires that storage to be
64bits to store unnormalized values.

I'd say ok with the SREAL_MAX_EXP change reverted.



Hi.

You are right, this change was done because I used one bit for m_negative 
(bitfield), not needed any more.

Final version attached.

Thank you,
Martin


Thanks,
Richard.





Otherwise looks good to me and sorry for not noticing the above
earlier.

Thanks,
Richard.


Thanks,
Martin



};

extern void debug (sreal ref);
@@ -76,12 +133,12 @@ inline sreal operator+= (sreal a, const sreal
b)

inline sreal operator-= (sreal a, const sreal b)
{
-return a = a - b;
+  return a = a - b;
}

inline sreal operator/= (sreal a, const sreal b)
{
-return a = a / b;
+  return a = a / b;
}

inline sreal operator*= (sreal a, const sreal b)
--
2.1.2










Hello.

After IRC discussions, I decided to give sreal another refactoring where I
use int64_t for m_sig.

This approach looks much easier and more straightforward. I would like to
ask folks for comments.

I am able to run profiled bootstrap on x86_64-linux-pc and ppc64-linux-pc
and no new regression is introduced.

Thanks,
Martin


From bff0b4b803271788cd90cfd4032ed6d4e6e95707 Mon Sep 17 00:00:00 2001
From: mliska mli...@suse.cz
Date: Wed, 26 Nov 2014 15:46:42 +0100
Subject: [PATCH] New sreal implementation which uses int64_t as m_sig.

gcc/ChangeLog:

2014-11-27  Martin Liska  mli...@suse.cz

	* sreal.c (sreal::shift_right): New implementation
	for int64_t as m_sig.
	(sreal::normalize): Likewise.
	(sreal::to_int): Likewise.
	(sreal::operator+): Likewise.
	(sreal::operator-): Likewise.
	(sreal::operator*): Likewise.
	(sreal::operator/): Likewise.
	(sreal::signedless_minus): Removed.
	(sreal::signedless_plus): Removed.
	* sreal.h (sreal::operator): New implementation
	for int64_t as m_sig.
---
 gcc/sreal.c | 129 +++-
 gcc/sreal.h |  52 ++--
 2 files changed, 61 insertions(+), 120 deletions(-)

diff --git a/gcc/sreal.c b/gcc/sreal.c
index 2b5e3ae..304feb0 100644
--- a/gcc/sreal.c
+++ b/gcc/sreal.c
@@ -91,7 +91,7 @@ sreal::shift_right (int s)
 
   m_exp += s;
 
-  m_sig += (uint64_t) 1  (s - 1);
+  m_sig += (int64_t) 1  (s - 1);
   m_sig = s;
 }
 
@@ -100,43 +100,46 @@ sreal::shift_right (int s)
 void
 sreal::normalize ()
 {
+  int64_t s = m_sig  0 ? -1 : 1;
+  uint64_t sig = m_sig == LONG_MIN ? LONG_MAX : std::abs (m_sig);
+
   if (m_sig == 0)
 {
-  m_negative = 0;
   m_exp = -SREAL_MAX_EXP;
 }
-  else if (m_sig  SREAL_MIN_SIG)
+  else if (sig  SREAL_MIN_SIG)
 {
   do
 	{
-	  m_sig = 1;
+	  sig = 1;
 	  m_exp--;
+	  gcc_checking_assert (sig);
 	}
-  while (m_sig  SREAL_MIN_SIG);
+  while (sig  SREAL_MIN_SIG);
 
   /* Check underflow.  */
   if (m_exp  -SREAL_MAX_EXP)
 	{
 	  m_exp = -SREAL_MAX_EXP;
-	  m_sig = 0;
+	  sig = 0;
 	}
 }
-  else if (m_sig  SREAL_MAX_SIG)
+  else if (sig  SREAL_MAX_SIG)
 {
   int last_bit;
   do
 	{
-	  last_bit = m_sig  1;
-	  m_sig = 1;
+	  last_bit = sig  1;
+	  sig = 1;
 	  m_exp++;
 	}
-  while (m_sig  SREAL_MAX_SIG);
+  while (sig  SREAL_MAX_SIG);
 
   /* Round the number.  */
-  m_sig += last_bit;
-  if (m_sig  SREAL_MAX_SIG)
+  sig += last_bit;
+  if (sig  SREAL_MAX_SIG)
 	{
-	  m_sig = 1;
+	  sig = 1;
 	  m_exp++;
 	}
 
@@ -144,9 +147,11 @@ sreal::normalize ()
   if (m_exp  SREAL_MAX_EXP)
 	{
 	  m_exp = SREAL_MAX_EXP;
-	  m_sig = SREAL_MAX_SIG;
+	  sig = SREAL_MAX_SIG

Re: [PATCH 2/5] Existing call graph infrastructure enhancement

2014-09-26 Thread Martin Liška

On 09/24/2014 05:01 PM, Jan Hubicka wrote:

Hi.

Following patch enhances API functions to be ready for main patch of this 
patchset.

Ready for trunk?

Thank you,
Martin



gcc/ChangeLog:

2014-09-21  Martin Liška  mli...@suse.cz

* cgraph.c (cgraph_node::release_body): New argument keep_arguments
introduced.
* cgraph.h: Likewise.
* cgraphunit.c (cgraph_node::create_wrapper): Usage of new argument 
introduced.
* ipa-devirt.c (polymorphic_type_binfo_p): Safe check for binfos 
created by Java.
* tree-ssa-alias.c (ao_ref_base_alias_set): Static function transformed 
to global.
* tree-ssa-alias.h: Likewise.



diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 8f04284..d40a2922 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -1637,13 +1637,15 @@ release_function_body (tree decl)
 are free'd in final.c via free_after_compilation().  */

  void
-cgraph_node::release_body (void)
+cgraph_node::release_body (bool keep_arguments)
  {
ipa_transforms_to_apply.release ();
if (!used_as_abstract_origin  symtab-state != PARSING)
  {
DECL_RESULT (decl) = NULL;
-  DECL_ARGUMENTS (decl) = NULL;
+
+  if (!keep_arguments)
+   DECL_ARGUMENTS (decl) = NULL;
  }
/* If the node is abstract and needed, then do not clear DECL_INITIAL
   of its associated function function declaration because it's
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index a316e40..19ce3b8 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -915,7 +915,7 @@ public:
   Use this only for functions that are released before being translated to
   target code (i.e. RTL).  Functions that are compiled to RTL and beyond
   are free'd in final.c via free_after_compilation().  */
-  void release_body (void);
+  void release_body (bool keep_arguments = false);


Please add documentation for KEEP_ARGUMENTS explaining that it is useful only 
if you want to
rebuild body as thunk.


/* cgraph_node is no longer nested function; update cgraph accordingly.  */
void unnest (void);
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index 3e3b8d2..c4597e2 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -2300,7 +2300,7 @@ cgraph_node::create_wrapper (cgraph_node *target)
  tree decl_result = DECL_RESULT (decl);

  /* Remove the function's body.  */

I would say "Remove the function's body but keep arguments to be reused for
thunk."

-release_body ();
+release_body (true);
  reset ();

  DECL_RESULT (decl) = decl_result;
diff --git a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c
index af42c6d..f374933 100644
--- a/gcc/ipa-devirt.c
+++ b/gcc/ipa-devirt.c
@@ -225,7 +225,7 @@ static inline bool
  polymorphic_type_binfo_p (tree binfo)
  {
/* See if BINFO's type has an virtual table associtated with it.  */
-  return BINFO_VTABLE (TYPE_BINFO (BINFO_TYPE (binfo)));
+  return BINFO_TYPE (binfo)  BINFO_VTABLE (TYPE_BINFO (BINFO_TYPE (binfo)));


Aha, this change was for Java, right? Please add comment that Java produces
BINFOs without BINFO_TYPE set.

  }

  /* Return TRUE if all derived types of T are known and thus
diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c
index 442112a..1bf88e2 100644
--- a/gcc/tree-ssa-alias.c
+++ b/gcc/tree-ssa-alias.c
@@ -559,7 +559,7 @@ ao_ref_base (ao_ref *ref)

  /* Returns the base object alias set of the memory reference *REF.  */

-static alias_set_type
+alias_set_type
  ao_ref_base_alias_set (ao_ref *ref)
  {
tree base_ref;
diff --git a/gcc/tree-ssa-alias.h b/gcc/tree-ssa-alias.h
index 436381a..0d35283 100644
--- a/gcc/tree-ssa-alias.h
+++ b/gcc/tree-ssa-alias.h
@@ -98,6 +98,7 @@ extern void ao_ref_init (ao_ref *, tree);
  extern void ao_ref_init_from_ptr_and_size (ao_ref *, tree, tree);
  extern tree ao_ref_base (ao_ref *);
  extern alias_set_type ao_ref_alias_set (ao_ref *);
+extern alias_set_type ao_ref_base_alias_set (ao_ref *);


I can not approve this change, but I suppose it is what Richard suggested?



There's updated version of the patch that deals with Honza's notes.
Yes, I explicitly asked Richard if we can mark the function as global.

I will commit the patch soon.

Thank you,
Martin


Patch is OK except for the tree-ssa-alias bits.
Honza

  extern bool ptr_deref_may_alias_global_p (tree);
  extern bool ptr_derefs_may_alias_p (tree, tree);
  extern bool ref_may_alias_global_p (tree);




diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 1cfc783..fdcaf79 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -1625,16 +1625,19 @@ release_function_body (tree decl)
 /* Release memory used to represent body of function.
Use this only for functions that are released before being translated to
target code (i.e. RTL).  Functions that are compiled to RTL and beyond
-   are free'd in final.c via free_after_compilation().  */
+   are free'd in final.c via free_after_compilation().
+   KEEP_ARGUMENTS are useful only if you want to rebuild body as thunk.  */
 
 void
-cgraph_node::release_body (void

Re: [PATCH 3/5] IPA ICF pass

2014-09-26 Thread Martin Liška

On 07/17/2014 05:05 PM, Martin Liška wrote:


On 07/06/2014 12:53 AM, Jan Hubicka wrote:

On Fri, 20 Jun 2014, Trevor Saunders wrote:

+@item -fipa-icf
+@opindex fipa-icf
+Perform Identical Code Folding for functions and read-only variables.

I would perhaps explicitly say that the optimizations reduce code size
and may disturb unwind stacks by replacing a function by equivalent
one with different name.

+Behavior is similar to Gold Linker ICF optimization. Symbols proved

Perhaps tell a bit more here. The optimization works more effectively with link
time optimization enabled, and the Gold and GCC ICF work on different
levels and thus are not equivalent optimizations - there are equivalences that
are found only by GCC and equivalences found only by Gold.


+as semantically equivalent are redirected to corresponding symbol. The pass

+sensitively decides for usage of alias, thunk or local redirection.
+This flag is enabled by default at @option{-O2}.

Probably at -Os too.

I found this a bit hard to read/understand.

Perhaps first describe what it does and then, before "This flag is
enabled...", note that "This is similar to the ICF optimization performed
by the Gold linker."
"Symbols proved" (plural) vs "to corresponding symbol" seems to miss
an "a", as in "a corresponding symbol".  Alas, how is that one
determined?  Is this more "...are merged into one", from the user's
perspective?

What does it mean to sensitively decide for usage of alias, thunk,
or local redirection?

I think this is just a technical detail of the implementation.  I would not put 
that
into user manual.  It means that for some functions you can make alias, for 
others
you need thunk (so addresses stay different)

Gerald


Hello,
there's updated version of patch that newly uses devirtualization machinery 
to identify polymorphic types that can potentially break ICF (There are such 
examples in Firefox).

Apart from that, I did many small updates, incorporated Trevor's comments and I 
tried to improve documentation entry for the pass.
Patch has been tested for Firefox and Inkscape with LTO.

Thanks,
Martin


Hello.

I spent a couple of weeks fixing new issues connected to the pass:
1) The inliner failed when I created a thunk and released the body of a function. In
such a situation we need to preserve DECL_ARGUMENTS. I added a new argument for:
cgraph_node::release_body.
2) An awkward error was hidden in a libstdc++ test for trees: there were two
functions having one argument that differs in one sub-template. Thanks to
Richard, who helped me to fix alias set accuracy.
3) There was a missing comparison for FIELD_DECLS (DECL_FIELD_BIT_OFFSET) which
caused a miscompilation.
4) After discussion with Honza, we introduced a new cgraph_node flag called
icf_merged. The flag helps to fix the verifier in cgraph_node::verify.
The current version of the patch can bootstrap on x86_64-linux. With the following
patch applied, there is no testcase regression.
I tried to build Firefox, Inkscape, GIMP and Chromium with LTO and patch 
applied and no regression has been observed.

Moreover, I discussed with Richard and the pass is capable of playing role in 
tree-ssa-tail-merge (according to first experiments). It can replace current 
usage of value numbering.

I hope we can apply the patch to the mainline in a short-term time window?

Thank you,
Martin

From 53d20d0b0c209b50d385ee8d85d5a7ed4594d477 Mon Sep 17 00:00:00 2001
From: mliska mli...@suse.cz
Date: Fri, 26 Sep 2014 13:51:47 +0200
Subject: [PATCH 1/3] IPA ICF: patch1

---
 gcc/Makefile.in  |2 +
 gcc/cgraph.c |   20 +-
 gcc/cgraph.h |2 +
 gcc/cgraphunit.c |2 +-
 gcc/common.opt   |   12 +
 gcc/doc/invoke.texi  |   16 +-
 gcc/ipa-icf-gimple.c |  384 +++
 gcc/ipa-icf.c| 2841 ++
 gcc/ipa-icf.h|  803 ++
 gcc/lto-cgraph.c |2 +
 gcc/lto-section-in.c |3 +-
 gcc/lto-streamer.h   |1 +
 gcc/opts.c   |6 +
 gcc/passes.def   |1 +
 gcc/timevar.def  |1 +
 gcc/tree-pass.h  |1 +
 16 files changed, 4089 insertions(+), 8 deletions(-)
 create mode 100644 gcc/ipa-icf-gimple.c
 create mode 100644 gcc/ipa-icf.c
 create mode 100644 gcc/ipa-icf.h

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 3dd9d8f..8d02425 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1265,6 +1265,8 @@ OBJS = \
 	ipa-profile.o \
 	ipa-prop.o \
 	ipa-pure-const.o \
+	ipa-icf.o \
+	ipa-icf-gimple.o \
 	ipa-reference.o \
 	ipa-ref.o \
 	ipa-utils.o \
diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index fdcaf79..439db49 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -1913,6 +1913,8 @@ cgraph_node::dump (FILE *f)
 fprintf (f,  only_called_at_exit);
   if (tm_clone)
 fprintf (f,  tm_clone);
+  if (icf_merged)
+fprintf (f,  icf_merged);
   if (DECL_STATIC_CONSTRUCTOR (decl))
 fprintf (f, static_constructor (priority:%i), get_init_priority ());
   if (DECL_STATIC_DESTRUCTOR (decl

Re: [PATCH 4/5] Existing tests fix

2014-09-26 Thread Martin Liška

On 06/30/2014 02:11 PM, Martin Liška wrote:


On 06/17/2014 09:52 PM, Jeff Law wrote:

On 06/13/14 04:48, mliska wrote:

Hi,
   many tests rely on a precise number of scanned functions in a dump file. If 
IPA ICF decides to merge some functions and/or read-only variables, the counts do 
not match.

Martin

Changelog:

2014-06-13  Martin Liska  mli...@suse.cz
Honza Hubicka  hubi...@ucw.cz

* c-c++-common/rotate-1.c: Text
* c-c++-common/rotate-2.c: New test.
* c-c++-common/rotate-3.c: Likewise.
* c-c++-common/rotate-4.c: Likewise.
* g++.dg/cpp0x/rv-return.C: Likewise.
* g++.dg/cpp0x/rv1n.C: Likewise.
* g++.dg/cpp0x/rv1p.C: Likewise.
* g++.dg/cpp0x/rv2n.C: Likewise.
* g++.dg/cpp0x/rv3n.C: Likewise.
* g++.dg/cpp0x/rv4n.C: Likewise.
* g++.dg/cpp0x/rv5n.C: Likewise.
* g++.dg/cpp0x/rv6n.C: Likewise.
* g++.dg/cpp0x/rv7n.C: Likewise.
* gcc.dg/ipa/ipacost-1.c: Likewise.
* gcc.dg/ipa/ipacost-2.c: Likewise.
* gcc.dg/ipa/ipcp-agg-6.c: Likewise.
* gcc.dg/ipa/remref-2a.c: Likewise.
* gcc.dg/ipa/remref-2b.c: Likewise.
* gcc.dg/pr46309-2.c: Likewise.
* gcc.dg/torture/ipa-pta-1.c: Likewise.
* gcc.dg/tree-ssa/andor-3.c: Likewise.
* gcc.dg/tree-ssa/andor-4.c: Likewise.
* gcc.dg/tree-ssa/andor-5.c: Likewise.
* gcc.dg/vect/no-vfa-pr29145.c: Likewise.
* gcc.dg/vect/vect-cond-10.c: Likewise.
* gcc.dg/vect/vect-cond-9.c: Likewise.
* gcc.dg/vect/vect-widen-mult-const-s16.c: Likewise.
* gcc.dg/vect/vect-widen-mult-const-u16.c: Likewise.
* gcc.dg/vect/vect-widen-mult-half-u8.c: Likewise.
* gcc.target/i386/bmi-1.c: Likewise.
* gcc.target/i386/bmi-2.c: Likewise.
* gcc.target/i386/pr56564-2.c: Likewise.
* g++.dg/opt/pr30965.C: Likewise.
* g++.dg/tree-ssa/pr19637.C: Likewise.
* gcc.dg/guality/csttest.c: Likewise.
* gcc.dg/ipa/iinline-4.c: Likewise.
* gcc.dg/ipa/iinline-7.c: Likewise.
* gcc.dg/ipa/ipa-pta-13.c: Likewise.

I know this is the least interesting part of your changes, but it's also simple 
and mechanical and thus trivial to review. Approved, but obviously don't 
install until the rest of your patch has been approved.

Similar changes for recently added tests or cases where you might improve ICF 
requiring similar tweaks to existing tests are pre-approved as well.

jeff


Hello,
I fixed a few more tests and added a correct ChangeLog message.

gcc/testsuite/ChangeLog

2014-06-30  Martin Liska  mli...@suse.cz
 Honza Hubicka  hubi...@ucw.cz

 * c-c++-common/rotate-1.c: Test fixed.
 * c-c++-common/rotate-2.c: Likewise.
 * c-c++-common/rotate-3.c: Likewise.
 * c-c++-common/rotate-4.c: Likewise.
 * g++.dg/cpp0x/rv-return.C: Likewise.
 * g++.dg/cpp0x/rv1n.C: Likewise.
 * g++.dg/cpp0x/rv1p.C: Likewise.
 * g++.dg/cpp0x/rv2n.C: Likewise.
 * g++.dg/cpp0x/rv3n.C: Likewise.
 * g++.dg/cpp0x/rv4n.C: Likewise.
 * g++.dg/cpp0x/rv5n.C: Likewise.
 * g++.dg/cpp0x/rv6n.C: Likewise.
 * g++.dg/cpp0x/rv7n.C: Likewise.
 * g++.dg/ipa/devirt-g-1.C: Likewise.
 * g++.dg/ipa/inline-1.C: Likewise.
 * g++.dg/ipa/inline-2.C: Likewise.
 * g++.dg/ipa/inline-3.C: Likewise.
 * g++.dg/opt/pr30965.C: Likewise.
 * g++.dg/tree-ssa/pr19637.C: Likewise.
 * gcc.dg/guality/csttest.c: Likewise.
 * gcc.dg/ipa/iinline-4.c: Likewise.
 * gcc.dg/ipa/iinline-7.c: Likewise.
 * gcc.dg/ipa/ipa-pta-13.c: Likewise.
 * gcc.dg/ipa/ipacost-1.c: Likewise.
 * gcc.dg/ipa/ipacost-2.c: Likewise.
 * gcc.dg/ipa/ipcp-agg-6.c: Likewise.
 * gcc.dg/ipa/remref-2a.c: Likewise.
 * gcc.dg/ipa/remref-2b.c: Likewise.
 * gcc.dg/pr46309-2.c: Likewise.
 * gcc.dg/torture/ipa-pta-1.c: Likewise.
 * gcc.dg/tree-ssa/andor-3.c: Likewise.
 * gcc.dg/tree-ssa/andor-4.c: Likewise.
 * gcc.dg/tree-ssa/andor-5.c: Likewise.
 * gcc.dg/vect/no-vfa-pr29145.c: Likewise.
 * gcc.dg/vect/vect-cond-10.c: Likewise.
 * gcc.dg/vect/vect-cond-9.c: Likewise.
 * gcc.dg/vect/vect-widen-mult-const-s16.c: Likewise.
 * gcc.dg/vect/vect-widen-mult-const-u16.c: Likewise.
 * gcc.dg/vect/vect-widen-mult-half-u8.c: Likewise.
 * gcc.target/i386/bmi-1.c: Likewise.
 * gcc.target/i386/bmi-2.c: Likewise.
 * gcc.target/i386/pr56564-2.c: Likewise.

Thank you,
Martin



Hello.

Here is an updated version of the patch that fixes another issue connected to 
the test suite.

Thanks,
Martin
From e7818e646687c05e13a68828ef70fb41716a267c Mon Sep 17 00:00:00 2001
From: mliska mli...@suse.cz
Date: Fri, 26 Sep 2014 13:52:29 +0200
Subject: [PATCH 2/3] IPA ICF: patch2.

---
 gcc/testsuite/c-c++-common/rotate-1.c | 2 +-
 gcc/testsuite/c-c++-common/rotate-2.c | 2 +-
 gcc/testsuite/c-c++-common/rotate-3.c | 2 +-
 gcc/testsuite/c-c++-common/rotate-4.c | 2 +-
 gcc/testsuite/g++.dg/cpp0x/rv-return.C| 1 +
 gcc/testsuite/g++.dg/cpp0x/rv1n.C | 2

Re: [PATCH 3/5] IPA ICF pass

2014-09-27 Thread Martin Liška
On 09/27/2014 01:27 AM, Jan Hubicka wrote:
 While a plain Firefox -flto build works fine. LTO/PGO build fails with:

 lto1: internal compiler error: in ipa_merge_profiles, at ipa-utils.c:540
 0x7d6165 ipa_merge_profiles(cgraph_node*, cgraph_node*)
 ../../gcc/gcc/ipa-utils.c:540
 0xf10c41 ipa_icf::sem_function::merge(ipa_icf::sem_item*)
 ../../gcc/gcc/ipa-icf.c:753
 0xf15206 ipa_icf::sem_item_optimizer::merge_classes(unsigned int)
 ../../gcc/gcc/ipa-icf.c:2706
 0xf1c1f4 ipa_icf::sem_item_optimizer::execute()
 ../../gcc/gcc/ipa-icf.c:2098
 0xf1d3f1 ipa_icf_driver
 ../../gcc/gcc/ipa-icf.c:2784
 0xf1d3f1 ipa_icf::pass_ipa_icf::execute(function*)
 ../../gcc/gcc/ipa-icf.c:2831


 The pass is also very memory hungry (from 3GB without ICF to 4GB during
 libxul link), while the code size savings are in the 1% range.


The majority of the problem comes from the groups of candidates that are built according 
to the hash.
The hash value is based on the number of arguments, the number of BBs, the number of 
gimple statements and the types of these statements.
It groups functions into classes. In WPA (before the body of any function is 
loaded) I get the following histogram:

Dump after WPA based types groups
Congruence classes: 97204 (unique hash values: 88725), with total: 191457 items
Class size histogram [num of members]: number of classe number of classess
[1]: 86453 classes
[2]: 5680 classes
[3]: 1541 classes
[4]: 915 classes
[5]: 446 classes
[6]: 346 classes
[7]: 200 classes
[8]: 181 classes
[9]: 154 classes
[10]: 109 classes
[11]: 87 classes
[12]: 87 classes
[13]: 68 classes
[14]: 58 classes
[15]: 58 classes
[16]: 41 classes
[17]: 25 classes
[18]: 33 classes
[19]: 28 classes
[20]: 25 classes
[21]: 19 classes
[22]: 30 classes
[23]: 24 classes
[24]: 33 classes
[25]: 17 classes
[26]: 15 classes
[27]: 10 classes
[28]: 13 classes
[29]: 18 classes
[30]: 10 classes

It means that each class with more than one member needs to be iterated and 
its functions compared. And yes, that is the root of the problem.
I have to load the function bodies to perform a deep function comparison. As you can 
see, we have almost 200k functions, more than half of which are situated
in a group with more than one member. So the 1GB of extra memory usage is caused 
by these bodies:

Init called for 105004 items (54.84%).

The memory footprint could be significantly reduced if one could load a body, 
release it, and have the memory freed. I asked Honza about it, but it looks like the
GGC mechanism cannot be easily forced to release it.

 
 Thanks for checking. I was just thinking about doing that myself.  Would
 you mind posting the -ftime-report of the firefox WPA stage?
 
 It seems that in this case we reject too many of the equality candidates?
 I think the original numbers were about 4-5% but later some equivalences were
 disabled because of devirt/aliasing issues. Do you compare it with gold ICF
 enabled? There are quite a few obvious improvements to the analysis that can
 be done, but I guess we need to analyze the interesting cases one by one.

You are right, the numbers were quite promising, but over time I had to
reduce the aggressiveness of the pass. As Honza said, it can be improved 
step-by-step.

 
 One thing that Martin can try is to hook into lto-symtab and try to check
 that the COMDAT functions that are known to be same pass the equality check.
 I suppose we will learn interesting things this way.
 
Good point, I will try it.

Martin


 I think the patch adds quite important infrastructure for gimple semantic
 equality checking and function merging. I went through the majority of code 
 and
 I think it is mostly ready to mainline (i.e. cleaner than what we have in
 tree-ssa-tailmerge) so hope we can finish the review process next week.
 We will need to get better cost/benefits ratio to enable it for -O2 that is
 something I would really like to see for 5.0, but it seems to be easier to
 handle this incrementally

Thank you for the review,
Martin

 
 Honza
 



Re: [PATCH 3/5] IPA ICF pass

2014-09-27 Thread Martin Liška
On 09/27/2014 07:59 AM, Markus Trippelsdorf wrote:
 On 2014.09.27 at 01:27 +0200, Jan Hubicka wrote:
 While a plain Firefox -flto build works fine. LTO/PGO build fails with:

 lto1: internal compiler error: in ipa_merge_profiles, at ipa-utils.c:540
 0x7d6165 ipa_merge_profiles(cgraph_node*, cgraph_node*)
 ../../gcc/gcc/ipa-utils.c:540
 0xf10c41 ipa_icf::sem_function::merge(ipa_icf::sem_item*)
 ../../gcc/gcc/ipa-icf.c:753
 0xf15206 ipa_icf::sem_item_optimizer::merge_classes(unsigned int)
 ../../gcc/gcc/ipa-icf.c:2706
 0xf1c1f4 ipa_icf::sem_item_optimizer::execute()
 ../../gcc/gcc/ipa-icf.c:2098
 0xf1d3f1 ipa_icf_driver
 ../../gcc/gcc/ipa-icf.c:2784
 0xf1d3f1 ipa_icf::pass_ipa_icf::execute(function*)
 ../../gcc/gcc/ipa-icf.c:2831


 The pass is also very memory hungry (from 3GB without ICF to 4GB during
 libxul link), while the code size savings are in the 1% range.

 Thanks for checking. I was just thinking about doing that myself.  Would
 you mind posting the -ftime-report of the firefox WPA stage?
 
 (without ICF)
 Execution times (seconds)
  phase setup :   0.00 ( 0%) usr   0.00 ( 0%) sys   0.01 ( 0%) 
 wall1412 kB ( 0%) ggc
  phase opt and generate  :  58.38 (63%) usr   2.00 (47%) sys  60.37 (40%) 
 wall  403069 kB (12%) ggc
  phase stream in :  30.24 (33%) usr   0.97 (23%) sys  33.90 (22%) 
 wall 2944210 kB (88%) ggc
  phase stream out:   4.29 ( 5%) usr   1.32 (31%) sys  57.32 (38%) 
 wall   0 kB ( 0%) ggc
  phase finalize  :   0.00 ( 0%) usr   0.00 ( 0%) sys   0.13 ( 0%) 
 wall   0 kB ( 0%) ggc
  garbage collection  :   3.68 ( 4%) usr   0.00 ( 0%) sys   3.68 ( 2%) 
 wall   0 kB ( 0%) ggc
  callgraph optimization  :   0.50 ( 1%) usr   0.00 ( 0%) sys   0.50 ( 0%) 
 wall 166 kB ( 0%) ggc
  ipa dead code removal   :   6.91 ( 7%) usr   0.08 ( 2%) sys   7.25 ( 5%) 
 wall   0 kB ( 0%) ggc
  ipa virtual call target :   7.08 ( 8%) usr   0.04 ( 1%) sys   6.93 ( 5%) 
 wall   0 kB ( 0%) ggc
  ipa devirtualization:   0.27 ( 0%) usr   0.00 ( 0%) sys   0.27 ( 0%) 
 wall   10365 kB ( 0%) ggc
  ipa cp  :   1.81 ( 2%) usr   0.06 ( 1%) sys   3.40 ( 2%) 
 wall  173701 kB ( 5%) ggc
  ipa inlining heuristics :  16.60 (18%) usr   0.27 ( 6%) sys  17.48 (12%) 
 wall  532704 kB (16%) ggc
  ipa comdats :   0.19 ( 0%) usr   0.00 ( 0%) sys   0.19 ( 0%) 
 wall   0 kB ( 0%) ggc
  ipa lto gimple out  :   0.21 ( 0%) usr   0.04 ( 1%) sys   0.97 ( 1%) 
 wall   0 kB ( 0%) ggc
  ipa lto decl in :  18.29 (20%) usr   0.54 (13%) sys  18.96 (12%) 
 wall 2226088 kB (66%) ggc
  ipa lto decl out:   3.93 ( 4%) usr   0.13 ( 3%) sys   4.06 ( 3%) 
 wall   0 kB ( 0%) ggc
  ipa lto constructors in :   0.24 ( 0%) usr   0.03 ( 1%) sys   0.59 ( 0%) 
 wall   14226 kB ( 0%) ggc
  ipa lto constructors out:   0.08 ( 0%) usr   0.04 ( 1%) sys   0.15 ( 0%) 
 wall   0 kB ( 0%) ggc
  ipa lto cgraph I/O  :   0.89 ( 1%) usr   0.12 ( 3%) sys   1.02 ( 1%) 
 wall  364151 kB (11%) ggc
  ipa lto decl merge  :   2.14 ( 2%) usr   0.01 ( 0%) sys   2.14 ( 1%) 
 wall8196 kB ( 0%) ggc
  ipa lto cgraph merge:   1.59 ( 2%) usr   0.00 ( 0%) sys   1.60 ( 1%) 
 wall   12716 kB ( 0%) ggc
  whopr wpa   :   1.54 ( 2%) usr   0.03 ( 1%) sys   1.55 ( 1%) 
 wall   1 kB ( 0%) ggc
  whopr wpa I/O   :   0.04 ( 0%) usr   1.11 (26%) sys  52.10 (34%) 
 wall   0 kB ( 0%) ggc
  whopr partitioning  :   5.02 ( 5%) usr   0.01 ( 0%) sys   5.03 ( 3%) 
 wall4938 kB ( 0%) ggc
  ipa reference   :   2.04 ( 2%) usr   0.02 ( 0%) sys   2.08 ( 1%) 
 wall   0 kB ( 0%) ggc
  ipa profile :   0.32 ( 0%) usr   0.00 ( 0%) sys   0.33 ( 0%) 
 wall   0 kB ( 0%) ggc
  ipa pure const  :   2.43 ( 3%) usr   0.02 ( 0%) sys   2.49 ( 2%) 
 wall   0 kB ( 0%) ggc
  tree STMT verifier  :   0.01 ( 0%) usr   0.00 ( 0%) sys   0.00 ( 0%) 
 wall   0 kB ( 0%) ggc
  callgraph verifier  :  16.31 (18%) usr   1.69 (39%) sys  17.96 (12%) 
 wall   0 kB ( 0%) ggc
  dominance computation   :   0.01 ( 0%) usr   0.00 ( 0%) sys   0.02 ( 0%) 
 wall   0 kB ( 0%) ggc
  varconst:   0.01 ( 0%) usr   0.03 ( 1%) sys   0.05 ( 0%) 
 wall   0 kB ( 0%) ggc
  unaccounted todo:   0.69 ( 1%) usr   0.00 ( 0%) sys   0.69 ( 0%) 
 wall   0 kB ( 0%) ggc
  TOTAL :  92.91 4.29   151.73
 3348693 kB
 Extra diagnostic checks enabled; compiler may run slowly.
 Configure with --enable-checking=release to disable checks.
 
 (with ICF)
 Execution times (seconds)
  phase setup :   0.00 ( 0%) usr   0.00 ( 0%) sys   0.01 ( 0%) 
 wall1412 kB ( 0%) ggc
  phase opt and generate  :  82.70 (70%) usr   3.31 (53%) sys  86.17 (45%) 
 wall 1468975 kB (33%) ggc
  phase stream in :  30.46 (26%) usr   1.02 (16%) sys  31.48 (16%) 
 wall 2944210 kB (67%) ggc
  phase stream out:   4.52 ( 4%) usr 

Re: [PATCH 3/5] IPA ICF pass

2014-09-27 Thread Martin Liška
On 09/27/2014 09:47 AM, Markus Trippelsdorf wrote:
 On 2014.09.27 at 07:59 +0200, Markus Trippelsdorf wrote:

 It seems that in this case we reject too many of the equality candidates?
 I think the original numbers were about 4-5% but later some equivalences were
 disabled because of devirt/aliasing issues. Do you compare it with gold ICF
 enabled? There are quite a few obvious improvements to the analysis that can
 be done, but I guess we need to analyze the interesting cases one by one.
 
 Forgot to post the binary size numbers (in bytes):
 
   | gold's icf off | gold's icf on  |
 --+++
 gcc's icf off |79793880|74881040|
 --+-+
 gcc's icf on  |78043608|73612800|
 --+++
 

Thanks once more!

Gold ICF is quite strong; I will verify which functions are not caught by IPA 
ICF.
These data show that IPA ICF can reduce the binary by 2.19%. I know that 
it's quite a small improvement,
but keep in mind that the pass can reduce just the size of .text (and, 
to a lesser extent, related sections). Here are
stats about libxul.so (please ignore the last 3 columns):

Section name   Start   Size in BSizePortion Disk 
read in B   Disk read   Sec. portion
   0   0  0.00 B  0.00% 
 0  0.00 B  0.00%
.note.gnu.build-i512  36 36.00 B  0.00% 
 0  0.00 B  0.00%
.dynsym  552   8119279.29 KB  0.08% 
 0  0.00 B  0.00%
.dynstr81744   9085988.73 KB  0.09% 
 0  0.00 B  0.00%
.hash 172608   2175221.24 KB  0.02% 
 0  0.00 B  0.00%
.gnu.version  1943606766 6.61 KB  0.01% 
 0  0.00 B  0.00%
.gnu.version_d201128  56 56.00 B  0.00% 
 0  0.00 B  0.00%
.gnu.version_r2011841216 1.19 KB  0.00% 
 0  0.00 B  0.00%
.rela.dyn 202400 8198208 7.82 MB  8.56% 
 0  0.00 B  0.00%
.rela.plt8400608   7027268.62 KB  0.07% 
 0  0.00 B  0.00%
.init8470880  26 26.00 B  0.00% 
 0  0.00 B  0.00%
.plt 8470912   4686445.77 KB  0.05% 
 0  0.00 B  0.00%
.text85177763901433337.21 MB 40.72% 
 0  0.00 B  0.00%
.fini   47532112   9  9.00 B  0.00% 
 0  0.00 B  0.00%
.rodata 475322881525856014.55 MB 15.93% 
 0  0.00 B  0.00%
.eh_frame   62790848 6203564 5.92 MB  6.47% 
 0  0.00 B  0.00%
.eh_frame_hdr   68994412 1088012 1.04 MB  1.14% 
 0  0.00 B  0.00%
.tbss   70082560   4  4.00 B  0.00% 
 0  0.00 B  0.00%
.dynamic700825601104 1.08 KB  0.00% 
 0  0.00 B  0.00%
.got700836641384 1.35 KB  0.00% 
 0  0.00 B  0.00%
.got.plt70085048   2344822.90 KB  0.02% 
 0  0.00 B  0.00%
.data   70108544  811616   792.59 KB  0.85% 
 0  0.00 B  0.00%
.jcr70920160   8  8.00 B  0.00% 
 0  0.00 B  0.00%
.tm_clone_table 70920168   0  0.00 B  0.00% 
 0  0.00 B  0.00%
.fini_array 70920168   8  8.00 B  0.00% 
 0  0.00 B  0.00%
.init_array 70920176  16 16.00 B  0.00% 
 0  0.00 B  0.00%
.data.rel.ro.loca   70920192 3938880 3.76 MB  4.11% 
 0  0.00 B  0.00%
.data.rel.ro74859072  269216   262.91 KB  0.28% 
 0  0.00 B  0.00%
.bss75128320 1844246 1.76 MB  1.92% 
 0  0.00 B  0.00%
.debug_line 75128288 517517.00 B  0.00% 
 0  0.00 B  0.00%
.debug_info 75128805 817817.00 B  0.00% 
 0  0.00 B  0.00%
.debug_abbrev   75129622 438438.00 B  0.00% 
 0   

Re: [PATCH, DOC]: New value 'default' for --enable-languages

2014-10-03 Thread Martin Liška

On 08/21/2014 01:57 PM, Martin Liška wrote:

Ping.

There was no explicit agreement that I can commit the change to trunk?

Thanks,
Martin

On 07/30/2014 08:19 PM, Martin Liška wrote:

On 07/30/2014 06:38 PM, Mike Stump wrote:

On Jul 30, 2014, at 6:20 AM, Richard Biener richard.guent...@gmail.com wrote:

On Wed, Jul 30, 2014 at 3:19 PM, Richard Biener
richard.guent...@gmail.com wrote:

On Wed, Jul 30, 2014 at 1:37 PM, Martin Liška mli...@suse.cz wrote:

Hello,
   as discussed in previous thread
(https://gcc.gnu.org/ml/gcc-patches/2014-07/msg02010.html), I would like to
add more intuitive behavior for --enable-languages configure option.

It works for me, but as I'm currently always testing all,ada,obj-c++ how
can I easily continue to do that - that is, not test go?  ;)

Of course with default,ada,obj-c++ ... stupid me.

In time, we’ll have a all,!go….  :-)

Does 'go' mean that the patch is ready for trunk :D ?

Martin





PING^2.

Thank you,
Martin


[PATCH] lto.c: add ARG_UNUSED for an argument

2014-10-06 Thread Martin Liška

Hello.

In lto.c, if HAVE_WORKING_FORK isn't defined, the 'last' argument is unused, 
and in that case GCC cannot bootstrap due to a warning.

Ready for trunk?
Built on x86_64-linux-gnu.

Thanks,
Martin
gcc/lto/ChangeLog:

2014-10-06  Martin Liska  mli...@suse.cz

* lto.c (stream_out): ARG_UNUSED added for last argument.
diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
index 6cbb178..bc53632 100644
--- a/gcc/lto/lto.c
+++ b/gcc/lto/lto.c
@@ -2495,7 +2495,8 @@ wait_for_child ()
Fork if that seems to help.  */
 
 static void
-stream_out (char *temp_filename, lto_symtab_encoder_t encoder, bool last)
+stream_out (char *temp_filename, lto_symtab_encoder_t encoder,
+	bool ARG_UNUSED (last))
 {
 #ifdef HAVE_WORKING_FORK
   static int nruns;


[PATCH] PR/63376

2014-10-10 Thread Martin Liška
Hello.

This is a one-line patch that fixes the issue in PR63376. This was a mechanical 
error and I will commit it as obvious.

Thank you,
Martin
gcc/ChangeLog:

2014-10-11  Martin Liska  mli...@suse.cz

PR/63376
* cgraphunit.c (symbol_table::process_new_functions): Missing call
for call_cgraph_insertion_hooks added.
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index d463505..5985e3d 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -329,6 +329,7 @@ symbol_table::process_new_functions (void)
 	  free_dominance_info (CDI_POST_DOMINATORS);
 	  free_dominance_info (CDI_DOMINATORS);
 	  pop_cfun ();
+	  call_cgraph_insertion_hooks (node);
 	  break;
 
 	case EXPANSION:


Re: [PATCH 3/5] IPA ICF pass

2014-10-10 Thread Martin Liška
On 09/28/2014 03:20 AM, Jan Hubicka wrote:

 Hi.

 Thank you, Markus, for presenting the numbers; they correspond with what I measured. If 
 I see correctly, the IPA ICF pass takes about 7 seconds;
 the rest is distributed between the verifier (not interesting for a release version of 
 the compiler) and 'phase opt and generate'. No idea
 what can make the difference?
 
 phase opt and generate just combines all the optimization times together, so it
 is the same 7 seconds as in the ICF pass :)
 1GB of function bodies just to eliminate 2-3% of code seems quite a lot. Do you
 have any idea how many of those turn out to be different?
 It would be nice to be able to release the duplicate bodies from memory after
 the equivalence has been established
 
 Honza
 

 Martin

(I resend the message, my mail client was a bit confused, please do _not_ reply 
to faktur...@foxlink.cz)

Hello.

After a few days of measurement and tuning, I was able to get the numbers into the 
following shape:
Execution times (seconds)
 phase setup :   0.00 ( 0%) usr   0.00 ( 0%) sys   0.00 ( 0%) wall  
  1412 kB ( 0%) ggc
 phase opt and generate  :  27.83 (59%) usr   0.66 (19%) sys  28.52 (37%) wall 
1028813 kB (24%) ggc
 phase stream in :  16.90 (36%) usr   0.63 (18%) sys  17.60 (23%) wall 
3246453 kB (76%) ggc
 phase stream out:   2.76 ( 6%) usr   2.19 (63%) sys  31.34 (40%) wall  
 2 kB ( 0%) ggc
 callgraph optimization  :   0.36 ( 1%) usr   0.00 ( 0%) sys   0.35 ( 0%) wall  
40 kB ( 0%) ggc
 ipa dead code removal   :   3.31 ( 7%) usr   0.01 ( 0%) sys   3.25 ( 4%) wall  
 0 kB ( 0%) ggc
 ipa virtual call target :   3.69 ( 8%) usr   0.03 ( 1%) sys   3.80 ( 5%) wall  
21 kB ( 0%) ggc
 ipa devirtualization:   0.12 ( 0%) usr   0.00 ( 0%) sys   0.15 ( 0%) wall  
 13704 kB ( 0%) ggc
 ipa cp  :   1.11 ( 2%) usr   0.07 ( 2%) sys   1.17 ( 2%) wall  
188558 kB ( 4%) ggc
 ipa inlining heuristics :   8.17 (17%) usr   0.14 ( 4%) sys   8.27 (11%) wall  
494738 kB (12%) ggc
 ipa comdats :   0.12 ( 0%) usr   0.00 ( 0%) sys   0.12 ( 0%) wall  
 0 kB ( 0%) ggc
 ipa lto gimple in   :   1.86 ( 4%) usr   0.40 (11%) sys   2.20 ( 3%) wall  
537970 kB (13%) ggc
 ipa lto gimple out  :   0.19 ( 0%) usr   0.08 ( 2%) sys   0.27 ( 0%) wall  
 2 kB ( 0%) ggc
 ipa lto decl in :  12.20 (26%) usr   0.37 (11%) sys  12.64 (16%) wall 
2441687 kB (57%) ggc
 ipa lto decl out:   2.51 ( 5%) usr   0.21 ( 6%) sys   2.71 ( 3%) wall  
 0 kB ( 0%) ggc
 ipa lto constructors in :   0.13 ( 0%) usr   0.02 ( 1%) sys   0.17 ( 0%) wall  
 15692 kB ( 0%) ggc
 ipa lto constructors out:   0.03 ( 0%) usr   0.00 ( 0%) sys   0.03 ( 0%) wall  
 0 kB ( 0%) ggc
 ipa lto cgraph I/O  :   0.54 ( 1%) usr   0.09 ( 3%) sys   0.63 ( 1%) wall  
407182 kB (10%) ggc
 ipa lto decl merge  :   1.34 ( 3%) usr   0.00 ( 0%) sys   1.34 ( 2%) wall  
  8220 kB ( 0%) ggc
 ipa lto cgraph merge:   1.00 ( 2%) usr   0.00 ( 0%) sys   1.00 ( 1%) wall  
 14605 kB ( 0%) ggc
 whopr wpa   :   0.92 ( 2%) usr   0.00 ( 0%) sys   0.89 ( 1%) wall  
 1 kB ( 0%) ggc
 whopr wpa I/O   :   0.01 ( 0%) usr   1.90 (55%) sys  28.31 (37%) wall  
 0 kB ( 0%) ggc
 whopr partitioning  :   2.81 ( 6%) usr   0.01 ( 0%) sys   2.83 ( 4%) wall  
  4943 kB ( 0%) ggc
 ipa reference   :   1.34 ( 3%) usr   0.00 ( 0%) sys   1.35 ( 2%) wall  
 0 kB ( 0%) ggc
 ipa profile :   0.20 ( 0%) usr   0.01 ( 0%) sys   0.21 ( 0%) wall  
 0 kB ( 0%) ggc
 ipa pure const  :   1.62 ( 3%) usr   0.00 ( 0%) sys   1.63 ( 2%) wall  
 0 kB ( 0%) ggc
 ipa icf :   2.65 ( 6%) usr   0.02 ( 1%) sys   2.68 ( 3%) wall  
  1352 kB ( 0%) ggc
 inline parameters   :   0.00 ( 0%) usr   0.01 ( 0%) sys   0.00 ( 0%) wall  
 0 kB ( 0%) ggc
 tree SSA rewrite:   0.11 ( 0%) usr   0.01 ( 0%) sys   0.08 ( 0%) wall  
 18919 kB ( 0%) ggc
 tree SSA other  :   0.01 ( 0%) usr   0.00 ( 0%) sys   0.01 ( 0%) wall  
 0 kB ( 0%) ggc
 tree SSA incremental:   0.24 ( 1%) usr   0.01 ( 0%) sys   0.32 ( 0%) wall  
 11325 kB ( 0%) ggc
 tree operand scan   :   0.15 ( 0%) usr   0.02 ( 1%) sys   0.18 ( 0%) wall  
116283 kB ( 3%) ggc
 dominance frontiers :   0.01 ( 0%) usr   0.00 ( 0%) sys   0.02 ( 0%) wall  
 0 kB ( 0%) ggc
 dominance computation   :   0.13 ( 0%) usr   0.01 ( 0%) sys   0.16 ( 0%) wall  
 0 kB ( 0%) ggc
 varconst:   0.01 ( 0%) usr   0.02 ( 1%) sys   0.01 ( 0%) wall  
 0 kB ( 0%) ggc
 loop fini   :   0.02 ( 0%) usr   0.00 ( 0%) sys   0.04 ( 0%) wall  
 0 kB ( 0%) ggc
 unaccounted todo:   0.55 ( 1%) usr   0.00 ( 0%) sys   0.56 ( 1%) wall  
 0 kB ( 0%) ggc
 TOTAL :  47.49 3.4877.46
4276682 kB

and I was able to reduce the function bodies loaded in WPA to 35% (from the previous 
55%). The main problem
with speed was hidden in the work list for congruence classes, where hash_set was 
used. I chose 

Re: [PATCH 3/5] IPA ICF pass

2014-10-10 Thread Martin Liška
On 09/26/2014 09:46 PM, Jan Hubicka wrote:
 Hi,
 this is on ipa-icf-gimple.c
 
 @@ -2827,11 +2829,19 @@ cgraph_node::verify_node (void)
   {
 if (verify_edge_corresponds_to_fndecl (e, decl))
   {
 -   error (edge points to wrong declaration:);
 -   debug_tree (e-callee-decl);
 -   fprintf (stderr, Instead of:);
 -   debug_tree (decl);
 -   error_found = true;
 +   /* The edge can be redirected in WPA by IPA 
 ICF.
 +  Following check really ensures that it's
 +  not the case.  */
 +
 +   cgraph_node *current_node = cgraph_node::get 
 (decl);
 +   if (!current_node || 
 !current_node-icf_merged)
 
 I would move this into verify_edge_corresponds_to_fndecl.
 
 diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c
 new file mode 100644
 index 000..7031eaa
 --- /dev/null
 +++ b/gcc/ipa-icf-gimple.c
 @@ -0,0 +1,384 @@
 +/* Interprocedural Identical Code Folding pass
 +   Copyright (C) 2014 Free Software Foundation, Inc.
 +
 +   Contributed by Jan Hubicka hubi...@ucw.cz and Martin Liska 
 mli...@suse.cz
 +
 +This file is part of GCC.
 +
 +GCC is free software; you can redistribute it and/or modify it under
 +the terms of the GNU General Public License as published by the Free
 +Software Foundation; either version 3, or (at your option) any later
 +version.
 +
 +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
 +WARRANTY; without even the implied warranty of MERCHANTABILITY or
 +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 +for more details.
 +
 +You should have received a copy of the GNU General Public License
 +along with GCC; see the file COPYING3.  If not see
 +http://www.gnu.org/licenses/.  */
 
 Please add toplevel comment about what the code does and how to use it.
 
 +namespace ipa_icf {
 +
 +/* Basic block equivalence comparison function that returns true if
 +   basic blocks BB1 and BB2 (from functions FUNC1 and FUNC2) correspond.  */
 ... to each other?
 I would add a short comment that, as the comparison goes, you build a vocabulary
 of equivalences of variables/ssanames etc.
 So people reading the code do not get lost at the very beginning.
 
 +
 +bool
 +func_checker::compare_bb (sem_bb *bb1, sem_bb *bb2)
 +{
 +  unsigned i;
 +  gimple_stmt_iterator gsi1, gsi2;
 +  gimple s1, s2;
 +
 +  if (bb1-nondbg_stmt_count != bb2-nondbg_stmt_count
 +  || bb1-edge_count != bb2-edge_count)
 +return RETURN_FALSE ();
 
 The UPPERCASE looks ugly.  I see that RETURN_FALSE is a wrapper for 
 return_false_with_msg
 that outputs line and file information.
 
 I would make it lowercase even if it is macro. You may consider using
 CXX_MEM_STAT_INFO style default argument to avoid function macro completely.
 Probably not a big win given that it won't save you from the preprocessor mess.
 +
 +  gsi1 = gsi_start_bb (bb1-bb);
 +  gsi2 = gsi_start_bb (bb2-bb);
 +
 +  for (i = 0; i  bb1-nondbg_stmt_count; i++)
 +{
 +  if (is_gimple_debug (gsi_stmt (gsi1)))
 + gsi_next_nondebug (gsi1);
 +
 +  if (is_gimple_debug (gsi_stmt (gsi2)))
 + gsi_next_nondebug (gsi2);
 +
 +  s1 = gsi_stmt (gsi1);
 +  s2 = gsi_stmt (gsi2);
 +
 +  if (gimple_code (s1) != gimple_code (s2))
 + return RETURN_FALSE_WITH_MSG (gimple codes are different);
 
 I think you need to compare EH here.  Consider the case where one unit
 is compiled with -fno-exceptions and thus all EH regions are removed,
 while the other function has EH regions in it.  Those are not equivalent.
 
 The EH region is obtained by lookup_stmt_eh and then you need to compare
 them for a match as you do with gimple_resx_region.
 
 +  t1 = gimple_call_fndecl (s1);
 +  t2 = gimple_call_fndecl (s2);
 +
 +  /* Function pointer variables are not supported yet.  */
 
 They seem to be; compare_operand seems just right.
 
 +
 +/* Verifies for given GIMPLEs S1 and S2 that
 +   label statements are semantically equivalent.  */
 +
 +bool
 +func_checker::compare_gimple_label (gimple g1, gimple g2)
 +{
 +  if (m_ignore_labels)
 +return true;
 +
 +  tree t1 = gimple_label_label (g1);
 +  tree t2 = gimple_label_label (g2);
 +
 +  return compare_tree_ssa_label (t1, t2);
 +}
 
 I would expect the main BB loop to record the BB to which each label belongs
 and the BB association being checked here.
 Otherwise I do not see how switch statements are compared to not have
 different permutations of targets. Also note that one BB may have
 multiple labels in it and they are equivalent.
 
 Also I would punt on any occurrence of FORCED_LABEL. Those are tricky as they
 may be passed around and compared for address and no one really defines
 what should happen.  Better to avoid those.

Hi.

I will 

Re: [PATCH 3/5] IPA ICF pass

2014-10-10 Thread Martin Liška
On 09/26/2014 11:27 PM, Jan Hubicka wrote:
 diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c
 new file mode 100644
 index 000..f3472fe
 --- /dev/null
 +++ b/gcc/ipa-icf.c
 @@ -0,0 +1,2841 @@
 +/* Interprocedural Identical Code Folding pass
 +   Copyright (C) 2014 Free Software Foundation, Inc.
 +
 +   Contributed by Jan Hubicka hubi...@ucw.cz and Martin Liska 
 mli...@suse.cz
 +
 +This file is part of GCC.
 +
 +GCC is free software; you can redistribute it and/or modify it under
 +the terms of the GNU General Public License as published by the Free
 +Software Foundation; either version 3, or (at your option) any later
 +version.
 +
 +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
 +WARRANTY; without even the implied warranty of MERCHANTABILITY or
 +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 +for more details.
 +
 +You should have received a copy of the GNU General Public License
 +along with GCC; see the file COPYING3.  If not see
 +http://www.gnu.org/licenses/.  */
 +
 +/* Interprocedural Identical Code Folding for functions and
 +   read-only variables.
 +
 +   The goal of this transformation is to discover functions and read-only
 +   variables which do have exactly the same semantics.
 (or value)
 +
 +   In case of functions,
 +   we could either create a virtual clone or do a simple function wrapper
 +   that will call equivalent function. If the function is just locally 
 visible,
 +   all function calls can be redirected. For read-only variables, we create
 +   aliases if possible.
 +
 +   Optimization pass arranges as follows:
 
 The optimization pass is arranged as follows: (I guess)
 
 I also wonder if the gimple equality code should be in ipa_icf namespace, it 
 is intended
 to be shared with tail merging pass, so what about just calling it 
 gimple_sem_equality?
 
 +/* Verification function for edges E1 and E2.  */
 +
 +bool
 +func_checker::compare_edge (edge e1, edge e2)
 +{
 +  if (e1-flags != e2-flags)
 +return false;
 
 In future we may want to experiment with checking that edge probabilities with
 profile feedback match and refuse to merge BBs with different outgoing 
 probabilities
 (i.e. +-5%).
 Just add it as TODO there, please.
 +
 +/* Return true if types are compatible from perspective of ICF.  */
 +bool func_checker::types_are_compatible_p (tree t1, tree t2,
 
 Perhaps dropping _are_ would make sense, so we do not have two names
 for essentially same thing.
 +bool compare_polymorphic,
 +bool first_argument)
 +{
 +  if (TREE_CODE (t1) != TREE_CODE (t2))
 +return RETURN_FALSE_WITH_MSG (different tree types);
 +
 +  if (!types_compatible_p (t1, t2))
 +return RETURN_FALSE_WITH_MSG (types are not compatible);
 +
 +  if (get_alias_set (t1) != get_alias_set (t2))
 +return RETURN_FALSE_WITH_MSG (alias sets are different);
 
 You do not need to compare alias sets except for memory operations IMO.

Hello.

Yeah, you are right. But even Richard advised me to put it in a single place. 
Maybe we are a bit
more strict than necessary. But I hope that's fine ;)

 +
 +  /* We call contains_polymorphic_type_p with this pointer type.  */
 +  if (first_argument  TREE_CODE (t1) == POINTER_TYPE)
 +{
 +  t1 = TREE_TYPE (t1);
 +  t2 = TREE_TYPE (t2);
 +}
 +
 +  if (compare_polymorphic
 +   (contains_polymorphic_type_p (t1) || contains_polymorphic_type_p 
 (t2)))
 +{
 +  if (!contains_polymorphic_type_p (t1) || !contains_polymorphic_type_p 
 (t2))
 +return RETURN_FALSE_WITH_MSG (one type is not polymorphic);
 +
 +  if (TYPE_MAIN_VARIANT (t1) != TYPE_MAIN_VARIANT (t2))
 +return RETURN_FALSE_WITH_MSG (type variants are different for 
 +  polymorphic type);
 
 I added types_must_be_same_for_odr (t1,t2) for you here.
 +/* Fast equality function based on knowledge known in WPA.  */
 +
 +bool
 +sem_function::equals_wpa (sem_item *item)
 +{
 +  gcc_assert (item-type == FUNC);
 +
 +  m_compared_func = static_castsem_function * (item);
 +
 +  if (arg_types.length () != m_compared_func-arg_types.length ())
 +return RETURN_FALSE_WITH_MSG (different number of arguments);
 +
 +  /* Checking types of arguments.  */
 +  for (unsigned i = 0; i  arg_types.length (); i++)
 +{
 +  /* This guard is here for function pointer with attributes 
 (pr59927.c).  */
 +  if (!arg_types[i] || !m_compared_func-arg_types[i])
 +return RETURN_FALSE_WITH_MSG (NULL argument type);
 +
 +  if (!func_checker::types_are_compatible_p (arg_types[i],
 +  m_compared_func-arg_types[i],
 +  true, i == 0))
 +return RETURN_FALSE_WITH_MSG (argument type is different);
 +}
 +
 +  /* Result type checking.  */
 +  if (!func_checker::types_are_compatible_p (result_type,
 +  m_compared_func-result_type))
 +return RETURN_FALSE_WITH_MSG (result types are different);
 
 You may want to compare ECF flags, such as nothrow/const/pure.  We do not
 want to merge 

Re: [PATCH 3/5] IPA ICF pass

2014-10-13 Thread Martin Liška

On 10/11/2014 10:19 AM, Jan Hubicka wrote:


After few days of measurement and tuning, I was able to get numbers to the 
following shape:
Execution times (seconds)
  phase setup :   0.00 ( 0%) usr   0.00 ( 0%) sys   0.00 ( 0%) wall 
   1412 kB ( 0%) ggc
  phase opt and generate  :  27.83 (59%) usr   0.66 (19%) sys  28.52 (37%) wall 
1028813 kB (24%) ggc
  phase stream in :  16.90 (36%) usr   0.63 (18%) sys  17.60 (23%) wall 
3246453 kB (76%) ggc
  phase stream out:   2.76 ( 6%) usr   2.19 (63%) sys  31.34 (40%) wall 
  2 kB ( 0%) ggc
  callgraph optimization  :   0.36 ( 1%) usr   0.00 ( 0%) sys   0.35 ( 0%) wall 
 40 kB ( 0%) ggc
  ipa dead code removal   :   3.31 ( 7%) usr   0.01 ( 0%) sys   3.25 ( 4%) wall 
  0 kB ( 0%) ggc
  ipa virtual call target :   3.69 ( 8%) usr   0.03 ( 1%) sys   3.80 ( 5%) wall 
 21 kB ( 0%) ggc
  ipa devirtualization:   0.12 ( 0%) usr   0.00 ( 0%) sys   0.15 ( 0%) wall 
  13704 kB ( 0%) ggc
  ipa cp  :   1.11 ( 2%) usr   0.07 ( 2%) sys   1.17 ( 2%) wall 
 188558 kB ( 4%) ggc
  ipa inlining heuristics :   8.17 (17%) usr   0.14 ( 4%) sys   8.27 (11%) wall 
 494738 kB (12%) ggc
  ipa comdats :   0.12 ( 0%) usr   0.00 ( 0%) sys   0.12 ( 0%) wall 
  0 kB ( 0%) ggc
  ipa lto gimple in   :   1.86 ( 4%) usr   0.40 (11%) sys   2.20 ( 3%) wall 
 537970 kB (13%) ggc
  ipa lto gimple out  :   0.19 ( 0%) usr   0.08 ( 2%) sys   0.27 ( 0%) wall 
  2 kB ( 0%) ggc
  ipa lto decl in :  12.20 (26%) usr   0.37 (11%) sys  12.64 (16%) wall 
2441687 kB (57%) ggc
  ipa lto decl out:   2.51 ( 5%) usr   0.21 ( 6%) sys   2.71 ( 3%) wall 
  0 kB ( 0%) ggc
  ipa lto constructors in :   0.13 ( 0%) usr   0.02 ( 1%) sys   0.17 ( 0%) wall 
  15692 kB ( 0%) ggc
  ipa lto constructors out:   0.03 ( 0%) usr   0.00 ( 0%) sys   0.03 ( 0%) wall 
  0 kB ( 0%) ggc
  ipa lto cgraph I/O  :   0.54 ( 1%) usr   0.09 ( 3%) sys   0.63 ( 1%) wall 
 407182 kB (10%) ggc
  ipa lto decl merge  :   1.34 ( 3%) usr   0.00 ( 0%) sys   1.34 ( 2%) wall 
   8220 kB ( 0%) ggc
  ipa lto cgraph merge:   1.00 ( 2%) usr   0.00 ( 0%) sys   1.00 ( 1%) wall 
  14605 kB ( 0%) ggc
  whopr wpa   :   0.92 ( 2%) usr   0.00 ( 0%) sys   0.89 ( 1%) wall 
  1 kB ( 0%) ggc
  whopr wpa I/O   :   0.01 ( 0%) usr   1.90 (55%) sys  28.31 (37%) wall 
  0 kB ( 0%) ggc
  whopr partitioning  :   2.81 ( 6%) usr   0.01 ( 0%) sys   2.83 ( 4%) wall 
   4943 kB ( 0%) ggc
  ipa reference   :   1.34 ( 3%) usr   0.00 ( 0%) sys   1.35 ( 2%) wall 
  0 kB ( 0%) ggc
  ipa profile :   0.20 ( 0%) usr   0.01 ( 0%) sys   0.21 ( 0%) wall 
  0 kB ( 0%) ggc
  ipa pure const  :   1.62 ( 3%) usr   0.00 ( 0%) sys   1.63 ( 2%) wall 
  0 kB ( 0%) ggc
  ipa icf :   2.65 ( 6%) usr   0.02 ( 1%) sys   2.68 ( 3%) wall 
   1352 kB ( 0%) ggc
  inline parameters   :   0.00 ( 0%) usr   0.01 ( 0%) sys   0.00 ( 0%) wall 
  0 kB ( 0%) ggc
  tree SSA rewrite:   0.11 ( 0%) usr   0.01 ( 0%) sys   0.08 ( 0%) wall 
  18919 kB ( 0%) ggc
  tree SSA other  :   0.01 ( 0%) usr   0.00 ( 0%) sys   0.01 ( 0%) wall 
  0 kB ( 0%) ggc
  tree SSA incremental:   0.24 ( 1%) usr   0.01 ( 0%) sys   0.32 ( 0%) wall 
  11325 kB ( 0%) ggc
  tree operand scan   :   0.15 ( 0%) usr   0.02 ( 1%) sys   0.18 ( 0%) wall 
 116283 kB ( 3%) ggc
  dominance frontiers :   0.01 ( 0%) usr   0.00 ( 0%) sys   0.02 ( 0%) wall 
  0 kB ( 0%) ggc
  dominance computation   :   0.13 ( 0%) usr   0.01 ( 0%) sys   0.16 ( 0%) wall 
  0 kB ( 0%) ggc
  varconst:   0.01 ( 0%) usr   0.02 ( 1%) sys   0.01 ( 0%) wall 
  0 kB ( 0%) ggc
  loop fini   :   0.02 ( 0%) usr   0.00 ( 0%) sys   0.04 ( 0%) wall 
  0 kB ( 0%) ggc
  unaccounted todo:   0.55 ( 1%) usr   0.00 ( 0%) sys   0.56 ( 1%) wall 
  0 kB ( 0%) ggc
  TOTAL :  47.49 3.4877.46
4276682 kB

and I was able to reduce function bodies loaded in WPA to 35% (from previous 
55%). The main problem


35% means that 35% of all function bodies are compared with something else? 
That feels pretty high.
but overall numbers are not so terrible.


Currently, the pass is able to merge 32K functions. As you know, we group 
functions to so called classes.
According to stats, average non-singular class size contains at the end of 
comparison 7.39 candidates and we
have 5K such functions. Because we load body for each candidate in such groups, 
it gives us minimum number
of loaded bodies: 37K. As we load 70K functions, we still have room to improve.
But I guess WPA body-less
comparison is quite efficient.




with speed was hidden in work list for congruence classes, where hash_set was 
used. I chose the data
structure to support delete operation, but it was really slow. Thus, hash_set 
was replaced with linked list
and a flag is used to identify if a set is 

Re: [PATCH 3/5] IPA ICF pass

2014-10-13 Thread Martin Liška

On 10/11/2014 02:05 AM, Martin Liška wrote:

On 09/26/2014 09:46 PM, Jan Hubicka wrote:

Hi,
this is on ipa-icf-gimple.c

@@ -2827,11 +2829,19 @@ cgraph_node::verify_node (void)
{
  if (verify_edge_corresponds_to_fndecl (e, decl))
{
- error (edge points to wrong declaration:);
- debug_tree (e-callee-decl);
- fprintf (stderr, Instead of:);
- debug_tree (decl);
- error_found = true;
+ /* The edge can be redirected in WPA by IPA 
ICF.
+Following check really ensures that it's
+not the case.  */
+
+ cgraph_node *current_node = cgraph_node::get 
(decl);
+ if (!current_node || 
!current_node-icf_merged)

I would move this into verify_edge_corresponds_to_fndecl.

diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c
new file mode 100644
index 000..7031eaa
--- /dev/null
+++ b/gcc/ipa-icf-gimple.c
@@ -0,0 +1,384 @@
+/* Interprocedural Identical Code Folding pass
+   Copyright (C) 2014 Free Software Foundation, Inc.
+
+   Contributed by Jan Hubicka hubi...@ucw.cz and Martin Liska 
mli...@suse.cz
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+http://www.gnu.org/licenses/.  */

Please add toplevel comment about what the code does and how to use it.

+namespace ipa_icf {
+
+/* Basic block equivalence comparison function that returns true if
+   basic blocks BB1 and BB2 (from functions FUNC1 and FUNC2) correspond.  */
... to each other?
I would add a short comment that, as the comparison goes, you build a vocabulary
of equivalences of variables/ssanames etc.
So people reading the code do not get lost at the very beginning.

+
+bool
+func_checker::compare_bb (sem_bb *bb1, sem_bb *bb2)
+{
+  unsigned i;
+  gimple_stmt_iterator gsi1, gsi2;
+  gimple s1, s2;
+
+  if (bb1-nondbg_stmt_count != bb2-nondbg_stmt_count
+  || bb1-edge_count != bb2-edge_count)
+return RETURN_FALSE ();

The UPPERCASE looks ugly.  I see that RETURN_FALSE is a wrapper for
return_false_with_msg
that outputs line and file information.

I would make it lowercase even if it is a macro. You may consider using
a CXX_MEM_STAT_INFO style default argument to avoid the function macro completely.
Probably not a big win given that it won't save you from the preprocessor mess.
+
+  gsi1 = gsi_start_bb (bb1-bb);
+  gsi2 = gsi_start_bb (bb2-bb);
+
+  for (i = 0; i  bb1-nondbg_stmt_count; i++)
+{
+  if (is_gimple_debug (gsi_stmt (gsi1)))
+   gsi_next_nondebug (gsi1);
+
+  if (is_gimple_debug (gsi_stmt (gsi2)))
+   gsi_next_nondebug (gsi2);
+
+  s1 = gsi_stmt (gsi1);
+  s2 = gsi_stmt (gsi2);
+
+  if (gimple_code (s1) != gimple_code (s2))
+   return RETURN_FALSE_WITH_MSG (gimple codes are different);

I think you need to compare EH here.  Consider the case where one unit
is compiled with -fno-exceptions and thus all EH regions are removed,
while the other function has EH regions in it.  Those are not equivalent.

The EH region is obtained by lookup_stmt_eh and then you need to compare
them for a match as you do with gimple_resx_region.

+  t1 = gimple_call_fndecl (s1);
+  t2 = gimple_call_fndecl (s2);
+
+  /* Function pointer variables are not supported yet.  */

They seems to be, compare_operand seems just right.

+
+/* Verifies for given GIMPLEs S1 and S2 that
+   label statements are semantically equivalent.  */
+
+bool
+func_checker::compare_gimple_label (gimple g1, gimple g2)
+{
+  if (m_ignore_labels)
+return true;
+
+  tree t1 = gimple_label_label (g1);
+  tree t2 = gimple_label_label (g2);
+
+  return compare_tree_ssa_label (t1, t2);
+}

I would expect the main BB loop to record the BB to which each label belongs
and the BB association being checked here.
Otherwise I do not see how switch statements are compared to not have
different permutations of targets. Also note that one BB may have
multiple labels in it and they are equivalent.

Also I would punt on occurrence of FORCED_LABEL. Those are tricky as they
may be passed around and compared for address and no one really defines
what should happen.  Better to avoid those.


Hi.

I will remove this support in the pass.



+
+/* Verifies for given

[RFC, PATCH]: Introduction of callgraph annotation class

2014-10-15 Thread Martin Liška

Hello.

Following patch introduces a new class called callgraph_annotation. Idea behind 
the patch is to provide a generic interface one can use to register custom info 
related to a cgraph_node. As you know, symbol_table provides hooks for 
creation, deletion and duplication of a cgraph_node. If you have a pass, you 
need to handle all these hooks and store custom data in your data structure.

As an example, after discussion with Martin, I chose usage in ipa-prop.h:

data structure:
vec<ipa_node_params> ipa_node_params_vector

if the pass handles an event, the following chunk is executed:
if (ipa_node_params_vector.length () <= (unsigned) symtab->cgraph_max_uid)
  ipa_node_params_vector.safe_grow_cleared (symtab->cgraph_max_uid + 1);

The problem is that you can have sparse UIDs of cgraph_nodes and every time you 
have to allocate a vector of size equal to cgraph_max_uid.

As a replacement, I implemented a first version of cgraph_annotation that internally
uses hash_map<cgraph_unique_identifier, T>.
Every time a node is deleted, we remove corresponding data associated to the 
node.

What do you think about it?

Thank you,
Martin
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 04ce0c0..bf34c96 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1171,6 +1171,7 @@ OBJS = \
 	cfgrtl.o \
 	symtab.o \
 	cgraph.o \
+	annotation.o \
 	cgraphbuild.o \
 	cgraphunit.o \
 	cgraphclones.o \
diff --git a/gcc/annotation.c b/gcc/annotation.c
new file mode 100644
index 000..a8b6053
--- /dev/null
+++ b/gcc/annotation.c
@@ -0,0 +1 @@
+#include annotation.h
diff --git a/gcc/annotation.h b/gcc/annotation.h
new file mode 100644
index 000..7520677
--- /dev/null
+++ b/gcc/annotation.h
@@ -0,0 +1,285 @@
+/* Annotations handling code.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   Contributed by Martin Liska
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+http://www.gnu.org/licenses/.  */
+
+#ifndef GCC_ANNOTATION_H
+#define GCC_ANNOTATION_H
+
+#include config.h
+#include system.h
+#include coretypes.h
+#include tm.h
+#include tree.h
+#include varasm.h
+#include calls.h
+#include print-tree.h
+#include tree-inline.h
+#include langhooks.h
+#include hashtab.h
+#include toplev.h
+#include flags.h
+#include debug.h
+#include target.h
+#include cgraph.h
+#include hash-map.h
+
+#define ANNOTATION_DELETED_VALUE -1
+#define ANNOTATION_EMPTY_VALUE 0
+
+struct annotation_hashmap_traits: default_hashmap_traits
+{
+  static inline
+  hashval_t hash (const int v)
+  {
+return (hashval_t)v;
+  }
+
+  templatetypename T
+  static inline
+  bool is_deleted (T e)
+  {
+return e.m_key == ANNOTATION_DELETED_VALUE;
+  }
+
+  templatetypename T
+  static inline
+  bool is_empty (T e)
+  {
+return e.m_key == ANNOTATION_EMPTY_VALUE;
+  }
+
+  templatetypename T
+  static inline
+  void mark_deleted (T e)
+  {
+e.m_key = ANNOTATION_DELETED_VALUE;
+  }
+
+  templatetypename T
+  static inline
+  void mark_empty (T e)
+  {
+e.m_key = ANNOTATION_EMPTY_VALUE;
+  }
+};
+
+template class T
+class cgraph_annotation
+{
+public:
+  /* Default construction takes SYMTAB as an argument.  */
+  cgraph_annotation (symbol_table *symtab): m_symtab (symtab)
+  {
+cgraph_node *node;
+
+FOR_EACH_FUNCTION (node)
+{
+  gcc_assert (node-annotation_uid  0);
+  m_reverse_map.put (node, node-annotation_uid);
+}
+
+m_map = new  hash_mapint, T*, annotation_hashmap_traits();
+
+m_symtab_insertion_hook =
+  symtab-add_cgraph_insertion_hook
+  (cgraph_annotation::symtab_insertion, this);
+
+m_symtab_removal_hook =
+  symtab-add_cgraph_removal_hook
+  (cgraph_annotation::symtab_removal, this);
+m_symtab_duplication_hook =
+  symtab-add_cgraph_duplication_hook
+  (cgraph_annotation::symtab_duplication, this);
+
+  }
+
+  /* Destructor.  */
+  ~cgraph_annotation ()
+  {
+m_symtab-remove_cgraph_insertion_hook (m_symtab_insertion_hook);
+m_symtab-remove_cgraph_removal_hook (m_symtab_removal_hook);
+m_symtab-remove_cgraph_duplication_hook (m_symtab_duplication_hook);
+
+m_map-traverse void *, cgraph_annotation::release (NULL);
+  }
+
+  /* Traverses all annotations with a function F called with
+ ARG as argument.  */
+  templatetypename Arg, bool (*f)(const T , Arg)
+  void traverse (Arg a) const
+  {
+m_map-traverse f (a);
+  }
+
+  /* Function for registering insertion hook.  */
+  template void (*f) (const 

Re: [PATCH 3/5] IPA ICF pass

2014-10-15 Thread Martin Liška

On 10/14/2014 06:04 PM, Jan Hubicka wrote:

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index fb41b01..2de98b4 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -172,6 +172,12 @@ public:
/* Dump referring in list to FILE.  */
void dump_referring (FILE *);

+  /* Get number of references for this node.  */
+  inline unsigned get_references_count (void)
+  {
+return ref_list.references ? ref_list.references-length () : 0;
+  }


Probably better called num_references() (like we have num_edge in basic-block.h)

@@ -8068,6 +8069,19 @@ it may significantly increase code size
  (see @option{--param ipcp-unit-growth=@var{value}}).
  This flag is enabled by default at @option{-O3}.

+@item -fipa-icf
+@opindex fipa-icf
+Perform Identical Code Folding for functions and read-only variables.
+The optimization reduces code size and may disturb unwind stacks by replacing
+a function by equivalent one with a different name. The optimization works
+more effectively with link time optimization enabled.
+
+Nevertheless the behavior is similar to Gold Linker ICF optimization, GCC ICF
+works on different levels and thus the optimizations are not same - there are
+equivalences that are found only by GCC and equivalences found only by Gold.
+
+This flag is enabled by default at @option{-O2}.

... and -Os?

+case ARRAY_REF:
+case ARRAY_RANGE_REF:
+  {
+   x1 = TREE_OPERAND (t1, 0);
+   x2 = TREE_OPERAND (t2, 0);
+   y1 = TREE_OPERAND (t1, 1);
+   y2 = TREE_OPERAND (t2, 1);
+
+   if (!compare_operand (array_ref_low_bound (t1),
+ array_ref_low_bound (t2)))
+ return return_false_with_msg ();
+   if (!compare_operand (array_ref_element_size (t1),
+ array_ref_element_size (t2)))
+ return return_false_with_msg ();
+   if (!compare_operand (x1, x2))
+ return return_false_with_msg ();
+   return compare_operand (y1, y2);
+  }


No need for {...} if there are no local vars.

+bool
+func_checker::compare_function_decl (tree t1, tree t2)
+{
+  bool ret = false;
+
+  if (t1 == t2)
+return true;
+
+  symtab_node *n1 = symtab_node::get (t1);
+  symtab_node *n2 = symtab_node::get (t2);
+
+  if (m_ignored_source_nodes != NULL  m_ignored_target_nodes != NULL)
+{
+  ret = m_ignored_source_nodes-contains (n1)
+m_ignored_target_nodes-contains (n2);
+
+  if (ret)
+   return true;
+}
+
+  /* If function decl is WEAKREF, we compare targets.  */
+  cgraph_node *f1 = cgraph_node::get (t1);
+  cgraph_node *f2 = cgraph_node::get (t2);
+
+  if(f1  f2  f1-weakref  f2-weakref)
+ret = f1-alias_target == f2-alias_target;
+
+  return ret;


Comparing aliases is a bit more complicated than just handling weakrefs. I have
a patch for symtab_node::equivalent_address_p somewhere in the queue.  Let's just drop
the fancy stuff for the moment and compare f1 and f2 for equivalence.

+  ret = compare_decl (t1, t2);


Why functions are not compared with compare_decl while variables are?

+
+  return return_with_debug (ret);
+}
+
+void
+func_checker::parse_labels (sem_bb *bb)
+{
+  for (gimple_stmt_iterator gsi = gsi_start_bb (bb-bb); !gsi_end_p (gsi);
+   gsi_next (gsi))
+{
+  gimple stmt = gsi_stmt (gsi);
+
+  if (gimple_code (stmt) == GIMPLE_LABEL)
+   {
+ tree t = gimple_label_label (stmt);
+ gcc_assert (TREE_CODE (t) == LABEL_DECL);
+
+ m_label_bb_map.put (t, bb-bb-index);
+   }
+}
+}
+
+/* Basic block equivalence comparison function that returns true if
+   basic blocks BB1 and BB2 (from functions FUNC1 and FUNC2) correspond.
+
+   In general, a collection of equivalence dictionaries is built for types
+   like SSA names, declarations (VAR_DECL, PARM_DECL, ..). This infrastructure
+   is utilized by every statement-by-stament comparison function.  */
+
+bool
+func_checker::compare_bb (sem_bb *bb1, sem_bb *bb2)
+{
+  unsigned i;
+  gimple_stmt_iterator gsi1, gsi2;
+  gimple s1, s2;
+
+  if (bb1-nondbg_stmt_count != bb2-nondbg_stmt_count
+  || bb1-edge_count != bb2-edge_count)
+return return_false ();
+
+  gsi1 = gsi_start_bb (bb1-bb);
+  gsi2 = gsi_start_bb (bb2-bb);
+
+  for (i = 0; i  bb1-nondbg_stmt_count; i++)
+{
+  if (is_gimple_debug (gsi_stmt (gsi1)))
+   gsi_next_nondebug (gsi1);
+
+  if (is_gimple_debug (gsi_stmt (gsi2)))
+   gsi_next_nondebug (gsi2);
+
+  s1 = gsi_stmt (gsi1);
+  s2 = gsi_stmt (gsi2);
+
+  int eh1 = lookup_stmt_eh_lp_fn
+   (DECL_STRUCT_FUNCTION (m_source_func_decl), s1);
+  int eh2 = lookup_stmt_eh_lp_fn
+   (DECL_STRUCT_FUNCTION (m_target_func_decl), s2);
+
+  if (eh1 != eh2)
+   return return_false_with_msg (EH regions are different);
+
+  if (gimple_code (s1) != gimple_code (s2))
+   return return_false_with_msg (gimple codes are different);
+
+  switch (gimple_code (s1))
+   {
+   case GIMPLE_CALL:
+ if (!compare_gimple_call 

Re: [RFC, PATCH]: Introduction of callgraph annotation class

2014-10-16 Thread Martin Liška

On 10/16/2014 01:31 PM, Richard Biener wrote:

On Wed, Oct 15, 2014 at 6:26 PM, Martin Liška mli...@suse.cz wrote:

Hello.

Following patch introduces a new class called callgraph_annotation. Idea
behind the patch is to provide a generic interface one can use to register
custom info related to a cgraph_node. As you know, symbol_table provides
hooks for creation, deletion and duplication of a cgraph_node. If you have a
pass, you need to handle all these hooks and store custom data in your data
structure.

As an example, after discussion with Martin, I chose usage in ipa-prop.h:

data structure:
vec<ipa_node_params> ipa_node_params_vector

if the pass handles an event, the following chunk is executed:
if (ipa_node_params_vector.length () <= (unsigned) symtab->cgraph_max_uid)
  ipa_node_params_vector.safe_grow_cleared (symtab->cgraph_max_uid + 1);

The problem is that you can have sparse UIDs of cgraph_nodes and every time
you have to allocate a vector of size equal to cgraph_max_uid.

As a replacement, I implemented first version of cgraph_annotation that
internally uses hash_mapcgraph_unique_identifier, T.
Every time a node is deleted, we remove corresponding data associated to the
node.

What do you think about it?


I don't like generic annotation facilities at all.  Would it be possible
to make cgraph UIDs not sparse?  (keep a free-list of cgraph nodes
with UID < cgraph_max_uid, only really free nodes at the end)
Using a different data structure than a vector indexed by cgraph UID
should also be easily possible (a map from UID to data, hash_map<int, T>).


Hello.

If I recall correctly, we recycle cgraph_nodes and it's possible that an UID is 
given to different nodes:
symbol_table::allocate_cgraph_symbol (void). Such uid is problematic from 
perspective that it cannot be used as a index to a vector.

It was also Honza's note that one can choose inner implementation of such 
annotation class. We can implement both sparse (hash_map) and consecutive 
vector data structure.

According to the first numbers I was given, Inkscape allocates about 64k
cgraph_nodes in WPA. After function merging is processed, it shrinks to about a
half, so our free list contains half of the nodes. If we use a consecutive
vector, our memory impact is bigger than necessary.

Martin




Richard.


Thank you,
Martin




Re: [RFC, PATCH]: Introduction of callgraph annotation class

2014-10-16 Thread Martin Liška

On 10/16/2014 02:01 PM, Jan Hubicka wrote:

Hello.

If I recall correctly, we recycle cgraph_nodes and it's possible that an UID
is given to different nodes:
symbol_table::allocate_cgraph_symbol (void). Such uid is problematic from
perspective that it cannot be used as a index to a vector.

It was also Honza's note that one can choose inner implementation of such
annotation class. We can implement both sparse (hash_map) and consecutive
vector data structure.

According to first numbers I was given, Inkscape allocates about ~64k
cgraph_nodes in WPA. After function merging is processed, it shrinks to
about a half. So that, our free list contains the half of nodes. If we use
consecutive vector, our memory impact is bigger thank necessary.


I don't think there is anything that forces us to retain the original
UID allocation after WPA merging?  So why not compact it?


We could, if we have way to update the summaries that are currently UID 
allocated.
With annotation template we could have handle to do that more easily than 
diving into
each of passes maintaining summaries by hand.


Agree with that, I will be central point one can implement these optimizations.
One idea is to implement lazy allocation where we can allocate memory just in 
case
someone calls annotation::get method.



On the other hand it still does not make the records quite dense in cases
  1) you do not want to have separate records for clones because you know clones
 and master are identical


It would be quite easy to implement
annotation::get_for_origin (int clone_id), where we find origin for the clone 
and
return data associated to such origin node.


  2) you care only about definitions


Maybe similar stuff?

Martin


...
At some point we discussed introducing separate UIDs for those but that was also
not very welcome (and I agree we already have bit too many UIDs for functions - 
DECL_UID,
node-uid, DECL_STRUCT_FUNCTION (node)-uid, profile_uid

I tried to get rid of DECL_STRUCT_FUNCTION uid at some point, but did not quite 
finished
it.

Honza


Richard.


Martin




Richard.


Thank you,
Martin







Re: [RFC, PATCH]: Introduction of callgraph annotation class

2014-10-16 Thread Martin Liška

On 10/16/2014 02:05 PM, Martin Liška wrote:

On 10/16/2014 02:01 PM, Jan Hubicka wrote:

Hello.

If I recall correctly, we recycle cgraph_nodes and it's possible that an UID
is given to different nodes:
symbol_table::allocate_cgraph_symbol (void). Such uid is problematic from
perspective that it cannot be used as a index to a vector.

It was also Honza's note that one can choose inner implementation of such
annotation class. We can implement both sparse (hash_map) and consecutive
vector data structure.

According to first numbers I was given, Inkscape allocates about ~64k
cgraph_nodes in WPA. After function merging is processed, it shrinks to
about a half. So that, our free list contains the half of nodes. If we use
consecutive vector, our memory impact is bigger thank necessary.


I don't think there is anything that forces us to retain the original
UID allocation after WPA merging?  So why not compact it?


We could, if we have way to update the summaries that are currently UID 
allocated.
With annotation template we could have handle to do that more easily than 
diving into
each of passes maintaining summaries by hand.


Agree with that, I will be central point one can implement these optimizations.


s/I/it


One idea is to implement lazy allocation where we can allocate memory just in 
case
someone calls annotation::get method.



On the other hand it still does not make the records quite dense in cases
  1) you do not want to have separate records for clones because you know clones
 and master are identical


It would be quite easy to implement
annotation::get_for_origin (int clone_id), where we find origin for the clone 
and
return data associated to such origin node.


  2) you care only about definitions


Maybe similar stuff?

Martin


...
At some point we discussed introducing separate UIDs for those but that was also
not very welcome (and I agree we already have bit too many UIDs for functions - 
DECL_UID,
node-uid, DECL_STRUCT_FUNCTION (node)-uid, profile_uid

I tried to get rid of DECL_STRUCT_FUNCTION uid at some point, but did not quite 
finished
it.

Honza


Richard.


Martin




Richard.


Thank you,
Martin









IPA ICF fallout: i586 bootstrap failure fix

2014-10-16 Thread Martin Liška

Hello.

Following patch fixes i586 bootstrap failure: 
https://gcc.gnu.org/ml/gcc-testresults/2014-10/msg01713.html

../../src-trunk/gcc/ipa-icf.c:2096:23: error: format ‘%lu’ expects argument of type 
‘long unsigned int’, but argument 3 has type 
‘std::listipa_icf::congruence_class*::size_type {aka unsigned int}’ 
[-Werror=format=]
   worklist.size ());
   ^
../../src-trunk/gcc/ipa-icf.c: In member function ‘void 
ipa_icf::sem_item_optimizer::dump_cong_classes()’:
../../src-trunk/gcc/ipa-icf.c:2116:61: error: format ‘%lu’ expects argument of 
type ‘long unsigned int’, but argument 4 has type ‘size_t {aka unsigned int}’ 
[-Werror=format=]
 m_classes_count, m_classes.elements(), m_items.length ());

Ready for trunk?

Thank you,
Martin
gcc/ChangeLog:

2014-10-16  Martin Liska  mli...@suse.cz

* ipa-icf.c (sem_item_optimizer::process_cong_reduction): size_t cast 
added.
(sem_item_optimizer::dump_cong_classes): Likewise.
diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c
index 4e73849..f7510b3 100644
--- a/gcc/ipa-icf.c
+++ b/gcc/ipa-icf.c
@@ -2093,7 +2093,7 @@ sem_item_optimizer::process_cong_reduction (void)
 
   if (dump_file)
 fprintf (dump_file, Worklist has been filled with: %lu\n,
-	 worklist.size ());
+	 (unsigned long) worklist.size ());
 
   if (dump_file  (dump_flags  TDF_DETAILS))
 fprintf (dump_file, Congruence class reduction\n);
@@ -2113,7 +2113,7 @@ sem_item_optimizer::dump_cong_classes (void)
 
   fprintf (dump_file,
 	   Congruence classes: %u (unique hash values: %lu), with total: %u items\n,
-	   m_classes_count, m_classes.elements(), m_items.length ());
+	   m_classes_count, (unsigned long) m_classes.elements(), m_items.length ());
 
   /* Histogram calculation.  */
   unsigned int max_index = 0;


Re: IPA ICF fallout: i586 bootstrap failure fix

2014-10-16 Thread Martin Liška

On 10/16/2014 02:45 PM, Jakub Jelinek wrote:

On Thu, Oct 16, 2014 at 02:41:36PM +0200, Martin Liška wrote:

Hello.

Following patch fixes i586 bootstrap failure: 
https://gcc.gnu.org/ml/gcc-testresults/2014-10/msg01713.html

../../src-trunk/gcc/ipa-icf.c:2096:23: error: format ‘%lu’ expects argument of type 
‘long unsigned int’, but argument 3 has type 
‘std::listipa_icf::congruence_class*::size_type {aka unsigned int}’ 
[-Werror=format=]
worklist.size ());
^
../../src-trunk/gcc/ipa-icf.c: In member function ‘void 
ipa_icf::sem_item_optimizer::dump_cong_classes()’:
../../src-trunk/gcc/ipa-icf.c:2116:61: error: format ‘%lu’ expects argument of 
type ‘long unsigned int’, but argument 4 has type ‘size_t {aka unsigned int}’ 
[-Werror=format=]
  m_classes_count, m_classes.elements(), m_items.length ());

Ready for thunk?

Thank you,
Martin



gcc/ChangeLog:

2014-10-16  Martin Liska  mli...@suse.cz

* ipa-icf.c (sem_item_optimizer::process_cong_reduction): size_t cast 
added.


Too long line, please wrap.  I'd write Cast to unsigned long. instead.


Thank you for note, applied as suggested.

Martin




(sem_item_optimizer::dump_cong_classes): Likewise.


Ok.

Jakub





IPA ICF fallout: fix for two ipa-icf-*.C tests

2014-10-16 Thread Martin Liška

Hello.

I forgot that RUNTESTFLAGS=ipa.exp does not execute *.C tests cases residing 
in testsuite/g++.dg/ipa/ folder.
The patch fixes two of my newly added tests.

Considered as obvious.

Thank you,
Martin
gcc/testsuite/ChangeLog:

2014-10-16  Martin Liska  mli...@suse.cz

* g++.dg/ipa/ipa-icf-4.C: Correct number of equivalences
set.
* g++.dg/ipa/ipa-icf-5.C: Likewise
diff --git a/gcc/testsuite/g++.dg/ipa/ipa-icf-4.C b/gcc/testsuite/g++.dg/ipa/ipa-icf-4.C
index 9d17889..9434289 100644
--- a/gcc/testsuite/g++.dg/ipa/ipa-icf-4.C
+++ b/gcc/testsuite/g++.dg/ipa/ipa-icf-4.C
@@ -44,5 +44,5 @@ int main()
 }
 
 /* { dg-final { scan-ipa-dump Varpool alias has been created icf  } } */
-/* { dg-final { scan-ipa-dump Equal symbols: 2 icf  } } */
+/* { dg-final { scan-ipa-dump Equal symbols: 6 icf  } } */
 /* { dg-final { cleanup-ipa-dump icf } } */
diff --git a/gcc/testsuite/g++.dg/ipa/ipa-icf-5.C b/gcc/testsuite/g++.dg/ipa/ipa-icf-5.C
index 728df20..7dbce88 100644
--- a/gcc/testsuite/g++.dg/ipa/ipa-icf-5.C
+++ b/gcc/testsuite/g++.dg/ipa/ipa-icf-5.C
@@ -19,6 +19,5 @@ int main()
   return myarray.a - myarray_alias.a;
 }
 
-/* { dg-final { scan-ipa-dump "Varpool alias cannot be created \\(alias cycle\\)." "icf"  } } */
-/* { dg-final { scan-ipa-dump "Equal symbols: 1" "icf"  } } */
+/* { dg-final { scan-ipa-dump "Equal symbols: 0" "icf"  } } */
 /* { dg-final { cleanup-ipa-dump icf } } */


IPA ICF fallout: fix for libasan and pr43077-1.c

2014-10-16 Thread Martin Liška

Hello.

After an IRC discussion with Jakub, I have disabled IPA ICF for ASAN.
The second part of the patch contains a fix for pr43077-1.c.

Considered as pre-approved.
Thanks,
Martin
gcc/testsuite/ChangeLog:

2014-10-16  Martin Liska  mli...@suse.cz

* gcc.dg/guality/pr43077-1.c: IPA ICF disabled
to match defined expectations.

libsanitizer/ChangeLog:

2014-10-16  Martin Liska  mli...@suse.cz

* asan/Makefile.am: IPA ICF pass is disabled.
* asan/Makefile.in: Likewise.

diff --git a/gcc/testsuite/gcc.dg/guality/pr43077-1.c b/gcc/testsuite/gcc.dg/guality/pr43077-1.c
index d8d5eeb..84bd59e 100644
--- a/gcc/testsuite/gcc.dg/guality/pr43077-1.c
+++ b/gcc/testsuite/gcc.dg/guality/pr43077-1.c
@@ -1,6 +1,6 @@
 /* PR debug/43077 */
 /* { dg-do run } */
-/* { dg-options "-g" } */
+/* { dg-options "-g -fno-ipa-icf" } */
 
 int varb;
 
diff --git a/libsanitizer/asan/Makefile.am b/libsanitizer/asan/Makefile.am
index 12f20ae..d499c72 100644
--- a/libsanitizer/asan/Makefile.am
+++ b/libsanitizer/asan/Makefile.am
@@ -7,7 +7,7 @@ DEFS = -D_GNU_SOURCE -D_DEBUG -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D
 if USING_MAC_INTERPOSE
 DEFS += -DMAC_INTERPOSE_FUNCTIONS -DMISSING_BLOCKS_SUPPORT
 endif
-AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings -pedantic -Wno-long-long  -fPIC -fno-builtin -fno-exceptions -fno-rtti -fomit-frame-pointer -funwind-tables -fvisibility=hidden -Wno-variadic-macros
+AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings -pedantic -Wno-long-long  -fPIC -fno-builtin -fno-exceptions -fno-rtti -fomit-frame-pointer -funwind-tables -fvisibility=hidden -Wno-variadic-macros -fno-ipa-icf
 AM_CXXFLAGS += $(LIBSTDCXX_RAW_CXX_CXXFLAGS)
 ACLOCAL_AMFLAGS = -I $(top_srcdir) -I $(top_srcdir)/config
 
diff --git a/libsanitizer/asan/Makefile.in b/libsanitizer/asan/Makefile.in
index 862eec4..00a614b 100644
--- a/libsanitizer/asan/Makefile.in
+++ b/libsanitizer/asan/Makefile.in
@@ -269,7 +269,8 @@ gcc_version := $(shell cat $(top_srcdir)/../gcc/BASE-VER)
 AM_CXXFLAGS = -Wall -W -Wno-unused-parameter -Wwrite-strings -pedantic \
 	-Wno-long-long -fPIC -fno-builtin -fno-exceptions -fno-rtti \
 	-fomit-frame-pointer -funwind-tables -fvisibility=hidden \
-	-Wno-variadic-macros $(LIBSTDCXX_RAW_CXX_CXXFLAGS)
+	-Wno-variadic-macros -fno-ipa-icf \
+	$(LIBSTDCXX_RAW_CXX_CXXFLAGS)
 ACLOCAL_AMFLAGS = -I $(top_srcdir) -I $(top_srcdir)/config
 toolexeclib_LTLIBRARIES = libasan.la
 nodist_toolexeclib_HEADERS = libasan_preinit.o


[PATCH,i686]: Temporary fix for PR63566

2014-10-17 Thread Martin Liška

Hello.

After an IRC discussion, IPA ICF will set the local flag to false for both the 
original node and the node that becomes an alias.
That will enforce an equal calling convention to be used.

The i686-pc-linux bootstrap is still running; I will commit the fix as soon 
as it finishes.
I consider it pre-approved.

Thank you,
Martin
gcc/ChangeLog:

2014-10-17  Martin Liska  mli...@suse.cz

* ipa-icf.c (sem_function::merge): Local flags are set to false
to enforce equal calling convention to be used.
* opts.c (common_handle_option): Indentation fix.
diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c
index f7510b3..0e6bd9a 100644
--- a/gcc/ipa-icf.c
+++ b/gcc/ipa-icf.c
@@ -630,6 +630,11 @@ sem_function::merge (sem_item *alias_item)
   cgraph_node::create_alias (alias_func->decl, decl);
   alias->resolve_alias (original);
 
+  /* Workaround for PR63566 that forces equal calling convention
+	 to be used.  */
+  alias->local.local = false;
+  original->local.local = false;
+
   if (dump_file)
 	fprintf (dump_file, "Callgraph alias has been created.\n\n");
 }
diff --git a/gcc/opts.c b/gcc/opts.c
index dc8ddf4..3054196 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -1982,8 +1982,8 @@ common_handle_option (struct gcc_options *opts,
   break;
 
 case OPT_fipa_icf:
-	opts->x_flag_ipa_icf_functions = value;
-	opts->x_flag_ipa_icf_variables = value;
+  opts->x_flag_ipa_icf_functions = value;
+  opts->x_flag_ipa_icf_variables = value;
   break;
 
 default:


[PATCH] Fix for PR63569

2014-10-17 Thread Martin Liška

Hello.

Following patch fixes PR63569.

Bootstrap executed on ppc64-linux and no regression seen on x86_64-pc-linux.
Ready for trunk?

Thank you,
Martin
gcc/testsuite/ChangeLog:

2014-10-17  Martin Liska  mli...@suse.cz

* gcc.dg/ipa/ipa-icf-31.c: New test.


gcc/ChangeLog:

2014-10-17  Martin Liska  mli...@suse.cz

* ipa-icf-gimple.c (func_checker::compare_volatility): New function.
(func_checker::compare_gimple_call): Volatility check added.
(func_checker::compare_gimple_assign): Likewise.
* ipa-icf-gimple.h: New function.
diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c
index 792a3e4..1b9ee85 100644
--- a/gcc/ipa-icf-gimple.c
+++ b/gcc/ipa-icf-gimple.c
@@ -452,6 +452,17 @@ func_checker::compare_tree_list_operand (tree t1, tree t2)
   return true;
 }
 
+/* Compares if both trees T1 and T2 have equal volatility.  */
+
+bool
+func_checker::compare_volatility (tree t1, tree t2)
+{
+  if (t1 && t2)
+return TREE_THIS_VOLATILE (t1) == TREE_THIS_VOLATILE (t2);
+
+  return !(t1 || t2);
+}
+
 /* Verifies that trees T1 and T2, representing function declarations
are equivalent from perspective of ICF.  */
 
@@ -663,6 +674,9 @@ func_checker::compare_gimple_call (gimple s1, gimple s2)
   t1 = gimple_get_lhs (s1);
   t2 = gimple_get_lhs (s2);
 
+  if (!compare_volatility (t1, t2))
+return return_false_with_msg ("different volatility for call statement");
+
   return compare_operand (t1, t2);
 }
 
@@ -696,8 +710,11 @@ func_checker::compare_gimple_assign (gimple s1, gimple s2)
 
   if (!compare_operand (arg1, arg2))
 	return false;
-}
 
+  if (!compare_volatility (arg1, arg2))
+	return return_false_with_msg ("different volatility for assignment "
+	  "statement");
+}
 
   return true;
 }
diff --git a/gcc/ipa-icf-gimple.h b/gcc/ipa-icf-gimple.h
index 8487a2a..b791c21 100644
--- a/gcc/ipa-icf-gimple.h
+++ b/gcc/ipa-icf-gimple.h
@@ -209,6 +209,10 @@ public:
  two trees are semantically equivalent.  */
   bool compare_tree_list_operand (tree t1, tree t2);
 
+  /* Compares two tree list operands T1 and T2 and returns true if these
+ two trees are semantically equivalent.  */
+  bool compare_volatility (tree t1, tree t2);
+
   /* Verifies that trees T1 and T2, representing function declarations
  are equivalent from perspective of ICF.  */
   bool compare_function_decl (tree t1, tree t2);
diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-icf-31.c b/gcc/testsuite/gcc.dg/ipa/ipa-icf-31.c
new file mode 100644
index 000..e70d72d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/ipa-icf-31.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-fipa-icf -fdump-ipa-icf-details"  } */
+
+
+static int f(int t, int *a) __attribute__((noinline));
+
+static int g(int t, volatile int *a) __attribute__((noinline));
+static int g(int t, volatile int *a)
+{
+  int i;
+  int tt = 0;
+  for(i=0;i<t;i++)
+tt += *a;
+  return tt;
+}
+static int f(int t, int *a)
+{
+  int i;
+  int tt = 0;
+  for(i=0;i<t;i++)
+tt += *a;
+  return tt;
+}
+
+
+int main()
+{
+  return 0;
+}
+
+/* { dg-final { scan-ipa-dump "Equal symbols: 0" "icf"  } } */
+/* { dg-final { scan-ipa-dump "different volatility for assignment statement" "icf"  } } */
+/* { dg-final { cleanup-ipa-dump icf } } */


[PATCH] Fix for PR63583

2014-10-19 Thread Martin Liška
Hello.

I added missing gimple_asm_string comparison for a function with an asm 
statement.
Bootstrap and regression tests still running, ready for trunk after it finishes?

Thank you,
Martin
gcc/ChangeLog:

2014-10-19  Martin Liska  mli...@suse.cz

* ipa-icf-gimple.c (func_checker::compare_gimple_asm):
Gimple template string is compared.

gcc/testsuite/ChangeLog:

2014-10-19  Martin Liska  mli...@suse.cz

* gcc.dg/ipa/pr63595.c: New test.
diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c
index 792a3e4..1369b74 100644
--- a/gcc/ipa-icf-gimple.c
+++ b/gcc/ipa-icf-gimple.c
@@ -863,6 +863,9 @@ func_checker::compare_gimple_asm (gimple g1, gimple g2)
   if (gimple_asm_nclobbers (g1) != gimple_asm_nclobbers (g2))
 return false;
 
+  if (strcmp (gimple_asm_string (g1), gimple_asm_string (g2)) != 0)
+return return_false_with_msg ("ASM strings are different");
+
   for (unsigned i = 0; i < gimple_asm_ninputs (g1); i++)
 {
   tree input1 = gimple_asm_input_op (g1, i);
diff --git a/gcc/testsuite/gcc.dg/ipa/pr63595.c b/gcc/testsuite/gcc.dg/ipa/pr63595.c
new file mode 100644
index 000..9c9f3bf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/pr63595.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-ipa-icf-details"  } */
+
+static int f(int t) __attribute__((noinline));
+
+static int g(int t) __attribute__((noinline));
+static int g(int t)
+{
+asm("addl %0, 1": "+r"(t));  
+  return t;
+}
+static int f(int t)
+{
+asm("addq %0, -1": "+r"(t));
+  return t;
+}
+
+
+int h(int t)
+{
+return f(t) + g(t);
+}
+
+/* { dg-final { scan-ipa-dump "ASM strings are different" "icf"  } } */
+/* { dg-final { scan-ipa-dump "Equal symbols: 0" "icf"  } } */
+/* { dg-final { cleanup-ipa-dump icf } } */


[PATCH] g++.dg: add ipa.exp file

2014-03-28 Thread Martin Liška

Hi,
   I would like to add a corresponding ipa.exp file for g++ that lets me 
run: make -k check RUNTESTFLAGS=ipa.exp


Changelog:

2014-03-28  Martin Liska  mli...@suse.cz

* g++.dg/ipa.exp: Analogous file added to g++.dg folder.

OK for trunk?

Thank you,
Martin
diff --git a/gcc/testsuite/g++.dg/ipa/ipa.exp b/gcc/testsuite/g++.dg/ipa/ipa.exp
new file mode 100644
index 000..af7b8a7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ipa/ipa.exp
@@ -0,0 +1,35 @@
+# Copyright (C) 1997-2014 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# G++ testsuite that uses the `dg.exp' driver.
+
+# Load support procs.
+load_lib g++-dg.exp
+
+# If a testcase doesn't have special options, use these.
+global DEFAULT_CXXFLAGS
+if ![info exists DEFAULT_CXXFLAGS] then {
+set DEFAULT_CXXFLAGS " -pedantic-errors -Wno-long-long"
+}
+
+# Initialize `dg'.
+dg-init
+
+# Main loop.
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[C\]]] "" $DEFAULT_CXXFLAGS
+
+# All done.
+dg-finish


Re: Fix indirect call profiling for COMDAT symbols

2014-04-11 Thread Martin Liška

On 04/11/2014 08:00 AM, Jan Hubicka wrote:

Hi,
while looking into firefox profiles, I noticed that we miss devirtualizations
to comdat symbols, because we manage to get different profile_id in each
unit.  This is easily fixed by the following patch that makes profile_id
the crc32 of the symbol name in this case.

Bootstrapped/regtested x86_64-linux, tested with firefox, will
commit it tomorrow.

* coverage.c (coverage_compute_profile_id): Make stable for
global symbols
* ipa-utils.c (ipa_merge_profiles): Merge profile_id.
* lto/lto-symtab.c (lto_cgraph_replace_node): Don't re-merge
tp_first_run.
Index: coverage.c
===
--- coverage.c  (revision 209170)
+++ coverage.c  (working copy)
@@ -555,18 +555,31 @@ coverage_compute_lineno_checksum (void)
  unsigned
  coverage_compute_profile_id (struct cgraph_node *n)
  {
-  expanded_location xloc
-= expand_location (DECL_SOURCE_LOCATION (n-decl));
-  unsigned chksum = xloc.line;
+  unsigned chksum;
  
-  chksum = coverage_checksum_string (chksum, xloc.file);

-  chksum = coverage_checksum_string
-(chksum, IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n-decl)));
-  if (first_global_object_name)
-chksum = coverage_checksum_string
-  (chksum, first_global_object_name);
-  chksum = coverage_checksum_string
-(chksum, aux_base_name);
+  /* Externally visible symbols have unique name.  */
+  if (TREE_PUBLIC (n-decl) || DECL_EXTERNAL (n-decl))
+{
+  /* Do not use coverage_checksum_string here; we really want unique
+symbol name id.  */
+  chksum = crc32_string
+   (0, IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n-decl)));
+}
+  else
+{
+  expanded_location xloc
+   = expand_location (DECL_SOURCE_LOCATION (n-decl));
+
+  chksum = xloc.line;
+  chksum = coverage_checksum_string (chksum, xloc.file);
+  chksum = coverage_checksum_string
+   (chksum, IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n-decl)));
+  if (first_global_object_name)
+   chksum = coverage_checksum_string
+ (chksum, first_global_object_name);
+  chksum = coverage_checksum_string
+   (chksum, aux_base_name);
+}
  
/* Non-negative integers are hopefully small enough to fit in all targets.  */

return chksum  0x7fff;
Index: ipa-utils.c
===
--- ipa-utils.c (revision 209170)
+++ ipa-utils.c (working copy)
@@ -660,6 +660,21 @@ ipa_merge_profiles (struct cgraph_node *
if (dst-tp_first_run  src-tp_first_run  src-tp_first_run)
  dst-tp_first_run = src-tp_first_run;
  
+  if (src-profile_id)

+{
+  if (!dst-profile_id)
+   dst-profile_id = src-profile_id;
+  else
+   {
+if (src-profile_id != dst-profile_id)
+  {
+dump_cgraph_node (stderr, src);
+dump_cgraph_node (stderr, dst);
+  }
+gcc_assert (src-profile_id == dst-profile_id);
+   }
+}
+
if (!dst-count)
  return;
if (cgraph_dump_file)
Index: lto/lto-symtab.c
===
--- lto/lto-symtab.c(revision 209170)
+++ lto/lto-symtab.c(working copy)
@@ -91,12 +91,6 @@ lto_cgraph_replace_node (struct cgraph_n
if (node-decl != prevailing_node-decl)
  cgraph_release_function_body (node);
  
-  /* Time profile merging */

-  if (node-tp_first_run)
-prevailing_node-tp_first_run = prevailing_node-tp_first_run ?
-  MIN (prevailing_node-tp_first_run, node-tp_first_run) :
-  node-tp_first_run;
-


Hello Honza,
   I just want to ask whether this time profile merging is no longer 
necessary?


Martin

/* Finally remove the replaced node.  */
cgraph_remove_node (node);
  }




Re: Avoid unnecesary GGC runs during LTO

2014-04-11 Thread Martin Liška

On 04/11/2014 08:07 AM, Jan Hubicka wrote:

Hi,
while looking into -ftime-report, I noticed that ggc can take up to 10% of WPA 
memory
while it does almost nothing: it is run just after streaming that explicitly
frees memory that becomes unreachable.  The first GGC run usually saves at
most 1% of memory and then it is never run again.
I believe this ought to also help in case we get into swap, since ltranses will
also ggc less.

Bootstrapped/regtested x86_64-linux, OK?

Hi!

I applied both patches you sent today, and here are the Firefox LTO -O3 
results: 
https://drive.google.com/file/d/0B0pisUJ80pO1ajRzLWFneTJpcE0/edit?usp=sharing

It shows that you saved a bit of memory in WPA.

Martin



Honza

* lto.c (read_cgraph_and_symbols): Grow ggc memory after streaming.
* ggc.h (ggc_grow): New function.
* ggc-none.c (ggc_grow): New function.
* ggc-page.c (ggc_grow): Likewise.
Index: ggc.h
===
--- ggc.h   (revision 209170)
+++ ggc.h   (working copy)
@@ -225,6 +225,9 @@ extern const char *ggc_alloc_string_stat
 function is called, not during allocations.  */
  extern void ggc_collect   (void);
  
+/* Assume that all GGC memory is reachable and grow the limits for next collection. */

+extern void ggc_grow (void);
+
  /* Register an additional root table.  This can be useful for some
 plugins.  Does nothing if the passed pointer is NULL. */
  extern void ggc_register_root_tab (const struct ggc_root_tab *);
Index: lto/lto.c
===
--- lto/lto.c   (revision 209170)
+++ lto/lto.c   (working copy)
@@ -2999,6 +3000,10 @@ read_cgraph_and_symbols (unsigned nfiles
gimple_canonical_types = NULL;
delete canonical_type_hash_cache;
canonical_type_hash_cache = NULL;
+
+  /* At this stage we know that majority of GGC memory is reachable.
+ Growing the limits prevents unnecessary invocation of GGC.  */
+  ggc_grow ();
ggc_collect ();
  
/* Set the hooks so that all of the ipa passes can read in their data.  */

Index: ggc-none.c
===
--- ggc-none.c  (revision 209170)
+++ ggc-none.c  (working copy)
@@ -63,3 +63,8 @@ ggc_free (void *p)
  {
free (p);
  }
+
+void
+ggc_grow (void)
+{
+}
Index: ggc-page.c
===
--- ggc-page.c  (revision 209170)
+++ ggc-page.c  (working copy)
@@ -2095,6 +2095,19 @@ ggc_collect (void)
  fprintf (G.debug_file, END COLLECTING\n);
  }
  
+/* Assume that all GGC memory is reachable and grow the limits for next collection. */

+
+void
+ggc_grow (void)
+{
+#ifndef ENABLE_CHECKING
+  G.allocated_last_gc = MAX (G.allocated_last_gc,
+G.allocated);
+#endif
+  if (!quiet_flag)
+fprintf (stderr,  {GC start %luk} , (unsigned long) G.allocated / 1024);
+}
+
  /* Print allocation statistics.  */
  #define SCALE(x) ((unsigned long) ((x)  1024*10 \
  ? (x) \




Re: ipa-visibility TLC 2/n

2014-05-26 Thread Martin Liška


On 05/25/2014 07:54 AM, Jan Hubicka wrote:

Hi,
this patch adds code to rewrite references in vtable initializers to local 
aliases
when doing so is a win.

Bootstrapped/regtested x86_64-linux, committed.

Honza

* ipa-visibility.c (can_replace_by_local_alias_in_vtable): New function.
(update_vtable_references): New function.
(function_and_variable_visibility): Rewrite also vtable initializers.
* varpool.c (cgraph_variable_initializer_availability): Remove assert.
Index: varpool.c
===
--- varpool.c   (revision 210908)
+++ varpool.c   (working copy)
@@ -355,7 +355,6 @@ varpool_add_new_variable (tree decl)
  enum availability
  cgraph_variable_initializer_availability (varpool_node *node)
  {
-  gcc_assert (cgraph_function_flags_ready);
if (!node-definition)
  return AVAIL_NOT_AVAILABLE;
if (!TREE_PUBLIC (node-decl))
Index: ipa-visibility.c
===
--- ipa-visibility.c(revision 210908)
+++ ipa-visibility.c(working copy)
@@ -343,6 +343,36 @@ can_replace_by_local_alias (symtab_node
   !symtab_can_be_discarded (node));
  }
  
+/* Return true if we can replace refernece to NODE by local alias


Hello Jan,

s/refernece/reference


+   within a virtual table.  Generally we can replace function pointers
+   and virtual table pointers.  */
+
+bool
+can_replace_by_local_alias_in_vtable (symtab_node *node)
+{
+  if (is_a varpool_node * (node)
+   !DECL_VIRTUAL_P (node-decl))
+return false;
+  return can_replace_by_local_alias (node);
+}
+
+/* walk_tree callback that rewrites initializer references.   */
+
+static tree
+update_vtable_references (tree *tp, int *walk_subtrees, void *data 
ATTRIBUTE_UNUSED)
+{
+  if (TREE_CODE (*tp) == VAR_DECL
+  || TREE_CODE (*tp) == FUNCTION_DECL)
+{
+  if (can_replace_by_local_alias_in_vtable (symtab_get_node (*tp)))
+   *tp = symtab_nonoverwritable_alias (symtab_get_node (*tp))-decl;
+  *walk_subtrees = 0;
+}
+  else if (IS_TYPE_OR_DECL_P (*tp))
+*walk_subtrees = 0;
+  return NULL;
+}
+
  /* In LTO we can remove COMDAT groups and weak symbols.
 Either turn them into normal symbols or external symbol depending on
 resolution info.  */
@@ -625,6 +655,34 @@ function_and_variable_visibility (bool w
  vnode-resolution = LDPR_PREVAILING_DEF_IRONLY;
}
update_visibility_by_resolution_info (vnode);
+
+  /* Update virutal tables to point to local aliases where possible.  */

s/virutal/virtual

Martin

+  if (DECL_VIRTUAL_P (vnode-decl)
+  !DECL_EXTERNAL (vnode-decl))
+   {
+ int i;
+ struct ipa_ref *ref;
+ bool found = false;
+
+ /* See if there is something to update.  */
+ for (i = 0; ipa_ref_list_referring_iterate (vnode-ref_list,
+ i, ref); i++)
+   if (ref-use == IPA_REF_ADDR
+can_replace_by_local_alias_in_vtable (ref-referred))
+ {
+   found = true;
+   break;
+ }
+ if (found)
+   {
+ struct pointer_set_t *visited_nodes = pointer_set_create ();
+ walk_tree (DECL_INITIAL (vnode-decl),
+update_vtable_references, NULL, visited_nodes);
+ pointer_set_destroy (visited_nodes);
+ ipa_remove_all_references (vnode-ref_list);
+ record_references_in_initializer (vnode-decl, false);
+   }
+   }
  }
  
if (dump_file)




[PATCH 1/4] Make coverage_compute_cfg_checksum callable with an argument

2014-05-30 Thread Martin Liška

Hello,
  this is a small patchset that prepares the API for the new IPA Identical Code 
Folding pass. The patch adds an argument to coverage_compute_cfg_checksum.

Bootstrapped and tested on x86_64-linux.
OK for trunk?

Thanks,
Martin

2014-05-29  Martin Liska  mli...@suse.cz

* coverage.h (coverage_compute_cfg_checksum): Argument added.
* coverage.c (coverage_compute_cfg_checksum): Likewise.
* profile.c (branch_prob): Likewise.

diff --git a/gcc/coverage.c b/gcc/coverage.c
index 5e9005e..9d81387 100644
--- a/gcc/coverage.c
+++ b/gcc/coverage.c
@@ -594,12 +594,12 @@ coverage_compute_profile_id (struct cgraph_node *n)
but the compiler won't detect the change and use the wrong profile data.  */
 
 unsigned

-coverage_compute_cfg_checksum (void)
+coverage_compute_cfg_checksum (struct function *fn)
 {
   basic_block bb;
-  unsigned chksum = n_basic_blocks_for_fn (cfun);
+  unsigned chksum = n_basic_blocks_for_fn (fn);
 
-  FOR_EACH_BB_FN (bb, cfun)

+  FOR_EACH_BB_FN (bb, fn)
 {
   edge e;
   edge_iterator ei;
diff --git a/gcc/coverage.h b/gcc/coverage.h
index 81f87a6..392080c 100644
--- a/gcc/coverage.h
+++ b/gcc/coverage.h
@@ -32,8 +32,8 @@ extern int coverage_begin_function (unsigned, unsigned);
 /* Complete the coverage information for the current function.  */
 extern void coverage_end_function (unsigned, unsigned);
 
-/* Compute the control flow checksum for the current function.  */

-extern unsigned coverage_compute_cfg_checksum (void);
+/* Compute the control flow checksum for the FUNCTION given as argument.  */
+extern unsigned coverage_compute_cfg_checksum (struct function *);
 
 /* Compute the profile id of function N.  */

 extern unsigned coverage_compute_profile_id (struct cgraph_node *n);
diff --git a/gcc/profile.c b/gcc/profile.c
index 3282ee7..4e82eab 100644
--- a/gcc/profile.c
+++ b/gcc/profile.c
@@ -1195,7 +1195,7 @@ branch_prob (void)
  the checksum in only once place, since it depends on the shape
  of the control flow which can change during
  various transformations.  */
-  cfg_checksum = coverage_compute_cfg_checksum ();
+  cfg_checksum = coverage_compute_cfg_checksum (cfun);
   lineno_checksum = coverage_compute_lineno_checksum ();
 
   /* Write the data from which gcov can reconstruct the basic block

--
1.8.4.5




[PATCH 2/4] Enhancement of call graph API

2014-05-30 Thread Martin Liška

Hello,
   this patch enhances callgraph API to enable more precise control of 
expand_thunk; another function becomes global.

Bootstrapped and tested on x86_64-linux.
OK for trunk?

Thanks,
Martin

2014-05-29  Martin Liska  mli...@suse.cz

* cgraph.h (expand_thunk): New argument added.
(address_taken_from_non_vtable_p): New global function.
* ipa-visibility.c (address_taken_from_non_vtable_p): Likewise.
* cgraphclones.c (duplicate_thunk_for_node): Argument added to call.
* cgraphunit.c (analyze_function): Likewise.
(assemble_thunks_and_aliases): Argument added to call.
(expand_thunk): New argument forces to produce GIMPLE thunk.

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index e5aa833..bfd3d91 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -911,7 +911,7 @@ void fixup_same_cpp_alias_visibility (symtab_node *, 
symtab_node *target, tree);
 IN_SSA is true if the gimple is in SSA.  */
 basic_block init_lowered_empty_function (tree, bool);
 void cgraph_reset_node (struct cgraph_node *);
-bool expand_thunk (struct cgraph_node *, bool);
+bool expand_thunk (struct cgraph_node *, bool, bool);
 
 /* In cgraphclones.c  */
 
@@ -956,6 +956,7 @@ void ipa_record_stmt_references (struct cgraph_node *, gimple);
 
 /* In ipa.c  */

 bool symtab_remove_unreachable_nodes (bool, FILE *);
+bool address_taken_from_non_vtable_p (symtab_node *node);
 cgraph_node_set cgraph_node_set_new (void);
 cgraph_node_set_iterator cgraph_node_set_find (cgraph_node_set,
   struct cgraph_node *);
diff --git a/gcc/cgraphclones.c b/gcc/cgraphclones.c
index 4387b99..75eba6d 100644
--- a/gcc/cgraphclones.c
+++ b/gcc/cgraphclones.c
@@ -353,7 +353,7 @@ duplicate_thunk_for_node (cgraph_node *thunk, cgraph_node 
*node,
  CGRAPH_FREQ_BASE);
   e-call_stmt_cannot_inline_p = true;
   cgraph_call_edge_duplication_hooks (thunk-callees, e);
-  if (!expand_thunk (new_thunk, false))
+  if (!expand_thunk (new_thunk, false, false))
 new_thunk-analyzed = true;
   cgraph_call_node_duplication_hooks (thunk, new_thunk);
   return new_thunk;
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index e19b0a2..55bf378 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -610,7 +610,7 @@ analyze_function (struct cgraph_node *node)
 {
   cgraph_create_edge (node, cgraph_get_node (node-thunk.alias),
  NULL, 0, CGRAPH_FREQ_BASE);
-  if (!expand_thunk (node, false))
+  if (!expand_thunk (node, false, false))
{
  node-thunk.alias = NULL;
  node-analyzed = true;
@@ -1466,11 +1466,13 @@ thunk_adjust (gimple_stmt_iterator * bsi,
 }
 
 /* Expand thunk NODE to gimple if possible.

+   When FORCE_GIMPLE_THUNK is true, gimple thunk is created and
+   no assembler is produced.
When OUTPUT_ASM_THUNK is true, also produce assembler for
thunks that are not lowered.  */
 
 bool

-expand_thunk (struct cgraph_node *node, bool output_asm_thunks)
+expand_thunk (struct cgraph_node *node, bool output_asm_thunks, bool 
force_gimple_thunk)
 {
   bool this_adjusting = node-thunk.this_adjusting;
   HOST_WIDE_INT fixed_offset = node-thunk.fixed_offset;
@@ -1481,7 +1483,7 @@ expand_thunk (struct cgraph_node *node, bool 
output_asm_thunks)
   tree a;
 
 
-  if (this_adjusting

+  if (!force_gimple_thunk  this_adjusting
targetm.asm_out.can_output_mi_thunk (thunk_fndecl, fixed_offset,
  virtual_value, alias))
 {
@@ -1719,7 +1721,7 @@ assemble_thunks_and_aliases (struct cgraph_node *node)
 
 	e = e-next_caller;

assemble_thunks_and_aliases (thunk);
-expand_thunk (thunk, true);
+expand_thunk (thunk, true, false);
   }
 else
   e = e-next_caller;
diff --git a/gcc/ipa-visibility.c b/gcc/ipa-visibility.c
index dc22b2e..7886722 100644
--- a/gcc/ipa-visibility.c
+++ b/gcc/ipa-visibility.c
@@ -115,7 +115,7 @@ cgraph_local_node_p (struct cgraph_node *node)
 }
 
 /* Return true when there is a reference to node and it is not vtable.  */

-static bool
+bool
 address_taken_from_non_vtable_p (symtab_node *node)
 {
   int i;
--
1.8.4.5




[PATCH 3/4] New attribute lookup function addition

2014-05-30 Thread Martin Liška

Hi,
   this patch introduces a new function, lookup_attribute_starting, that can find 
all attributes starting with a specified string. The purpose of the function is 
to be able to identify, e.g., whether a function has any 'omp' attribute.

Bootstrapped and tested on x86_64-linux.
OK for trunk?

Thanks,
Martin

2014-05-29  Martin Liska  mli...@suse.cz

* tree.h (private_lookup_attribute_starting): New function.
(lookup_attribute_starting): Likewise.
* tree.c (private_lookup_attribute_starting): Likewise.

diff --git a/gcc/tree.c b/gcc/tree.c
index cf7e362..9c6b68c 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -5758,6 +5758,37 @@ private_lookup_attribute (const char *attr_name, size_t 
attr_len, tree list)
   return list;
 }
 
+tree

+private_lookup_attribute_starting (const char *attr_name, size_t attr_len, 
tree list)
+{
+  while (list)
+{
+  size_t ident_len = IDENTIFIER_LENGTH (get_attribute_name (list));
+
+  if (attr_len > ident_len)
+   {
+ list = TREE_CHAIN (list);
+ continue;
+   }
+
+  const char *p = IDENTIFIER_POINTER (get_attribute_name (list));
+
+  if (strncmp (attr_name, p, attr_len) == 0)
+   break;
+
+  /* TODO: If we made sure that attributes were stored in the
+canonical form without '__...__' (ie, as in 'text' as opposed
+to '__text__') then we could avoid the following case.  */
+  if (p[0] == '_' && p[1] == '_' && strncmp (attr_name, p + 2, attr_len) 
== 0)
+   break;
+
+  list = TREE_CHAIN (list);
+}
+
+  return list;
+}
+
+
 /* A variant of lookup_attribute() that can be used with an identifier
as the first argument, and where the identifier can be either
'text' or '__text__'.
diff --git a/gcc/tree.h b/gcc/tree.h
index 9fe7360..3e1b113 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -3731,6 +3731,10 @@ extern tree merge_type_attributes (tree, tree);
and you should never call it directly.  */
 extern tree private_lookup_attribute (const char *, size_t, tree);
 
+/* This function is a private implementation detail

+   of lookup_attribute_starting() and you should never call it directly.  */
+extern tree private_lookup_attribute_starting (const char *, size_t, tree);
+
 /* Given an attribute name ATTR_NAME and a list of attributes LIST,
return a pointer to the attribute's list element if the attribute
is part of the list, or NULL_TREE if not found.  If the attribute
@@ -3753,6 +3757,23 @@ lookup_attribute (const char *attr_name, tree list)
 return private_lookup_attribute (attr_name, strlen (attr_name), list);
 }
 
+/* Given an attribute name ATTR_NAME and a list of attributes LIST,

+   return a pointer to the attribute's list element if the attribute
+   starts with ATTR_NAME. ATTR_NAME must be in the form 'text' (not
+   '__text__').  */
+
+static inline tree
+lookup_attribute_starting (const char *attr_name, tree list)
+{
+  gcc_checking_assert (attr_name[0] != '_');
+  /* In most cases, list is NULL_TREE.  */
+  if (list == NULL_TREE)
+return NULL_TREE;
+  else
+return private_lookup_attribute_starting (attr_name, strlen (attr_name), 
list);
+}
+
+
 /* This function is a private implementation detail of
is_attribute_p() and you should never call it directly.  */
 extern bool private_is_attribute_p (const char *, size_t, const_tree);
--
1.8.4.5




Re: [PATCH 1/4] Make coverage_compute_cfg_checksum callable with an argument

2014-06-02 Thread Martin Liška


On 05/30/2014 06:28 PM, Jeff Law wrote:

On 05/30/14 00:47, Martin Liška wrote:

Hello,
   this is a small patchset that prepares API for new IPA Identical code
folding pass. The patch adds an argument for coverage_compute_cfg_checksum.

Bootstrapped and tested on x86_64-linux.
OK for trunk?

Thanks,
Martin

2014-05-29  Martin Liska  mli...@suse.cz

 * coverage.h (coverage_compute_cfg_checksum): Argument added.
 * coverage.c (coverage_compute_cfg_checksum): Likewise.
 * profile.c (branch_prob): Likewise.

The block comment for coverage_compute_cfg_checksum needs to be updated.  We're 
no longer computing the checksum for the current function (cfun), but instead 
computing the checksum for the argument FN.


Hi,
   thank you for your feedback, I've just fixed the patch and will commit soon.

Martin




Otherwise OK for the trunk.

jeff




Re: [PATCH 2/4] Enhancement of call graph API

2014-06-02 Thread Martin Liška


On 05/30/2014 06:42 PM, Jeff Law wrote:

On 05/30/14 00:47, Martin Liška wrote:

Hello,
this patch enhances callgraph API to enable more precise control of
expand_thunk; another function becomes global.

Bootstrapped and tested on x86_64-linux.
OK for trunk?

Thanks,
Martin

2014-05-29  Martin Liska  mli...@suse.cz

 * cgraph.h (expand_thunk): New argument added.
 (address_taken_from_non_vtable_p): New global function.
 * ipa-visibility.c (address_taken_from_non_vtable_p): Likewise.
 * cgraphclones.c (duplicate_thunk_for_node): Argument added to call.
 * cgraphunit.c (analyze_function): Likewise.
 (assemble_thunks_and_aliases): Argument added to call.
 (expand_thunk): New argument forces to produce GIMPLE thunk.

Only concern here is the location of the prototype for 
address_taken_from_non_vtable_p.  Though I guess other things from 
ipa-visibility.c are prototyped in cgraph.h.

Can you put the prototype here in cgraph.h:


/* In ipa-visibility.c */
bool cgraph_local_node_p (struct cgraph_node *);

Otherwise OK.

Real curious to see the meat of the optimization now :-)


Hello,
   thanks too. It was really a wrong place for the declaration.

Yeah, the optimization will be juicy :)

Martin



jeff





Re: [PATCH 3/4] New attribute lookup function addition

2014-06-02 Thread Martin Liška


On 05/30/2014 06:37 PM, Jeff Law wrote:

On 05/30/14 00:49, Martin Liška wrote:

Hi,
this patch introduces a new function lookup_attribute_starting that
can find all attributes starting with a specified string. Purpose of the
function is to be able to identify e.g. if a function has any 'omp'
attribute.

Bootstrapped and tested on x86_64-linux.
OK for trunk?

Thanks,
Martin

2014-05-29  Martin Liska  mli...@suse.cz

 * tree.h (private_lookup_attribute_starting): New function.
 (lookup_attribute_starting): Likewise.
 * tree.c (private_lookup_attribute_starting): Likewise.

private_lookup_attribute_starting needs a block comment.


Added.




+tree
+private_lookup_attribute_starting (const char *attr_name, size_t
attr_len, tree list)

Long line needs to be wrapped?   Please review the patch for lines that need 
wrapping at 80 columns.

Fixed too.


So it's really a lookup by prefix, so I'd probably use a name like
lookup_attribute_by_prefix.  Why private_ in the function name?

I used the same construction as for function 'private_is_attribute_p'; I hope 
the construction is fine?



It appears it just returns the first attribute from LIST with the given prefix. 
 Presumably you use it iteratively.


+/* Given an attribute name ATTR_NAME and a list of attributes LIST,
+   return a pointer to the attribute's list element if the attribute
+   starts with ATTR_NAME. ATTR_NAME must be in the form 'text' (not
+   '__text__').  */
+
+static inline tree
+lookup_attribute_starting (const char *attr_name, tree list)
+{
+  gcc_checking_assert (attr_name[0] != '_');
+  /* In most cases, list is NULL_TREE.  */
+  if (list == NULL_TREE)
+return NULL_TREE;
+  else
+return private_lookup_attribute_starting (attr_name, strlen
(attr_name), list);
+}

So again, I prefer prefix rather than starting.  Similarly this is meant to 
be called iteratively since you only get the first attribute with the given prefix, right?

I added a comment that it returns just the first such attribute.

Is the reworked patch OK for trunk?

Martin


OK with the nit fixes mentioned above.


Jeff


From be3ab469ee70ff3de434f5326c1a2eabf07da3ed Mon Sep 17 00:00:00 2001
Message-Id: be3ab469ee70ff3de434f5326c1a2eabf07da3ed.1401718733.git.mli...@suse.cz
In-Reply-To: e245d67afb610a2f210b83382b49f75731ba68b8.1401718733.git.mli...@suse.cz
References: e245d67afb610a2f210b83382b49f75731ba68b8.1401718733.git.mli...@suse.cz
From: mliska mli...@suse.cz
Date: Thu, 29 May 2014 17:18:34 +0200
Subject: [PATCH 3/4] New attribute lookup function addition
To: gcc-patches@gcc.gnu.org

Hi,
   this patch introduces a new function lookup_attribute_starting that can find all attributes starting with a specified string. Purpose of the function is to be able to identify e.g. if a function has any 'omp' attribute.

Bootstrapped and tested on x86_64-linux.
OK for trunk?

Thanks,
Martin

2014-05-29  Martin Liska  mli...@suse.cz

	* tree.h (private_lookup_attribute_starting): New function.
	(lookup_attribute_starting): Likewise.
	* tree.c (private_lookup_attribute_starting): Likewise.

diff --git a/gcc/tree.c b/gcc/tree.c
index cf7e362..f983408 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -5758,6 +5758,44 @@ private_lookup_attribute (const char *attr_name, size_t attr_len, tree list)
   return list;
 }
 
+/* Given an attribute name ATTR_NAME and a list of attributes LIST,
+   return a pointer to the attribute's list first element if the attribute
+   starts with ATTR_NAME. ATTR_NAME must be in the form 'text' (not
+   '__text__').  */
+
+tree
+private_lookup_attribute_by_prefix (const char *attr_name, size_t attr_len,
+tree list)
+{
+  while (list)
+{
+  size_t ident_len = IDENTIFIER_LENGTH (get_attribute_name (list));
+
+  if (attr_len  ident_len)
+	{
+	  list = TREE_CHAIN (list);
+	  continue;
+	}
+
+  const char *p = IDENTIFIER_POINTER (get_attribute_name (list));
+
+  if (strncmp (attr_name, p, attr_len) == 0)
+	break;
+
+  /* TODO: If we made sure that attributes were stored in the
+	 canonical form without '__...__' (ie, as in 'text' as opposed
+	 to '__text__') then we could avoid the following case.  */
+  if (p[0] == '_'  p[1] == '_' 
+	  strncmp (attr_name, p + 2, attr_len) == 0)
+	break;
+
+  list = TREE_CHAIN (list);
+}
+
+  return list;
+}
+
+
 /* A variant of lookup_attribute() that can be used with an identifier
as the first argument, and where the identifier can be either
'text' or '__text__'.
diff --git a/gcc/tree.h b/gcc/tree.h
index 9fe7360..e592280 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -3731,6 +3731,10 @@ extern tree merge_type_attributes (tree, tree);
and you should never call it directly.  */
 extern tree private_lookup_attribute (const char *, size_t, tree);
 
+/* This function is a private implementation detail
+   of lookup_attribute_by_prefix() and you should never call it directly.  */
+extern tree private_lookup_attribute_by_prefix (const char *, size_t, tree

[PATCH] PR 61642

2014-06-11 Thread Martin Liška

Hello,
   I send patch for PR61642.

Changelog:
2014-06-11  Martin Liska  mli...@suse.cz

* ipa-prop.c (ipa_make_edge_direct_to_target): Check that gimple call
statement is reachable.

Bootstrapped and regtested on x86_64-unknown-linux-gnu.

Ready for trunk?

Martin
diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c
index d02093a..1ffd173 100644
--- a/gcc/ipa-prop.c
+++ b/gcc/ipa-prop.c
@@ -2673,13 +2673,19 @@ ipa_make_edge_direct_to_target (struct cgraph_edge *ie, tree target)
 
   if (dump_enabled_p ())
 	{
-	  location_t loc = gimple_location (ie-call_stmt);
-	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
-			   discovered direct call to non-function in %s/%i, 
-			   making it __builtin_unreachable\n,
-   ie-caller-name (),
-   ie-caller-order);
+	  const char *fmt = discovered direct call to non-function in %s/%i, 
+making it __builtin_unreachable\n;
+
+	  if (ie-call_stmt)
+	{
+		  location_t loc = gimple_location (ie-call_stmt);
+		  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, fmt,
+   ie-caller-name (), ie-caller-order);
+		}
+	 else if (dump_file)
+	   fprintf (dump_file, fmt, ie-caller-name (), ie-caller-order);
 	}
+
 	  target = builtin_decl_implicit (BUILT_IN_UNREACHABLE);
 	  callee = cgraph_get_create_node (target);
 	  unreachable = true;
@@ -2739,10 +2745,18 @@ ipa_make_edge_direct_to_target (struct cgraph_edge *ie, tree target)
  }
   if (dump_enabled_p ())
 {
-  location_t loc = gimple_location (ie-call_stmt);
-  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
-		   converting indirect call in %s to direct call to %s\n,
-		   ie-caller-name (), callee-name ());
+  const char *fmt = converting indirect call in %s to direct call to %s\n;
+
+  if (ie-call_stmt)
+{
+	  location_t loc = gimple_location (ie-call_stmt);
+
+	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, fmt,
+			   ie-caller-name (), callee-name ());
+
+}
+  else if (dump_file)
+	fprintf (dump_file, fmt, ie-caller-name (), callee-name ());
 }
   ie = cgraph_make_edge_direct (ie, callee);
   es = inline_edge_summary (ie);


Re: [PATCH] PR 61642

2014-06-11 Thread Martin Liška


On 06/11/2014 02:01 PM, Richard Biener wrote:

On Wed, Jun 11, 2014 at 1:58 PM, Martin Liška mli...@suse.cz wrote:

Hello,
I send patch for PR61642.

61462?


Hello,
   that's bogus, I am not a visionary that creates a patch for a future 
bug :)


Thanks,
Martin




Changelog:
2014-06-11  Martin Liska  mli...@suse.cz


PR ipa/61462

 * ipa-prop.c (ipa_make_edge_direct_to_target): Check that gimple call
 statement is reachable.

Bootstrapped and regtested on x86_64-unknown-linux-gnu.

Ready for trunk?

Ok with those adjustments.

Richard.


Martin




Re: Make ipa-ref somewhat less stupid

2014-06-13 Thread Martin Liška

On 06/10/2014 08:34 AM, Jan Hubicka wrote:

Hi,
ipa-reference is somewhat stupid and builds its data sets for all variables 
including
addressable and public one just to prune them out after all bitmaps are 
constructed.
This used to make sense when the profile generation happened at compile time, 
but
since the ipa_ref datastructure was introduced this is nonsense.

Martin: It may be interesting to check if this solves the memory use issues with
chrome.  We also may be able to re-enable ipa-ref with profile-generate as
I think all the datastructures are considered to have address taken.


Hi,
   there is a link to chromium stats: 
https://drive.google.com/file/d/0B0pisUJ80pO1VmNHeklCRWVkOUU/edit?usp=sharing

Both compilations were run with '-flto=6', where the upper graph adds 
'-fprofile-generate'. Memory footprint is IMHO acceptable, but the compilation 
process takes twice as long with profile generation. Yeah, chromium contains a 
really big code base :)

Martin




Honza

Bootstrapped/regtested x86_64-linux.

* ipa-reference.c (is_proper_for_analysis): Exclude addressable and 
public
vars.
(intersect_static_var_sets): Remove.
(propagate): Do not prune local statics.
Index: ipa-reference.c
===
--- ipa-reference.c (revision 211364)
+++ ipa-reference.c (working copy)
@@ -243,6 +243,17 @@ is_proper_for_analysis (tree t)
if (TREE_READONLY (t))
  return false;
  
+  /* We can not track variables with address taken.  */

+  if (TREE_ADDRESSABLE (t))
+return false;
+
+  /* TODO: We could track public variables that are not addressable, but 
currently
+ frontends don't give us those.  */
+  if (TREE_PUBLIC (t))
+return false;
+
+  /* TODO: Check aliases.  */
+
/* This is a variable we care about.  Check if we have seen it
   before, and if not add it the set of variables we care about.  */
if (all_module_statics
@@ -312,26 +323,6 @@ union_static_var_sets (bitmap x, bitmap
return x == all_module_statics;
  }
  
-/* Compute X = Y, taking into account the possibility that

-   X may become the maximum set.  */
-
-static bool
-intersect_static_var_sets (bitmap x, bitmap y)
-{
-  if (x != all_module_statics)
-{
-  bitmap_and_into (x, y);
-  /* As with union_static_var_sets, reducing to the maximum
-set as early as possible is an overall win.  */
-  if (bitmap_equal_p (x, all_module_statics))
-   {
- BITMAP_FREE (x);
- x = all_module_statics;
-   }
-}
-  return x == all_module_statics;
-}
-
  /* Return a copy of SET on the bitmap obstack containing SET.
 But if SET is NULL or the maximum set, return that instead.  */
  
@@ -669,7 +660,6 @@ static unsigned int

  propagate (void)
  {
struct cgraph_node *node;
-  varpool_node *vnode;
struct cgraph_node **order =
  XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
int order_pos;
@@ -681,25 +671,6 @@ propagate (void)
ipa_discover_readonly_nonaddressable_vars ();
generate_summary ();
  
-  /* Now we know what vars are really statics; prune out those that aren't.  */

-  FOR_EACH_VARIABLE (vnode)
-if (vnode-externally_visible
-   || TREE_ADDRESSABLE (vnode-decl)
-   || TREE_READONLY (vnode-decl)
-   || !is_proper_for_analysis (vnode-decl)
-   || !vnode-definition)
-  bitmap_clear_bit (all_module_statics, DECL_UID (vnode-decl));
-
-  /* Forget info we collected just for fun on variables that turned out to be
- non-local.  */
-  FOR_EACH_DEFINED_FUNCTION (node)
-{
-  ipa_reference_local_vars_info_t node_l;
-  node_l = get_reference_vars_info (node)-local;
-  intersect_static_var_sets (node_l-statics_read, all_module_statics);
-  intersect_static_var_sets (node_l-statics_written, all_module_statics);
-}
-
/* Propagate the local information through the call graph to produce
   the global information.  All the nodes within a cycle will have
   the same info so we collapse cycles first.  Then we can do the




Re: Make ipa-ref somewhat less stupid

2014-06-17 Thread Martin Liška


On 06/16/2014 10:01 AM, Jan Hubicka wrote:

On 06/10/2014 08:34 AM, Jan Hubicka wrote:

Hi,
ipa-reference is somewhat stupid and builds its data sets for all variables 
including
addressable and public one just to prune them out after all bitmaps are 
constructed.
This used to make sense when the profile generation happened at compile time, 
but
since the ipa_ref datastructure was introduced this is nonsense.

Martin: It may be interesting to check if this solves the memory use issues with
chrome.  We also may be able to re-enable ipa-ref with profile-generate as
I think all the datastructures are considered to have address taken.

Hi,
there is a link to chromium stats: 
https://drive.google.com/file/d/0B0pisUJ80pO1VmNHeklCRWVkOUU/edit?usp=sharing

Both compilations were run with '-flto=6', where the upper graph adds 
'-fprofile-generate'. Memory footprint is IMHO acceptable, but the compilation 
process takes twice as long with profile generation. Yeah, chromium contains a 
really big code base :)

Yep, I wonder why WPA takes so much longer. Do you think you can build lto1
with --enable-gather-detailed-mem-stats and relink with -fpre-ipa-mem-report
-fpost-ipa-mem-report -fmem-report -Q and send me the output?  It would be nice
to push Chromium under 4GB of WPA :)

Here's the report you requested: 
https://drive.google.com/file/d/0B0pisUJ80pO1RlRRTVBxUG5vSlE/edit?usp=sharing , 
produced with -fno-profile-generate. With -fprofile-generate enabled, the WPA stage 
cannot fit into 24GB of memory with memory stats enabled.

Martin



Thanks a lot!
Honza




Re: [PATCH 1/5] New Identical Code Folding IPA pass

2014-06-18 Thread Martin Liška


On 06/17/2014 10:14 PM, David Malcolm wrote:

On Fri, 2014-06-13 at 12:24 +0200, mliska wrote:
[...snip...]

   Statistics about the pass:
   Inkscape: 11.95 MB - 11.44 MB (-4.27%)
   Firefox: 70.12 MB - 70.12 MB (-3.07%)

FWIW, you wrote 70.12 MB here for both before and after for Firefox, but
give a -3.07% change, which seems like a typo.

A 3.07% reduction from 70.12 MB would be 67.97 MB; was this what the
pass achieved?


Hi,
   it's a typo, the original size of FF is 72.34 MB. I hope -3.07% is the correctly 
evaluated achievement.

Thanks,
Martin



[...snip...]

Thanks (nice patch, btw)
Dave





Re: [PATCH 1/5] New Identical Code Folding IPA pass

2014-06-18 Thread Martin Liška


On 06/17/2014 10:09 PM, Paolo Carlini wrote:

Hi,

On 13/06/14 12:24, mliska wrote:

   The optimization is inspired by Microsoft /OPT:ICF optimization 
(http://msdn.microsoft.com/en-us/library/bxwfs976.aspx) that merges COMDAT 
sections, with each function residing in a separate section.

In terms of C++ testcases, I'm wondering if you already double checked that the 
new pass already does well on the typical examples on which, I was told, the 
Microsoft optimization is known to do well, eg, code instantiating std::vector 
for different pointer types, or even long and long long on x86_64-linux, things 
like that.


I've just added another C++ test case:

#include <vector>

using namespace std;

static vector<vector<int> *> a;
static vector<void *> b;

int main()
{
  return b.size() + a.size ();
}

where the pass identifies following equality:

Semantic equality hit:std::vector_Tp, _Alloc::size_type std::vector_Tp, _Alloc::size() const [with _Tp = 
std::vectorint*; _Alloc = std::allocatorstd::vectorint*; std::vector_Tp, _Alloc::size_type = long unsigned 
int]-std::vector_Tp, _Alloc::size_type std::vector_Tp, _Alloc::size() const [with _Tp = void*; _Alloc = 
std::allocatorvoid*; std::vector_Tp, _Alloc::size_type = long unsigned int]
Semantic equality hit:static void std::_Destroy_auxtrue::__destroy(_ForwardIterator, 
_ForwardIterator) [with _ForwardIterator = void**]-static void 
std::_Destroy_auxtrue::__destroy(_ForwardIterator, _ForwardIterator) [with _ForwardIterator 
= std::vectorint**]
Semantic equality hit:void std::_Destroy(_ForwardIterator, _ForwardIterator) [with 
_ForwardIterator = void**]-void std::_Destroy(_ForwardIterator, _ForwardIterator) 
[with _ForwardIterator = std::vectorint**]
Semantic equality hit:void std::_Destroy(_ForwardIterator, _ForwardIterator, std::allocator_T2) [with 
_ForwardIterator = void**; _Tp = void*]-void std::_Destroy(_ForwardIterator, _ForwardIterator, 
std::allocator_T2) [with _ForwardIterator = std::vectorint**; _Tp = std::vectorint*]
Semantic equality hit:void __gnu_cxx::new_allocator_Tp::deallocate(__gnu_cxx::new_allocator_Tp::pointer, 
__gnu_cxx::new_allocator_Tp::size_type) [with _Tp = void*; __gnu_cxx::new_allocator_Tp::pointer = void**; 
__gnu_cxx::new_allocator_Tp::size_type = long unsigned int]-void 
__gnu_cxx::new_allocator_Tp::deallocate(__gnu_cxx::new_allocator_Tp::pointer, __gnu_cxx::new_allocator_Tp::size_type) [with _Tp = 
std::vectorint*; __gnu_cxx::new_allocator_Tp::pointer = std::vectorint**; __gnu_cxx::new_allocator_Tp::size_type = long 
unsigned int]
Semantic equality hit:static void __gnu_cxx::__alloc_traits_Alloc::deallocate(_Alloc, __gnu_cxx::__alloc_traits_Alloc::pointer, 
__gnu_cxx::__alloc_traits_Alloc::size_type) [with _Alloc = std::allocatorvoid*; __gnu_cxx::__alloc_traits_Alloc::pointer = void**; 
__gnu_cxx::__alloc_traits_Alloc::size_type = long unsigned int]-static void __gnu_cxx::__alloc_traits_Alloc::deallocate(_Alloc, 
__gnu_cxx::__alloc_traits_Alloc::pointer, __gnu_cxx::__alloc_traits_Alloc::size_type) [with _Alloc = std::allocatorstd::vectorint*; 
__gnu_cxx::__alloc_traits_Alloc::pointer = std::vectorint**; __gnu_cxx::__alloc_traits_Alloc::size_type = long unsigned int]

As one would expect, there is a function 'size'.

Martin



Thanks,
Paolo.




Re: [PATCH 4/5] Existing tests fix

2014-06-18 Thread Martin Liška


On 06/17/2014 10:50 PM, Rainer Orth wrote:

Jeff Law l...@redhat.com writes:


On 06/13/14 04:48, mliska wrote:

Hi,
many tests rely on a precise number of scanned functions in a dump file. If 
IPA ICF decides to merge some functions and/or read-only variables, counts do 
not match.

Martin

Changelog:

2014-06-13  Martin Liska  mli...@suse.cz
Honza Hubicka  hubi...@ucw.cz

* c-c++-common/rotate-1.c: Text

^ Huh?


You are right, batch replacement mistake. There should be:

* c-c++-common/rotate-1.c: Update dg-options.
* c-c++-common/rotate-2.c: Likewise.
...


Martin




* c-c++-common/rotate-2.c: New test.
* c-c++-common/rotate-3.c: Likewise.

Rainer





[PATCH] Fix gimple-fold

2014-03-18 Thread Martin Liška

Hello,
I found ICE in Chromium compiled with LTO. There's a call that is 
proved by ipa-devirt as __builtin_unreachable; same decision is done by 
gimple-fold and this call is replaced by GIMPLE_CALL and GIMPLE_ASSIGN 
(in this order). After that condition for 
cgraph_update_edges_for_call_stmt_node is not satisfied and 
corresponding cgraph_edge is not updated. Thus a verifier reports a 
wrong edge.


Bootstrapped and tested on a x86_64 machine.


Changelog:
2014-03-18  Martin Liska  mli...@suse.cz

* cgraph.c (cgraph_update_edges_for_call_stmt_node): added case when
gimple call statement is updated.
* gimple-fold.c (gimple_fold_call): changed order for GIMPLE_ASSIGN and
GIMPLE_CALL, where gsi iterator still points to GIMPLE CALL.

OK for trunk?

Thank you,
Martin


diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index a15b6bc..cd68894 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -1519,7 +1519,11 @@ cgraph_update_edges_for_call_stmt_node (struct cgraph_node *node,
 		{
 		  if (callee-decl == new_call
 		  || callee-former_clone_of == new_call)
-		return;
+{
+  cgraph_set_call_stmt (cgraph_edge (node, old_stmt),
+new_stmt);
+		  return;
+}
 		  callee = callee-clone_of;
 		}
 	}
diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index eafdb2d..a033fbc 100644
--- a/gcc/gimple-fold.c
+++ b/gcc/gimple-fold.c
@@ -1153,8 +1153,14 @@ gimple_fold_call (gimple_stmt_iterator *gsi, bool inplace)
 		{
 		  tree var = create_tmp_var (TREE_TYPE (lhs), NULL);
 		  tree def = get_or_create_ssa_default_def (cfun, var);
-		  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
-		  update_call_from_tree (gsi, def);
+
+  /* To satisfy condition for
+ cgraph_update_edges_for_call_stmt_node,
+ we need to preserve GIMPLE_CALL statement
+ at position of GSI iterator.  */
+  gimple_stmt_iterator oldgsi = *gsi;
+		  gsi_insert_before (gsi, new_stmt, GSI_NEW_STMT);
+		  update_call_from_tree (oldgsi, def);
 		}
 		  else
 		gsi_replace (gsi, new_stmt, true);


Re: [PATCH] Fix gimple-fold

2014-03-18 Thread Martin Liška

Thank you for feedback,

new changelog:
2014-03-18  Martin Liska  mli...@suse.cz

* cgraph.c (cgraph_update_edges_for_call_stmt_node): Added case 
when

gimple call statement is updated.
* gimple-fold.c (gimple_fold_call): Changed order for 
GIMPLE_ASSIGN and

GIMPLE_CALL, where gsi iterator still points to GIMPLE CALL.

OK for trunk?

Martin


On 03/18/2014 02:13 PM, Jakub Jelinek wrote:

Hi!


2014-03-18  Martin Liska  mli...@suse.cz

 * cgraph.c (cgraph_update_edges_for_call_stmt_node): added case when
 gimple call statement is updated.

Capital letter after :


 * gimple-fold.c (gimple_fold_call): changed order for GIMPLE_ASSIGN and

Likewise here.

Jakub


diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index a15b6bc..269146a 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -1519,7 +1519,10 @@ cgraph_update_edges_for_call_stmt_node (struct cgraph_node *node,
 		{
 		  if (callee-decl == new_call
 		  || callee-former_clone_of == new_call)
-		return;
+{
+  cgraph_set_call_stmt (e, new_stmt);
+		  return;
+}
 		  callee = callee-clone_of;
 		}
 	}
diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index eafdb2d..177abc1 100644
--- a/gcc/gimple-fold.c
+++ b/gcc/gimple-fold.c
@@ -1153,8 +1153,13 @@ gimple_fold_call (gimple_stmt_iterator *gsi, bool inplace)
 		{
 		  tree var = create_tmp_var (TREE_TYPE (lhs), NULL);
 		  tree def = get_or_create_ssa_default_def (cfun, var);
-		  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+
+  /* To satisfy condition for
+ cgraph_update_edges_for_call_stmt_node,
+ we need to preserve GIMPLE_CALL statement
+ at position of GSI iterator.  */
 		  update_call_from_tree (gsi, def);
+		  gsi_insert_before (gsi, new_stmt, GSI_NEW_STMT);
 		}
 		  else
 		gsi_replace (gsi, new_stmt, true);


Re: [PATCH] Fix gimple-fold

2014-03-18 Thread Martin Liška

Patch passes bootstrap and regtest.

I fixed indentation according to discussion with Jakub.

OK for trunk?

Thanks,
Martin

On 03/18/2014 02:55 PM, Richard Biener wrote:

On Tue, Mar 18, 2014 at 2:29 PM, Martin Liška mli...@suse.cz wrote:

Thank you for feedback,

Ok if it passes bootstrap / regtest.

Thanks,
Richard.


new changelog:

2014-03-18  Martin Liska  mli...@suse.cz

 * cgraph.c (cgraph_update_edges_for_call_stmt_node): Added case when
 gimple call statement is updated.

 * gimple-fold.c (gimple_fold_call): Changed order for GIMPLE_ASSIGN
and
 GIMPLE_CALL, where gsi iterator still points to GIMPLE CALL.

OK for trunk?

Martin



On 03/18/2014 02:13 PM, Jakub Jelinek wrote:

Hi!


2014-03-18  Martin Liska  mli...@suse.cz

  * cgraph.c (cgraph_update_edges_for_call_stmt_node): added case
when
  gimple call statement is updated.

Capital letter after :


  * gimple-fold.c (gimple_fold_call): changed order for
GIMPLE_ASSIGN and

Likewise here.

 Jakub




diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index a15b6bc..577352f 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -1519,7 +1519,10 @@ cgraph_update_edges_for_call_stmt_node (struct cgraph_node *node,
 		{
 		  if (callee-decl == new_call
 		  || callee-former_clone_of == new_call)
-		return;
+		{
+		  cgraph_set_call_stmt (e, new_stmt);
+		  return;
+		}
 		  callee = callee-clone_of;
 		}
 	}
diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index eafdb2d..adc9d49 100644
--- a/gcc/gimple-fold.c
+++ b/gcc/gimple-fold.c
@@ -1153,8 +1153,13 @@ gimple_fold_call (gimple_stmt_iterator *gsi, bool inplace)
 		{
 		  tree var = create_tmp_var (TREE_TYPE (lhs), NULL);
 		  tree def = get_or_create_ssa_default_def (cfun, var);
-		  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
+
+		  /* To satisfy condition for
+		 cgraph_update_edges_for_call_stmt_node,
+		 we need to preserve GIMPLE_CALL statement
+		 at position of GSI iterator.  */
 		  update_call_from_tree (gsi, def);
+		  gsi_insert_before (gsi, new_stmt, GSI_NEW_STMT);
 		}
 		  else
 		gsi_replace (gsi, new_stmt, true);


Re: [PATCH] Avoid ggc_collect () after WPA forking

2014-03-19 Thread Martin Liška


On 03/19/2014 03:55 PM, Richard Biener wrote:

On Wed, 19 Mar 2014, Martin Liška wrote:


There are stats for Firefox with LTO and -O2. According to graphs it
looks that memory consumption for parallel WPA phase is similar.
When I disable parallel WPA, wpa footprint is ~4GB, but ltrans memory
footprint is similar to parallel WPA that reduces libxul.so linking by ~10%.

Ok, so I suppose this tracks RSS, not virtual memory use (what is
used and what is active)?


Data are given by vmstat, according to: 
http://stackoverflow.com/questions/18529723/what-is-active-memory-and-inactive-memory


*Active memory* is memory that is being used by a particular process.
*Inactive memory* is memory that was allocated to a process that is no 
longer running.


So please follow just 'blue' line that displays really used memory. 
According to man, vmstat tracks virtual memory statistics.



And it is WPA plus LTRANS stages, WPA ends where memory use first goes
down to zero?
I wonder if you can identify the point where parallel streaming
starts and where it ends ... ;)


Exactly, WPA ends when it goes to zero.


Btw, I have another patch in my local tree, limiting the
exponential growth of blocks we allocate when outputting sections.
But it shouldn't be _that_ bad ... maybe you can try if it has
any effect?


I can apply it.

Martin



Thanks,
Richard.

Index: gcc/lto-section-out.c
===
--- gcc/lto-section-out.c   (revision 208642)
+++ gcc/lto-section-out.c   (working copy)
@@ -99,13 +99,19 @@ lto_end_section (void)
  }
  
  
+/* We exponentially grow the size of the blocks as we need to make

+   room for more data to be written.  Start with a single page and go up
+   to 2MB pages for this.  */
+#define FIRST_BLOCK_SIZE 4096
+#define MAX_BLOCK_SIZE (2 * 1024 * 1024)
+
  /* Write all of the chars in OBS to the assembler.  Recycle the blocks
 in obs as this is being done.  */
  
  void

  lto_write_stream (struct lto_output_stream *obs)
  {
-  unsigned int block_size = 1024;
+  unsigned int block_size = FIRST_BLOCK_SIZE;
struct lto_char_ptr_base *block;
struct lto_char_ptr_base *next_block;
if (!obs-first_block)
@@ -135,6 +141,7 @@ lto_write_stream (struct lto_output_stre
else
lang_hooks.lto.append_data (base, num_chars, block);
block_size *= 2;
+  block_size = MIN (MAX_BLOCK_SIZE, block_size);
  }
  }
  
@@ -152,7 +159,7 @@ lto_append_block (struct lto_output_stre

  {
/* This is the first time the stream has been written
 into.  */
-  obs-block_size = 1024;
+  obs-block_size = FIRST_BLOCK_SIZE;
new_block = (struct lto_char_ptr_base*) xmalloc (obs-block_size);
obs-first_block = new_block;
  }
@@ -162,6 +169,7 @@ lto_append_block (struct lto_output_stre
/* Get a new block that is twice as big as the last block
 and link it into the list.  */
obs-block_size *= 2;
+  obs-block_size = MIN (MAX_BLOCK_SIZE, obs-block_size);
new_block = (struct lto_char_ptr_base*) xmalloc (obs-block_size);
/* The first bytes of the block are reserved as a pointer to
 the next block.  Set the chain of the full block to the




Re: [testsuite] Fix gcc.dg/ipa/ipa-icf-2[18].c on Solaris

2014-10-23 Thread Martin Liška

On 10/23/2014 11:38 AM, Jakub Jelinek wrote:

On Thu, Oct 23, 2014 at 11:35:44AM +0200, Rainer Orth wrote:

The testcase is clearly i?86/x86_64 specific, so it must be guarded with
{ target i?86-*-* x86_64-*-* }, and as it is a compile time test, no need


Right, that's what my patch already does.


for assembler support, so just adding -msse2 option to dg-option is the
right thing to do.


Ok, will try that.  Still no idea why Linux/x86 and Solaris/x86 are
different here.


Why do you think so?  I certainly have:
FAIL: gcc.dg/ipa/ipa-icf-21.c (test for excess errors)
UNRESOLVED: gcc.dg/ipa/ipa-icf-21.c scan-ipa-dump icf Equal symbols: 1
UNRESOLVED: gcc.dg/ipa/ipa-icf-21.c scan-ipa-dump icf Semantic equality 
hit:bar-foo
in my i686-linux test_summary log.  It really depends on how the compiler
is configured, if it defaults to march that has sse/sse2 by default, it will
succeed, otherwise it will not.

Jakub



Hello.

I combined both patches and ran regression tests on x86_64-linux-pc.
Ready for trunk?

Thanks,
Martin
diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-icf-21.c b/gcc/testsuite/gcc.dg/ipa/ipa-icf-21.c
index 7358e43..68aabc5 100644
--- a/gcc/testsuite/gcc.dg/ipa/ipa-icf-21.c
+++ b/gcc/testsuite/gcc.dg/ipa/ipa-icf-21.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options -O2 -fdump-ipa-icf  } */
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options -O2 -msse2 -fdump-ipa-icf  } */
 
 #include xmmintrin.h
 
diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-icf-28.c b/gcc/testsuite/gcc.dg/ipa/ipa-icf-28.c
index 538e0ab..bcaf84e 100644
--- a/gcc/testsuite/gcc.dg/ipa/ipa-icf-28.c
+++ b/gcc/testsuite/gcc.dg/ipa/ipa-icf-28.c
@@ -1,4 +1,4 @@
-/* { dg-do compile } */
+/* { dg-do compile { target init_priority } } */
 /* { dg-options -O2 -fdump-ipa-icf-details -fno-inline  } */
 
 __attribute__ ((noinline, constructor(200)))


Re: [PATCH 5/5] New tests introduction

2014-10-23 Thread Martin Liška

On 10/19/2014 09:50 AM, Andreas Schwab wrote:

Martin Liška mli...@suse.cz writes:


diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-icf-21.c 
b/gcc/testsuite/gcc.dg/ipa/ipa-icf-21.c
new file mode 100644
index 000..7358e43
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/ipa-icf-21.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options -O2 -fdump-ipa-icf  } */
+
+#include xmmintrin.h
+
+__attribute__ ((noinline))
+void foo()
+{
+  float x = 1.2345f;
+  __m128 v =_mm_load1_ps(x);
+}
+
+__attribute__ ((noinline))
+void bar()
+{
+  float x = 1.2345f;
+  __m128 v =_mm_load1_ps(x);
+}
+
+int main()
+{
+  return 2;
+}
+
+/* { dg-final { scan-ipa-dump Semantic equality hit:bar-foo icf  } } */
+/* { dg-final { scan-ipa-dump Equal symbols: 1 icf  } } */
+/* { dg-final { cleanup-ipa-dump icf } } */


FAIL: gcc.dg/ipa/ipa-icf-21.c (test for excess errors)
Excess errors:
/usr/local/gcc/gcc-20141019/gcc/testsuite/gcc.dg/ipa/ipa-icf-21.c:4:23: fatal e\
rror: xmmintrin.h: No such file or directory
compilation terminated.

Andreas.



Hello Andreas.

Starting from r216589 this problem should be fixed.

Thanks,
Martin



Re: [testsuite] Fix gcc.dg/ipa/ipa-icf-2[18].c on Solaris

2014-10-23 Thread Martin Liška

On 10/23/2014 04:03 PM, Uros Bizjak wrote:

On Thu, Oct 23, 2014 at 3:54 PM, Uros Bizjak ubiz...@gmail.com wrote:


Ok, will try that.  Still no idea why Linux/x86 and Solaris/x86 are
different here.


Why do you think so?  I certainly have:
FAIL: gcc.dg/ipa/ipa-icf-21.c (test for excess errors)
UNRESOLVED: gcc.dg/ipa/ipa-icf-21.c scan-ipa-dump icf Equal symbols: 1
UNRESOLVED: gcc.dg/ipa/ipa-icf-21.c scan-ipa-dump icf Semantic equality
hit:bar-foo
in my i686-linux test_summary log.  It really depends on how the compiler


Right, but on i386-pc-solaris2.11 it passes.


is configured, if it defaults to march that has sse/sse2 by default, it will
succeed, otherwise it will not.


For some reason, I hit a division by zero on alphaev68-linux-gnu in
the report generation code:

Starting program: /space/uros/gcc-build/prev-gcc/cc1 -O2
-fdump-ipa-icf -quiet ipa-icf-14.i

Program received signal SIGFPE, Arithmetic exception.
ipa_icf::sem_item_optimizer::merge_classes (this=0x12187dd80,
prev_class_count=3) at
/space/homedirs/uros/gcc-svn/trunk/gcc/ipa-icf.c:2203
2203   non_singular_classes_count);
(gdb) list
2198  fprintf (dump_file, Average class size before: %.2f,
after: %.2f\n,
2199   1.0f * item_count / prev_class_count,
2200   1.0f * item_count / class_count);
2201  fprintf (dump_file, Average non-singular class size:
%.2f, count: %u\n,
2202   1.0f * non_singular_classes_sum /
non_singular_classes_count,
2203   non_singular_classes_count);
2204  fprintf (dump_file, Equal symbols: %u\n, equal_items);
2205  fprintf (dump_file, Fraction of visited symbols: %.2f%%\n\n,
2206   100.0f * equal_items / item_count);
2207}
(gdb) p non_singular_classes_count
$1 = 0


Also:

(gdb) p non_singular_classes_sum
$1 = 0

This creates a nice NaN which can throw an exception.


Hello.

Sorry for a stupid bug. I attached a patch that should fix these divisions by 
zero.
I'm just wondering if we have a machine in the compile farm with alpha?

Thanks,
Martin



(gdb) bt
#0  ipa_icf::sem_item_optimizer::merge_classes (this=0x12187dd80,
prev_class_count=3)
 at /space/homedirs/uros/gcc-svn/trunk/gcc/ipa-icf.c:2203
#1  0x000121255c70 in ipa_icf::sem_item_optimizer::execute
(this=0x12187dd80) at
/space/homedirs/uros/gcc-svn/trunk/gcc/ipa-icf.c:1602
#2  0x00012125999c in ipa_icf::ipa_icf_driver () at
/space/homedirs/uros/gcc-svn/trunk/gcc/ipa-icf.c:2319
#3  0x00012125a46c in ipa_icf::pass_ipa_icf::execute
(this=0x121850f20) at
/space/homedirs/uros/gcc-svn/trunk/gcc/ipa-icf.c:2367
#4  0x0001209c10c4 in execute_one_pass (pass=0x121850f20) at
/space/homedirs/uros/gcc-svn/trunk/gcc/passes.c:2156
#5  0x0001209c26f4 in execute_ipa_pass_list (pass=0x121850f20) at
/space/homedirs/uros/gcc-svn/trunk/gcc/passes.c:2550
#6  0x00012048fcdc in ipa_passes () at
/space/homedirs/uros/gcc-svn/trunk/gcc/cgraphunit.c:2057

Uros.



diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c
index d1238a4..e7a293e 100644
--- a/gcc/ipa-icf.c
+++ b/gcc/ipa-icf.c
@@ -1736,7 +1736,7 @@ sem_item_optimizer::parse_nonsingleton_classes (void)
 
   if (dump_file)
 fprintf (dump_file, Init called for %u items (%.2f%%).\n, init_called_count,
-	 100.0f * init_called_count / m_items.length ());
+	 m_items.length () ? 100.0f * init_called_count / m_items.length (): 0.0f);
 }
 
 /* Equality function for semantic items is used to subdivide existing
@@ -2196,14 +2196,15 @@ sem_item_optimizer::merge_classes (unsigned int prev_class_count)
   fprintf (dump_file, Congruent classes before: %u, after: %u\n,
 	   prev_class_count, class_count);
   fprintf (dump_file, Average class size before: %.2f, after: %.2f\n,
-	   1.0f * item_count / prev_class_count,
-	   1.0f * item_count / class_count);
+	   prev_class_count ? 1.0f * item_count / prev_class_count : 0.0f,
+	   class_count ? 1.0f * item_count / class_count : 0.0f);
   fprintf (dump_file, Average non-singular class size: %.2f, count: %u\n,
-	   1.0f * non_singular_classes_sum / non_singular_classes_count,
+	   non_singular_classes_count ? 1.0f * non_singular_classes_sum /
+	   non_singular_classes_count : 0.0f,
 	   non_singular_classes_count);
   fprintf (dump_file, Equal symbols: %u\n, equal_items);
   fprintf (dump_file, Fraction of visited symbols: %.2f%%\n\n,
-	   100.0f * equal_items / item_count);
+	   item_count ? 100.0f * equal_items / item_count : 0.0f);
 }
 
   for (hash_tablecongruence_class_group_hash::iterator it = m_classes.begin ();


[PATCH] Fix for PR63595

2014-10-24 Thread Martin Liška

Hello.

The following patch adds a PHI result comparison to IPA ICF.
Bootstrap works on x86_64-linux-pc; no regressions observed.

Ready for trunk?
Thanks,
Martin
gcc/testsuite/ChangeLog:

2014-10-24  Martin Liska  mli...@suse.cz

* gcc.dg/ipa/pr63595.c: New test.


gcc/ChangeLog:

2014-10-24  Martin Liska  mli...@suse.cz

* ipa-icf.c (sem_function::compare_phi_node): PHI result comparison
added.
diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c
index d1238a4..7456fec 100644
--- a/gcc/ipa-icf.c
+++ b/gcc/ipa-icf.c
@@ -869,6 +869,12 @@ sem_function::compare_phi_node (basic_block bb1, basic_block bb2)
   phi1 = gsi_stmt (si1);
   phi2 = gsi_stmt (si2);
 
+  tree phi_result1 = gimple_phi_result (phi1);
+  tree phi_result2 = gimple_phi_result (phi2);
+
+  if (!m_checker-compare_operand (phi_result1, phi_result2))
+	return return_false_with_msg (PHI results are different);
+
   size1 = gimple_phi_num_args (phi1);
   size2 = gimple_phi_num_args (phi2);
 
diff --git a/gcc/testsuite/gcc.dg/ipa/pr63595.c b/gcc/testsuite/gcc.dg/ipa/pr63595.c
new file mode 100644
index 000..52851fb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/pr63595.c
@@ -0,0 +1,65 @@
+/* { dg-do compile } */
+/* { dg-options -O2 -fdump-ipa-icf-details  } */
+
+typedef int size_t;
+
+typedef struct TypHeader {
+unsigned long size;
+struct TypHeader * * ptr;
+char name[3];
+unsigned char type;
+} * TypHandle;
+
+__attribute__((noinline))
+static TypHandle Error(const char *str, unsigned long l1, unsigned long l2)
+{
+  return 0;
+}
+
+extern TypHandle (* EvTab[81]) ( TypHandle hd );
+extern TypHandle (*TabProd[28][28]) ( TypHandle, TypHandle );
+
+__attribute__((noinline))
+TypHandle FunOnRight (TypHandle hdCall)
+{
+TypHandle hdRes;
+TypHandle hdPnt;
+TypHandle hdElm;
+
+
+if ( ((hdCall)-size) != 3*((size_t)sizeof(TypHandle)) )
+return Error(,0L,0L);
+hdPnt = ((long)(((TypHandle*)((hdCall)-ptr))[1])1 ? (((TypHandle*)((hdCall)-ptr))[1]) : (* EvTab[(((long)(((TypHandle*)((hdCall)-ptr))[1])  1) ? 1 : TypHandle*)((hdCall)-ptr))[1])-type))])TypHandle*)((hdCall)-ptr))[1])));
+hdElm = ((long)(((TypHandle*)((hdCall)-ptr))[2])1 ? (((TypHandle*)((hdCall)-ptr))[2]) : (* EvTab[(((long)(((TypHandle*)((hdCall)-ptr))[2])  1) ? 1 : TypHandle*)((hdCall)-ptr))[2])-type))])TypHandle*)((hdCall)-ptr))[2])));
+
+
+hdRes = ((*TabProd[(((long)(hdPnt)  1) ? 1 : ((hdPnt)-type))][(((long)(hdElm)  1) ? 1 : ((hdElm)-type))])((hdPnt),(hdElm)));
+return hdRes;
+}
+
+__attribute__((noinline))
+TypHandle FunOnLeft (TypHandle hdCall)
+{
+TypHandle hdRes;
+TypHandle hdPnt;
+TypHandle hdElm;
+
+
+if ( ((hdCall)-size) != 3*((size_t)sizeof(TypHandle)) )
+return Error(,0L,0L);
+hdPnt = ((long)(((TypHandle*)((hdCall)-ptr))[1])1 ? (((TypHandle*)((hdCall)-ptr))[1]) : (* EvTab[(((long)(((TypHandle*)((hdCall)-ptr))[1])  1) ? 1 : TypHandle*)((hdCall)-ptr))[1])-type))])TypHandle*)((hdCall)-ptr))[1])));
+hdElm = ((long)(((TypHandle*)((hdCall)-ptr))[2])1 ? (((TypHandle*)((hdCall)-ptr))[2]) : (* EvTab[(((long)(((TypHandle*)((hdCall)-ptr))[2])  1) ? 1 : TypHandle*)((hdCall)-ptr))[2])-type))])TypHandle*)((hdCall)-ptr))[2])));
+
+
+hdRes = ((*TabProd[(((long)(hdElm)  1) ? 1 : ((hdElm)-type))][(((long)(hdPnt)  1) ? 1 : ((hdPnt)-type))])((hdElm),(hdPnt)));
+return hdRes;
+}
+
+int main()
+{
+  return 0;
+}
+
+/* { dg-final { scan-ipa-dump Equal symbols: 0 icf  } } */
+/* { dg-final { scan-ipa-dump PHI results are different icf  } } */
+/* { dg-final { cleanup-ipa-dump icf } } */


Re: [PATCH, IPA ICF] Fix PR63664, PR63574 (segfault in ipa-icf pass)

2014-10-29 Thread Martin Liška

On 10/29/2014 02:45 PM, Ilya Enkovich wrote:

On 29 Oct 10:34, Richard Biener wrote:

On Tue, Oct 28, 2014 at 5:14 PM, Ilya Enkovich enkovich@gmail.com wrote:

Hi,

This patch fixes PR63664 and PR63574.  The problem is that NULL types for labels are
not handled properly by ICF.  I assume it is OK for labels to have a NULL type, so I
added a check to ICF rather than fixing label generation.

Bootstrapped and checked on linux-x86_64.  OK for trunk?


Instead, it shouldn't be called for labels.

Richard.



Here is a version which doesn't compare types for labels.  Is it OK?


Hello.

I've just been testing a patch where the pass does not call compare_operand
for gimple labels.
As the pass creates a mapping between labels and basic blocks, such a comparison
will not be necessary.

Thanks,
Martin



Bootstrapped and checked on linux-x86_64.

Thanks,
Ilya
--
gcc/

2014-10-29  Ilya Enkovich  ilya.enkov...@intel.com

PR ipa/63664
PR bootstrap/63574
* ipa-icf-gimple.c (func_checker::compatible_types_p): Assert for null
args.
(func_checker::compare_operand): Don't compare types for labels.

gcc/testsuite/

2014-10-29  Ilya Enkovich  ilya.enkov...@intel.com

PR ipa/63664
* gcc.dg/ipa/pr63664.C: New.


diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c
index 1369b74..094e8ab 100644
--- a/gcc/ipa-icf-gimple.c
+++ b/gcc/ipa-icf-gimple.c
@@ -169,6 +169,9 @@ bool func_checker::compatible_types_p (tree t1, tree t2,
   bool compare_polymorphic,
   bool first_argument)
  {
+  gcc_assert (t1);
+  gcc_assert (t2);
+
if (TREE_CODE (t1) != TREE_CODE (t2))
  return return_false_with_msg (different tree types);

@@ -214,11 +217,15 @@ func_checker::compare_operand (tree t1, tree t2)
else if (!t1 || !t2)
  return false;

-  tree tt1 = TREE_TYPE (t1);
-  tree tt2 = TREE_TYPE (t2);
+  if (TREE_CODE (t1) != LABEL_DECL
+   TREE_CODE (t2) != LABEL_DECL)
+{
+  tree tt1 = TREE_TYPE (t1);
+  tree tt2 = TREE_TYPE (t2);

-  if (!func_checker::compatible_types_p (tt1, tt2))
-return false;
+  if (!func_checker::compatible_types_p (tt1, tt2))
+   return false;
+}

base1 = get_addr_base_and_unit_offset (t1, offset1);
base2 = get_addr_base_and_unit_offset (t2, offset2);
diff --git a/gcc/testsuite/gcc.dg/ipa/pr63664.C 
b/gcc/testsuite/gcc.dg/ipa/pr63664.C
new file mode 100644
index 000..31d96d4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/pr63664.C
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options -O2 } */
+
+class test {
+ public:
+  test (int val, int *p)
+{
+  int_val = *p;
+  bool_val = (val != int_val);
+}
+
+  ~test ()
+{
+  if (!bool_val)
+   return;
+}
+
+  int get_int_val () const { return int_val; }
+
+ private:
+  bool bool_val;
+  int int_val;
+};
+
+static int __attribute__ ((noinline))
+f1 (int i, int *p)
+{
+  test obj (i, p);
+  return obj.get_int_val ();
+}
+
+static int __attribute__ ((noinline))
+f2 (int i, int *p)
+{
+  test obj (i, p);
+  return obj.get_int_val ();
+}
+
+int
+f (int i, int *p)
+{
+  return f1 (i, p) + f2 (i, p);
+}





[PATCH] Fix for PR63587

2014-10-29 Thread Martin Liška

Hello.

The following patch fixes PR63587, where we put DECL_RESULT in
cgraph_node::expand_thunk into local_decls.
The patch has been tested on x86_64-linux-pc without any regressions, and bootstrap
works correctly.

Ready for trunk?
Thanks,
Martin
gcc/testsuite/ChangeLog:

2014-10-29  Martin Liska  mli...@suse.cz

* g++.dg/ipa/pr63587-1.C: New test.
* g++.dg/ipa/pr63587-2.C: New test.


gcc/ChangeLog:

2014-10-29  Martin Liska  mli...@suse.cz

* cgraphunit.c (cgraph_node::expand_thunk): Only VAR_DECLs are put
to local declarations.
* function.c (add_local_decl): Implementation moved from header
file, assert introduced for tree type.
* function.h: Likewise.

diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index a86bd1b..6f61f5c 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -1550,7 +1550,9 @@ cgraph_node::expand_thunk (bool output_asm_thunks, bool force_gimple_thunk)
 	  else if (!is_gimple_reg_type (restype))
 	{
 	  restmp = resdecl;
-	  add_local_decl (cfun, restmp);
+
+	  if (TREE_CODE (restmp) == VAR_DECL)
+		add_local_decl (cfun, restmp);
 	  BLOCK_VARS (DECL_INITIAL (current_function_decl)) = restmp;
 	}
 	  else
diff --git a/gcc/function.c b/gcc/function.c
index ee229ad..893ca6f 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -6441,6 +6441,15 @@ match_asm_constraints_1 (rtx_insn *insn, rtx *p_sets, int noutputs)
 df_insn_rescan (insn);
 }
 
+/* Add the decl D to the local_decls list of FUN.  */
+
+void
+add_local_decl (struct function *fun, tree d)
+{
+  gcc_assert (TREE_CODE (d) == VAR_DECL);
+  vec_safe_push (fun-local_decls, d);
+}
+
 namespace {
 
 const pass_data pass_data_match_asm_constraints =
diff --git a/gcc/function.h b/gcc/function.h
index 66384e5..aa47018 100644
--- a/gcc/function.h
+++ b/gcc/function.h
@@ -668,11 +668,7 @@ struct GTY(()) function {
 
 /* Add the decl D to the local_decls list of FUN.  */
 
-static inline void
-add_local_decl (struct function *fun, tree d)
-{
-  vec_safe_push (fun-local_decls, d);
-}
+void add_local_decl (struct function *fun, tree d);
 
 #define FOR_EACH_LOCAL_DECL(FUN, I, D)		\
   FOR_EACH_VEC_SAFE_ELT_REVERSE ((FUN)-local_decls, I, D)
diff --git a/gcc/testsuite/g++.dg/ipa/pr63587-1.C b/gcc/testsuite/g++.dg/ipa/pr63587-1.C
new file mode 100644
index 000..cbf872e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ipa/pr63587-1.C
@@ -0,0 +1,92 @@
+// PR ipa/63587
+// { dg-do compile { target c++11 } }
+// { dg-options -O2 -fno-strict-aliasing }
+
+template class struct A
+{
+};
+template typename struct B
+{
+  template typename struct C;
+};
+class D;
+template typename class F;
+struct G
+{
+  void operator()(const D , D);
+};
+class D
+{
+public:
+  D (int);
+};
+struct H
+{
+  H (int);
+};
+template typename _Key, typename, typename, typename _Compare, typename
+class I
+{
+  typedef _Key key_type;
+  template typename _Key_compare struct J
+  {
+_Key_compare _M_key_compare;
+  };
+  J_Compare _M_impl;
+
+public:
+  Aint _M_get_insert_unique_pos (const key_type );
+  Aint _M_get_insert_hint_unique_pos (H );
+  template typename... _Args int _M_emplace_hint_unique (H, _Args ...);
+};
+template typename _Key, typename _Tp, typename _Compare = G,
+	  typename _Alloc = FA_Tp  
+class K
+{
+  typedef _Key key_type;
+  typedef _Key value_type;
+  typedef typename B_Alloc::template Cvalue_type _Pair_alloc_type;
+  Ikey_type, value_type, int, _Compare, _Pair_alloc_type _M_t;
+
+public:
+  void operator[](key_type)
+  {
+_M_t._M_emplace_hint_unique (0);
+  }
+};
+template typename _Key, typename _Val, typename _KeyOfValue,
+	  typename _Compare, typename _Alloc
+Aint
+I_Key, _Val, _KeyOfValue, _Compare, _Alloc::_M_get_insert_unique_pos (
+  const key_type p1)
+{
+  _M_impl._M_key_compare (p1, 0);
+}
+template typename _Key, typename _Val, typename _KeyOfValue,
+	  typename _Compare, typename _Alloc
+Aint
+I_Key, _Val, _KeyOfValue, _Compare, _Alloc::_M_get_insert_hint_unique_pos (
+  H )
+{
+  _M_get_insert_unique_pos (0);
+}
+template typename _Key, typename _Val, typename _KeyOfValue,
+	  typename _Compare, typename _Alloc
+template typename... _Args
+int
+I_Key, _Val, _KeyOfValue, _Compare, _Alloc::_M_emplace_hint_unique (
+  H p1, _Args ...)
+{
+  _M_get_insert_hint_unique_pos (p1);
+}
+namespace {
+struct L;
+}
+void
+fn1 ()
+{
+  KD, L a;
+  a[0];
+  KD, int b;
+  b[0];
+}
diff --git a/gcc/testsuite/g++.dg/ipa/pr63587-2.C b/gcc/testsuite/g++.dg/ipa/pr63587-2.C
new file mode 100644
index 000..f31c5bd
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ipa/pr63587-2.C
@@ -0,0 +1,250 @@
+// PR ipa/63587
+// { dg-do compile { target c++11 } }
+// { dg-options -O2 }
+
+namespace boost {
+class basic_cstring
+{
+public:
+  basic_cstring (char *);
+};
+template typename struct identity
+{
+};
+struct make_identity;
+struct function_buffer
+{
+};
+template typename FunctionObj struct function_obj_invoker0
+{
+  static int
+  invoke (function_buffer )
+  {
+FunctionObj f;
+  

Re: [PATCH, IPA ICF] Fix PR63664, PR63574 (segfault in ipa-icf pass)

2014-10-29 Thread Martin Liška

On 10/29/2014 03:07 PM, Ilya Enkovich wrote:

2014-10-29 17:01 GMT+03:00 Martin Liška mli...@suse.cz:

On 10/29/2014 02:45 PM, Ilya Enkovich wrote:


On 29 Oct 10:34, Richard Biener wrote:


On Tue, Oct 28, 2014 at 5:14 PM, Ilya Enkovich enkovich@gmail.com
wrote:


Hi,

This patch fixes PR63664 and PR63574.  The problem is that NULL types for
labels are not handled properly by ICF.  I assume it is OK for labels to have
a NULL type, so I added a check to ICF rather than fixing label generation.

Bootstrapped and checked on linux-x86_64.  OK for trunk?



Instead, it shouldn't be called for labels.

Richard.



Here is a version which doesn't compare types for labels.  Is it OK?



Hello.

I've just been testing a patch where the pass does not call compare_operand
for gimple labels.
As the pass creates a mapping between labels and basic blocks, such a comparison
will not be necessary.


OK.  That would be better.


Hello.

The following patch fixes PR ipa/63574, where IPA ICF unnecessarily calls
compare_operand for LABEL_DECLs.
The patch has been tested on x86_64-linux-pc without any regressions, and bootstrap
works correctly.

Ready for trunk?
Thanks,
Martin




Thanks,
Ilya



Thanks,
Martin




Bootstrapped and checked on linux-x86_64.

Thanks,
Ilya
--
gcc/

2014-10-29  Ilya Enkovich  ilya.enkov...@intel.com

 PR ipa/63664
 PR bootstrap/63574
 * ipa-icf-gimple.c (func_checker::compatible_types_p): Assert for
null
 args.
 (func_checker::compare_operand): Don't compare types for labels.

gcc/testsuite/

2014-10-29  Ilya Enkovich  ilya.enkov...@intel.com

 PR ipa/63664
 * gcc.dg/ipa/pr63664.C: New.


diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c
index 1369b74..094e8ab 100644
--- a/gcc/ipa-icf-gimple.c
+++ b/gcc/ipa-icf-gimple.c
@@ -169,6 +169,9 @@ bool func_checker::compatible_types_p (tree t1, tree
t2,
bool compare_polymorphic,
bool first_argument)
   {
+  gcc_assert (t1);
+  gcc_assert (t2);
+
 if (TREE_CODE (t1) != TREE_CODE (t2))
   return return_false_with_msg (different tree types);

@@ -214,11 +217,15 @@ func_checker::compare_operand (tree t1, tree t2)
 else if (!t1 || !t2)
   return false;

-  tree tt1 = TREE_TYPE (t1);
-  tree tt2 = TREE_TYPE (t2);
+  if (TREE_CODE (t1) != LABEL_DECL
+   TREE_CODE (t2) != LABEL_DECL)
+{
+  tree tt1 = TREE_TYPE (t1);
+  tree tt2 = TREE_TYPE (t2);

-  if (!func_checker::compatible_types_p (tt1, tt2))
-return false;
+  if (!func_checker::compatible_types_p (tt1, tt2))
+   return false;
+}

 base1 = get_addr_base_and_unit_offset (t1, offset1);
 base2 = get_addr_base_and_unit_offset (t2, offset2);
diff --git a/gcc/testsuite/gcc.dg/ipa/pr63664.C
b/gcc/testsuite/gcc.dg/ipa/pr63664.C
new file mode 100644
index 000..31d96d4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/pr63664.C
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options -O2 } */
+
+class test {
+ public:
+  test (int val, int *p)
+{
+  int_val = *p;
+  bool_val = (val != int_val);
+}
+
+  ~test ()
+{
+  if (!bool_val)
+   return;
+}
+
+  int get_int_val () const { return int_val; }
+
+ private:
+  bool bool_val;
+  int int_val;
+};
+
+static int __attribute__ ((noinline))
+f1 (int i, int *p)
+{
+  test obj (i, p);
+  return obj.get_int_val ();
+}
+
+static int __attribute__ ((noinline))
+f2 (int i, int *p)
+{
+  test obj (i, p);
+  return obj.get_int_val ();
+}
+
+int
+f (int i, int *p)
+{
+  return f1 (i, p) + f2 (i, p);
+}





gcc/testsuite/ChangeLog:

2014-10-29  Martin Liska  mli...@suse.cz

* g++.dg/ipa/pr63574.C: New test.


gcc/ChangeLog:

2014-10-29  Martin Liska  mli...@suse.cz

* ipa-icf-gimple.c (func_checker::compare_variable_decl):
(func_checker::parse_labels):
(func_checker::compare_gimple_label):
* ipa-icf-gimple.h:

diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c
index d3f3795..ecb9667 100644
--- a/gcc/ipa-icf-gimple.c
+++ b/gcc/ipa-icf-gimple.c
@@ -527,6 +527,10 @@ func_checker::compare_variable_decl (tree t1, tree t2)
   return return_with_debug (ret);
 }
 
+
+/* Function visits all gimple labels and creates corresponding
+   mapping between basic blocks and labels.  */
+
 void
 func_checker::parse_labels (sem_bb *bb)
 {
@@ -765,7 +769,8 @@ func_checker::compare_gimple_label (gimple g1, gimple g2)
   if (FORCED_LABEL (t1) || FORCED_LABEL (t2))
 return return_false_with_msg (FORCED_LABEL);
 
-  return compare_tree_ssa_label (t1, t2);
+  /* As the pass build BB to label mapping, no further check is needed.  */
+  return true;
 }
 
 /* Verifies for given GIMPLEs S1 and S2 that
diff --git a/gcc/ipa-icf-gimple.h b/gcc/ipa-icf-gimple.h
index 8487a2a..5811bd1 100644
--- a/gcc/ipa-icf-gimple.h
+++ b/gcc/ipa-icf-gimple.h
@@ -145,6 +145,8 @@ public:
   /* Memory release routine.  */
   ~func_checker();
 
+  /* Function visits all

  1   2   3   4   5   6   7   8   9   10   >