Hi,
This patch makes indirect call profiling work across modules. Unlike LIPO, I
am not adding module IDs, since I do not know how to make them stable across
multiple uses of the same .o files. Instead, I simply assign a unique ID to each
function in the program that may be called indirectly. This is done by combining
its assembler name, file & line, and gcov filename into a single hash. For GCC
this gives no collisions.
The rest of the updates are fairly straightforward. Currently we have module-local
__gcov_indirect_call_callee and __gcov_indirect_call_counters to track the
calls. I made them global and define them in libgcov.
__gcov_indirect_call_profiler used to take these two as parameters; I
replaced it with __gcov_indirect_call_profiler_v2, which has those two
hard-coded to simplify the call sequence.
The sole purpose of this patch is to measure cross-module calls and get sane
histograms attached to indirect calls. In the third patch of the series I will
actually make them used by the LTO ipa-profile pass.
Bootstrapped/regtested on x86_64-linux; I will commit it shortly.
Honza
Index: libgcc/libgcov.c
===================================================================
--- libgcc/libgcov.c (revision 201539)
+++ libgcc/libgcov.c (working copy)
@@ -1121,6 +1121,20 @@ __gcov_one_value_profiler (gcov_type *co
#ifdef L_gcov_indirect_call_profiler
+/* These two variables are used to actually track caller and callee. Keep
+ them in TLS memory so races are not common (they are written to often).
+ The variables are set directly by GCC instrumented code, so declaration
+ here must match one in tree-profile.c */
+
+#ifdef HAVE_CC_TLS
+__thread
+#endif
+void * __gcov_indirect_call_callee;
+#ifdef HAVE_CC_TLS
+__thread
+#endif
+gcov_type * __gcov_indirect_call_counters;
+
/* By default, the C++ compiler will use function addresses in the
vtable entries. Setting TARGET_VTABLE_USES_DESCRIPTORS to nonzero
tells the compiler to use function descriptors instead. The value
@@ -1140,19 +1154,43 @@ __gcov_one_value_profiler (gcov_type *co
/* Tries to determine the most common value among its inputs. */
void
-__gcov_indirect_call_profiler (gcov_type* counter, gcov_type value,
- void* cur_func, void* callee_func)
+__gcov_indirect_call_profiler_v2 (gcov_type value, void* cur_func)
{
/* If the C++ virtual tables contain function descriptors then one
function may have multiple descriptors and we need to dereference
the descriptors to see if they point to the same function. */
- if (cur_func == callee_func
- || (VTABLE_USES_DESCRIPTORS && callee_func
- && *(void **) cur_func == *(void **) callee_func))
- __gcov_one_value_profiler_body (counter, value);
+ if (cur_func == __gcov_indirect_call_callee
+ || (VTABLE_USES_DESCRIPTORS && __gcov_indirect_call_callee
+ && *(void **) cur_func == *(void **) __gcov_indirect_call_callee))
+ __gcov_one_value_profiler_body (__gcov_indirect_call_counters, value);
}
#endif
#ifdef L_gcov_average_profiler
/* Increase corresponding COUNTER by VALUE. FIXME: Perhaps we want
Index: gcc/value-prof.c
===================================================================
--- gcc/value-prof.c (revision 201632)
+++ gcc/value-prof.c (working copy)
@@ -1173,24 +1173,56 @@ gimple_mod_subtract_transform (gimple_st
return true;
}
-static vec<cgraph_node_ptr> cgraph_node_map
- = vNULL;
+static pointer_map_t *cgraph_node_map;
/* Initialize map from FUNCDEF_NO to CGRAPH_NODE. */
void
-init_node_map (void)
+init_node_map (bool local)
{
struct cgraph_node *n;
+ cgraph_node_map = pointer_map_create ();
- if (get_last_funcdef_no ())
- cgraph_node_map.safe_grow_cleared (get_last_funcdef_no ());
-
- FOR_EACH_FUNCTION (n)
- {
- if (DECL_STRUCT_FUNCTION (n->symbol.decl))
- cgraph_node_map[DECL_STRUCT_FUNCTION (n->symbol.decl)->funcdef_no] = n;
- }
+ FOR_EACH_DEFINED_FUNCTION (n)
+ if (cgraph_function_with_gimple_body_p (n)
+ && !cgraph_only_called_directly_p (n))
+ {
+ void **val;
+ if (local)
+ {
+ n->profile_id = coverage_compute_profile_id (n);
+ while ((val = pointer_map_contains (cgraph_node_map, (void
*)(size_t)n->profile_id)) || !n->profile_id)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Local profile-id %i conflict with nodes
%s/%i %s/%i\n",
+ n->profile_id,
+ cgraph_node_name (n),
+ n->symbol.order,
+ symtab_node_name (*(symtab_node*)val),
+ (*(symtab_node *)val)->symbol.order);
+ n->profile_id = (n->profile_id + 1) & 0x7fffffff;
+ }
+ }
+ else if (!n->profile_id)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Node %s/%i has no profile-id (profile
feedback missing?)\n",
+ cgraph_node_name (n),
+ n->symbol.order);
+ continue;
+ }
+ else if ((val = pointer_map_contains (cgraph_node_map, (void
*)(size_t)n->profile_id)))
+ {
+ if (dump_file)
+ fprintf (dump_file, "Node %s/%i has IP profile-id %i conflict.
Giving up.\n",
+ cgraph_node_name (n),
+ n->symbol.order,
+ n->profile_id);
+ *val = NULL;
+ continue;
+ }
+ *pointer_map_insert (cgraph_node_map, (void *)(size_t)n->profile_id) =
(void *)n;
+ }
}
/* Delete the CGRAPH_NODE_MAP. */
@@ -1198,27 +1230,20 @@ init_node_map (void)
void
del_node_map (void)
{
- cgraph_node_map.release ();
+ pointer_map_destroy (cgraph_node_map);
}
/* Return cgraph node for function with pid */
-static inline struct cgraph_node*
-find_func_by_funcdef_no (int func_id)
+struct cgraph_node*
+find_func_by_profile_id (int func_id)
{
- int max_id = get_last_funcdef_no ();
- if (func_id >= max_id || cgraph_node_map[func_id] == NULL)
- {
- if (flag_profile_correction)
- inform (DECL_SOURCE_LOCATION (current_function_decl),
- "Inconsistent profile: indirect call target (%d) does not
exist", func_id);
- else
- error ("Inconsistent profile: indirect call target (%d) does not
exist", func_id);
-
- return NULL;
- }
-
- return cgraph_node_map[func_id];
+ void **val = pointer_map_contains (cgraph_node_map,
+ (void *)(size_t)func_id);
+ if (val)
+ return (struct cgraph_node *)*val;
+ else
+ return NULL;
}
/* Perform sanity check on the indirect call target. Due to race conditions,
@@ -1415,10 +1440,12 @@ gimple_ic_transform (gimple_stmt_iterato
val = histogram->hvalue.counters [0];
count = histogram->hvalue.counters [1];
all = histogram->hvalue.counters [2];
- gimple_remove_histogram_value (cfun, stmt, histogram);
if (4 * count <= 3 * all)
- return false;
+ {
+ gimple_remove_histogram_value (cfun, stmt, histogram);
+ return false;
+ }
bb_all = gimple_bb (stmt)->count;
/* The order of CHECK_COUNTER calls is important -
@@ -1426,16 +1453,31 @@ gimple_ic_transform (gimple_stmt_iterato
and we want to make count <= all <= bb_all. */
if ( check_counter (stmt, "ic", &all, &bb_all, bb_all)
|| check_counter (stmt, "ic", &count, &all, all))
- return false;
+ {
+ gimple_remove_histogram_value (cfun, stmt, histogram);
+ return false;
+ }
if (all > 0)
prob = GCOV_COMPUTE_SCALE (count, all);
else
prob = 0;
- direct_call = find_func_by_funcdef_no ((int)val);
+ direct_call = find_func_by_profile_id ((int)val);
if (direct_call == NULL)
- return false;
+ {
+ if (val)
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, "Indirect call -> direct call from other
module");
+ print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM);
+ fprintf (dump_file, "=> %i (will resolve only with LTO)\n",
(int)val);
+ }
+ }
+ return false;
+ }
+ gimple_remove_histogram_value (cfun, stmt, histogram);
if (!check_ic_target (stmt, direct_call))
return false;
Index: gcc/value-prof.h
===================================================================
--- gcc/value-prof.h (revision 201632)
+++ gcc/value-prof.h (working copy)
@@ -103,6 +103,10 @@ extern void gimple_gen_average_profiler
extern void gimple_gen_ior_profiler (histogram_value, unsigned, unsigned);
extern void stream_out_histogram_value (struct output_block *,
histogram_value);
extern void stream_in_histogram_value (struct lto_input_block *, gimple);
+extern struct cgraph_node* find_func_by_profile_id (int func_id);
+extern gimple gimple_ic (gimple, struct cgraph_node *,
+ int, gcov_type, gcov_type);
+
/* In profile.c. */
extern void init_branch_prob (void);
Index: gcc/gcov-io.h
===================================================================
--- gcc/gcov-io.h (revision 201632)
+++ gcc/gcov-io.h (working copy)
@@ -515,7 +515,7 @@ extern void __gcov_merge_ior (gcov_type
extern void __gcov_interval_profiler (gcov_type *, gcov_type, int, unsigned);
extern void __gcov_pow2_profiler (gcov_type *, gcov_type);
extern void __gcov_one_value_profiler (gcov_type *, gcov_type);
-extern void __gcov_indirect_call_profiler (gcov_type *, gcov_type, void *,
void *);
+extern void __gcov_indirect_call_profiler_v2 (gcov_type, void *);
extern void __gcov_average_profiler (gcov_type *, gcov_type);
extern void __gcov_ior_profiler (gcov_type *, gcov_type);
Index: gcc/profile.h
===================================================================
--- gcc/profile.h (revision 201632)
+++ gcc/profile.h (working copy)
@@ -43,7 +43,7 @@ extern void mcf_smooth_cfg (void);
extern gcov_type sum_edge_counts (vec<edge, va_gc> *edges);
-extern void init_node_map (void);
+extern void init_node_map (bool);
extern void del_node_map (void);
extern void get_working_sets (void);
Index: gcc/coverage.c
===================================================================
--- gcc/coverage.c (revision 201632)
+++ gcc/coverage.c (working copy)
@@ -539,6 +539,28 @@ coverage_compute_lineno_checksum (void)
return chksum;
}
+/* Compute profile ID. This is better to be unique in whole program. */
+
+unsigned
+coverage_compute_profile_id (struct cgraph_node *n)
+{
+ expanded_location xloc
+ = expand_location (DECL_SOURCE_LOCATION (n->symbol.decl));
+ unsigned chksum = xloc.line;
+
+ chksum = coverage_checksum_string (chksum, xloc.file);
+ chksum = coverage_checksum_string
+ (chksum, IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->symbol.decl)));
+ if (first_global_object_name)
+ chksum = coverage_checksum_string
+ (chksum, first_global_object_name);
+ chksum = coverage_checksum_string
+ (chksum, aux_base_name);
+
+ /* Non-negative integers are hopefully small enough to fit in all targets.
*/
+ return chksum & 0x7fffffff;
+}
+
/* Compute cfg checksum for the current function.
The checksum is calculated carefully so that
source code changes that doesn't affect the control flow graph
Index: gcc/coverage.h
===================================================================
--- gcc/coverage.h (revision 201632)
+++ gcc/coverage.h (working copy)
@@ -35,6 +35,9 @@ extern void coverage_end_function (unsig
/* Compute the control flow checksum for the current function. */
extern unsigned coverage_compute_cfg_checksum (void);
+/* Compute the profile id of function N. */
+extern unsigned coverage_compute_profile_id (struct cgraph_node *n);
+
/* Compute the line number checksum for the current function. */
extern unsigned coverage_compute_lineno_checksum (void);
Index: gcc/tree-profile.c
===================================================================
--- gcc/tree-profile.c (revision 201632)
+++ gcc/tree-profile.c (working copy)
@@ -57,8 +57,8 @@ static GTY(()) tree ptr_void;
/* Do initialization work for the edge profiler. */
/* Add code:
- static gcov* __gcov_indirect_call_counters; // pointer to actual
counter
- static void* __gcov_indirect_call_callee; // actual callee address
+ __thread gcov* __gcov_indirect_call_counters; // pointer to actual
counter
+ __thread void* __gcov_indirect_call_callee; // actual callee address
*/
static void
init_ic_make_global_vars (void)
@@ -72,7 +72,8 @@ init_ic_make_global_vars (void)
get_identifier ("__gcov_indirect_call_callee"),
ptr_void);
TREE_STATIC (ic_void_ptr_var) = 1;
- TREE_PUBLIC (ic_void_ptr_var) = 0;
+ TREE_PUBLIC (ic_void_ptr_var) = 1;
+ DECL_EXTERNAL (ic_void_ptr_var) = 1;
DECL_ARTIFICIAL (ic_void_ptr_var) = 1;
DECL_INITIAL (ic_void_ptr_var) = NULL;
if (targetm.have_tls)
@@ -87,7 +88,8 @@ init_ic_make_global_vars (void)
get_identifier ("__gcov_indirect_call_counters"),
gcov_type_ptr);
TREE_STATIC (ic_gcov_type_ptr_var) = 1;
- TREE_PUBLIC (ic_gcov_type_ptr_var) = 0;
+ TREE_PUBLIC (ic_gcov_type_ptr_var) = 1;
+ DECL_EXTERNAL (ic_gcov_type_ptr_var) = 1;
DECL_ARTIFICIAL (ic_gcov_type_ptr_var) = 1;
DECL_INITIAL (ic_gcov_type_ptr_var) = NULL;
if (targetm.have_tls)
@@ -155,14 +157,14 @@ gimple_init_edge_profiler (void)
init_ic_make_global_vars ();
- /* void (*) (gcov_type *, gcov_type, void *, void *) */
+ /* void (*) (gcov_type, void *) */
ic_profiler_fn_type
= build_function_type_list (void_type_node,
- gcov_type_ptr, gcov_type_node,
+ gcov_type_node,
ptr_void,
- ptr_void, NULL_TREE);
+ NULL_TREE);
tree_indirect_call_profiler_fn
- = build_fn_decl ("__gcov_indirect_call_profiler",
+ = build_fn_decl ("__gcov_indirect_call_profiler_v2",
ic_profiler_fn_type);
TREE_NOTHROW (tree_indirect_call_profiler_fn) = 1;
DECL_ATTRIBUTES (tree_indirect_call_profiler_fn)
@@ -352,7 +354,7 @@ gimple_gen_ic_func_profiler (void)
struct cgraph_node * c_node = cgraph_get_node (current_function_decl);
gimple_stmt_iterator gsi;
gimple stmt1, stmt2;
- tree tree_uid, cur_func, counter_ptr, ptr_var, void0;
+ tree tree_uid, cur_func, void0;
if (cgraph_only_called_directly_p (c_node))
return;
@@ -361,27 +363,19 @@ gimple_gen_ic_func_profiler (void)
/* Insert code:
- stmt1: __gcov_indirect_call_profiler (__gcov_indirect_call_counters,
- current_function_funcdef_no,
- &current_function_decl,
- __gcov_indirect_call_callee);
+ stmt1: __gcov_indirect_call_profiler (profile_id,
+ &current_function_decl)
*/
- gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
+ gsi = gsi_after_labels (split_edge (single_succ_edge (ENTRY_BLOCK_PTR)));
cur_func = force_gimple_operand_gsi (&gsi,
build_addr (current_function_decl,
current_function_decl),
true, NULL_TREE,
true, GSI_SAME_STMT);
- counter_ptr = force_gimple_operand_gsi (&gsi, ic_gcov_type_ptr_var,
- true, NULL_TREE, true,
- GSI_SAME_STMT);
- ptr_var = force_gimple_operand_gsi (&gsi, ic_void_ptr_var,
- true, NULL_TREE, true,
- GSI_SAME_STMT);
- tree_uid = build_int_cst (gcov_type_node, current_function_funcdef_no);
- stmt1 = gimple_build_call (tree_indirect_call_profiler_fn, 4,
- counter_ptr, tree_uid, cur_func, ptr_var);
+ tree_uid = build_int_cst (gcov_type_node, cgraph_get_node
(current_function_decl)->profile_id);
+ stmt1 = gimple_build_call (tree_indirect_call_profiler_fn, 2,
+ tree_uid, cur_func);
gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
/* Set __gcov_indirect_call_callee to 0,
@@ -461,7 +455,7 @@ tree_profiling (void)
cgraphunit.c:ipa_passes(). */
gcc_assert (cgraph_state == CGRAPH_STATE_IPA_SSA);
- init_node_map();
+ init_node_map (true);
FOR_EACH_DEFINED_FUNCTION (node)
{