Hi, in IPA-CP, a few functions look at callees of cgraph edges to see if they still lead to the original node (those still need to be examined or re-examined) or to a clone (those are considered already decided and should be left alone). The problem is that currently, when looking at an SCC, these functions decide not to "close" the SCC by switching the last edge from a specialized clone to a specialized clone because it already leads to a clone, albeit one for all contexts.
In the testcase ipcp-agg-7.c that is part of this patch, this leads to two copies of foo, one of them not specialized for known aggregate values, even though there should have been just one. The patch fixes this by introducing another flag is_all_contexts_clone and testing it in the necessary functions. In order to avoid confusion, I renamed already existing flag clone_for_all_contexts to do_clone_for_all_contexts. Additionally, I discovered that decide_whether_version_node contains a bogus "|| !plats->aggs" in a test, which is harmless but just should not be there, so I removed it. Bootstrapped and tested on x86_64-linux. OK for trunk? Martin 2012-11-20 Martin Jambor <mjam...@suse.cz> * ipa-prop.h (struct ipa_node_params): Rename clone_for_all_contexts to do_clone_for_all_contexts. Update all uses. New flag is_all_contexts_clone. * ipa-cp.c (cgraph_edge_brings_value_p): Also consider the case when cs leads to the clone for all contexts. (perhaps_add_new_callers): Likewise. (decide_whether_version_node): Remove bogus !plats->aggs test. Set is_all_contexts_clone when cloning for all contexts. * testsuite/gcc.dg/ipa/ipcp-agg-7.c: New test. * testsuite/gcc.dg/ipa/ipcp-agg-8.c: Likewise. Index: src/gcc/ipa-cp.c =================================================================== --- src.orig/gcc/ipa-cp.c +++ src/gcc/ipa-cp.c @@ -1828,7 +1828,7 @@ estimate_local_effects (struct cgraph_no if (size <= 0 || cgraph_will_be_removed_from_program_if_no_direct_calls (node)) { - info->clone_for_all_contexts = true; + info->do_clone_for_all_contexts = true; base_time = time; if (dump_file) @@ -1841,7 +1841,7 @@ estimate_local_effects (struct cgraph_no { if (size + overall_size <= max_new_size) { - info->clone_for_all_contexts = true; + info->do_clone_for_all_contexts = true; base_time = time; overall_size += size; @@ -2314,8 +2314,9 @@ cgraph_edge_brings_value_p (struct cgrap struct ipcp_value_source *src) { struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); + struct ipa_node_params *dst_info = IPA_NODE_REF (cs->callee); - if (IPA_NODE_REF (cs->callee)->ipcp_orig_node + if ((dst_info->ipcp_orig_node && !dst_info->is_all_contexts_clone) || caller_info->node_dead) return false; if (!src->val) @@ -3177,8 +3178,9 @@ perhaps_add_new_callers (struct cgraph_n while (cs) { enum availability availability; - - if (cgraph_function_node (cs->callee, &availability) == node + struct cgraph_node *dst = cgraph_function_node (cs->callee, + &availability); + if ((dst == node || IPA_NODE_REF (dst)->is_all_contexts_clone) && availability > AVAIL_OVERWRITABLE && cgraph_edge_brings_value_p (cs, src)) { @@ -3337,8 +3339,8 @@ decide_whether_version_node (struct cgra cgraph_node_name (node), node->uid); gather_context_independent_values (info, &known_csts, &known_binfos, - info->clone_for_all_contexts ? &known_aggs - : NULL, NULL); + info->do_clone_for_all_contexts ? &known_aggs + : NULL, NULL); for (i = 0; i < count ;i++) { @@ -3353,7 +3355,7 @@ decide_whether_version_node (struct cgra ret |= decide_about_value (node, i, -1, val, known_csts, known_binfos); - if (!plats->aggs_bottom || !plats->aggs) + if (!plats->aggs_bottom) { struct ipcp_agg_lattice *aglat; struct ipcp_value *val; @@ -3370,8 +3372,9 @@ decide_whether_version_node (struct cgra info = IPA_NODE_REF (node); } - if (info->clone_for_all_contexts) + if (info->do_clone_for_all_contexts) { + struct cgraph_node *clone; vec<cgraph_edge_p> callers; if (dump_file) @@ -3381,11 +3384,12 @@ decide_whether_version_node (struct cgra callers = collect_callers_of_node (node); move_binfos_to_values (known_csts, known_binfos); - create_specialized_node (node, known_csts, + clone = create_specialized_node (node, known_csts, known_aggs_to_agg_replacement_list (known_aggs), callers); info = IPA_NODE_REF (node); - info->clone_for_all_contexts = false; + info->do_clone_for_all_contexts = false; + IPA_NODE_REF (clone)->is_all_contexts_clone = true; ret = true; } else Index: src/gcc/ipa-prop.h =================================================================== --- src.orig/gcc/ipa-prop.h +++ src/gcc/ipa-prop.h @@ -328,7 +328,9 @@ struct ipa_node_params unsigned node_enqueued : 1; /* Whether we should create a specialized version based on values that are known to be constant in all contexts. */ - unsigned clone_for_all_contexts : 1; + unsigned do_clone_for_all_contexts : 1; + /* Set if this is an IPA-CP clone for all contexts. */ + unsigned is_all_contexts_clone : 1; /* Node has been completely replaced by clones and will be removed after ipa-cp is finished. */ unsigned node_dead : 1; Index: src/gcc/testsuite/gcc.dg/ipa/ipcp-agg-7.c =================================================================== --- /dev/null +++ src/gcc/testsuite/gcc.dg/ipa/ipcp-agg-7.c @@ -0,0 +1,54 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-ipa-sra -fdump-ipa-cp-details -fdump-tree-optimized-slim" } */ +/* { dg-add-options bind_pic_locally } */ + +struct S +{ + int a, b, c; +}; + +void *blah(int, void *); + +static void __attribute__ ((noinline)) foo (int x, int z, struct S *p); + +static void __attribute__ ((noinline)) +bar (int x, int z, struct S *p) +{ + foo (z, x, p); +} + +static void __attribute__ ((noinline)) +foo (int x, int z, struct S *p) +{ + int i, c = p->c; + int b = p->b - z; + void *v = (void *) p; + + if (z) + { + z--; + bar (z, x, p); + } + for (i = 0; i< c; i++) + v = blah(b + x + i, v); +} + +void +entry (int c) +{ + struct S s; + int i; + + for (i = 0; i<c; i++) + { + s.a = c; + s.b = 64; + s.c = 32; + foo (4, i, &s); + } +} +/* { dg-final { scan-ipa-dump-times "Clone of bar" 1 "cp" } } */ +/* { dg-final { scan-ipa-dump-times "Clone of foo" 1 "cp" } } */ +/* { dg-final { cleanup-ipa-dump "cp" } } */ +/* { dg-final { scan-tree-dump-not "->c;" "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ Index: src/gcc/testsuite/gcc.dg/ipa/ipcp-agg-8.c =================================================================== --- /dev/null +++ src/gcc/testsuite/gcc.dg/ipa/ipcp-agg-8.c @@ -0,0 +1,52 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-ipa-sra -fdump-tree-optimized-slim" } */ +/* { dg-add-options bind_pic_locally } */ + +struct S +{ + int a, b, c; +}; + +void *blah(int, void *); + +static void __attribute__ ((noinline)) foo (int x, int z, struct S *p); + +static void __attribute__ ((noinline)) +bar (int x, int z, struct S *p) +{ + p->b = 0; + foo (z, x, p); +} + +static void __attribute__ ((noinline)) +foo (int x, int z, struct S *p) +{ + int i, c = p->c; + int b = p->b - z; + void *v = (void *) p; + + if (z) + { + z--; + bar (z, x, p); + } + for (i = 0; i< c; i++) + v = blah(b + x + i, v); +} + +void +entry (int c) +{ + struct S s; + int i; + + for (i = 0; i<c; i++) + { + s.a = c; + s.b = 64; + s.c = 32; + foo (4, i, &s); + } +} +/* { dg-final { scan-tree-dump "->b;" "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */