On Fri, Nov 18, 2011 at 6:47 PM, Martin Jambor <mjam...@suse.cz> wrote: > Hi, > > PR 50744 is an issue with an integer overflow when we propagate the > estimated size and time effects from callees to callers. Because such > paths in the parameter value graph can be arbitrarily long, we simply > need to introduce an artificial cap on these values. This is what the > patch below does. The cap should be more than enough for any > reasonable values. > > Moreover, I have looked at how we then process the accumulated > estimates and decided to compute evaluation ratio in > good_cloning_opportunity_p in HOST_WIDEST_INT. Call graph frequencies > are numerators of fractions with denominator 1000 and therefore > capping the size and cost estimate as well as the frequency sums so > that their multiplication would not overflow an int seems too > constraining on 32bit hosts. > > Bootstrapped and tested on x86_64-linux without any problems, OK for > trunk?
This introduces host-dependent code generation differences, right? You can simply use int64_t for code that is run on the host only. Richard. > Thanks, > > Martin > > > > 2011-11-15 Martin Jambor <mjam...@suse.cz> > > PR tree-optimization/50744 > * ipa-cp.c (good_cloning_opportunity_p): Assert size_cost is positive, > compute evaluation in HOST_WIDEST_INT. > (safe_add): New function > (propagate_effects): Use safe_add to accumulate effects. > > * testsuite/gcc.dg/ipa/pr50744.c: New test. > > > Index: src/gcc/testsuite/gcc.dg/ipa/pr50744.c > =================================================================== > --- /dev/null > +++ src/gcc/testsuite/gcc.dg/ipa/pr50744.c > @@ -0,0 +1,119 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -fno-optimize-sibling-calls" } */ > + > +extern int use_data (void *p_01, void *p_02, void *p_03, void *p_04, void > *p_05, > + void *p_06, void *p_07, void *p_08, void *p_09, void > *p_10, > + void *p_11, void *p_12, void *p_13, void *p_14, void > *p_15, > + void *p_16, void *p_17, void *p_18, void *p_19, void > *p_20, > + void *p_21, void *p_22, void *p_23, void *p_24, void > *p_25, > + void *p_26, void *p_27, void *p_28, void *p_29, > + void *p_30); > + > +extern int idx (int i, int j, int n); > + > +struct stuff > +{ > + int decision; > + int *a, *b, *c; > + int res; > +}; > + > + > +#define some_large_stuff(stuff, n) { \ > + int i, j, k; \ > + for (i = 0; i < n; i++) \ > + for (j = 0; j < n; j++) \ > + { \ > + int v = stuff->c[idx(i, j, n)]; \ > + for (k = 0; k < n; k++) \ > + v += stuff->a[idx(i, k, n)] * stuff->b[idx(k,j,n)]; \ > + stuff->c[idx(i, j, n)] = v; \ > + } \ > +} > + > +#define recursion if (iter > 0) \ > + foo (stuff, iter - 1, (void *) -1, p_01, p_02, p_03, p_04, p_05, p_06, \ > + p_07, p_08, p_09, p_10, p_11, p_12, p_13, p_14, p_15, p_16, p_17, \ > + p_18, p_19, p_20, p_21, p_22, p_23, p_24, p_25, p_26, p_27, p_28, > p_29); \ > + else \ > + foo (stuff, iter, p_01, p_02, p_03, p_04, p_05, p_06, p_07, p_08, > p_09, \ > + p_10, p_11, p_12, p_13, p_14, p_15, p_16, p_17, p_18, p_19, p_20, \ > + p_21,p_22, p_23, p_24, p_25, p_26, p_27, p_28, p_29, p_30) > + > +void > +foo (struct stuff *stuff, > + int iter, > + void *p_01, void *p_02, void *p_03, void *p_04, void *p_05, > + void *p_06, void *p_07, void *p_08, void *p_09, void *p_10, > + void *p_11, void *p_12, void *p_13, void *p_14, void *p_15, > + void *p_16, void *p_17, void *p_18, void *p_19, void *p_20, > + void *p_21, void *p_22, void *p_23, void *p_24, void *p_25, > + void *p_26, void *p_27, void *p_28, void *p_29, void *p_30) > +{ > + switch (stuff->decision) > + { > + case 0: > + some_large_stuff (stuff, 83); > + stuff->res = > + use_data (p_01, p_02, p_03, p_04, p_05, p_06, p_07, p_08, p_09, p_10, > + p_11, p_12, p_13, p_14, p_15, p_16, p_17, p_18, p_19, p_20, > + p_21, p_22, p_23, p_24, p_25, p_26, p_27, p_28, p_29, p_30); > + recursion; > + break; > + > + case 1: > + some_large_stuff (stuff, 25); > + stuff->res = > + use_data (p_11, p_02, p_03, p_04, p_05, p_06, p_07, p_08, p_09, p_10, > + p_21, p_12, p_13, p_14, p_15, p_16, p_17, p_18, p_19, p_20, > + p_01, p_22, p_23, p_24, p_25, p_26, p_27, p_28, p_29, p_30); > + recursion; > + break; > + > + case 3: > + some_large_stuff (stuff, 139); > + stuff->res = > + use_data (p_01, p_12, p_03, p_04, p_05, p_06, p_07, p_08, p_09, p_10, > + p_11, p_22, p_13, p_14, p_15, p_16, p_17, p_18, p_19, p_20, > + p_21, p_02, p_23, p_24, p_25, p_26, p_27, p_28, p_29, p_30); > + recursion; > + break; > + > + case 4: > + some_large_stuff (stuff, 32); > + stuff->res = > + use_data (p_01, p_02, p_13, p_04, p_05, p_06, p_07, p_08, p_09, p_10, > + p_11, p_12, p_23, p_14, p_15, p_16, p_17, p_18, p_19, p_20, > + p_21, p_22, p_03, p_24, p_25, p_26, p_27, p_28, p_29, p_30); > + recursion; > + break; > + > + case 5: > + some_large_stuff (stuff, 205); > + stuff->res = > + use_data (p_01, p_02, p_03, p_04, p_15, p_06, p_07, p_08, p_09, p_10, > + p_11, p_12, p_13, p_14, p_25, p_16, p_17, p_18, p_19, p_20, > + p_21, p_22, p_23, p_24, p_05, p_26, p_27, p_28, p_29, p_30); > + recursion; > + break; > + > + case 6: > + some_large_stuff (stuff, 64); > + stuff->res = > + use_data (p_01, p_02, p_03, p_04, p_05, p_16, p_07, p_08, p_09, p_10, > + p_11, p_12, p_13, p_14, p_15, p_26, p_17, p_18, p_19, p_20, > + p_21, p_22, p_23, p_24, p_25, p_06, p_27, p_28, p_29, p_30); > + recursion; > + break; > + } > +} > + > +#define NULL (void *)0 > + > +void > +bar (struct stuff *stuff, int iter) > +{ > + foo (stuff, iter, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, > + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, > NULL, > + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); > +} > Index: src/gcc/ipa-cp.c > =================================================================== > --- src.orig/gcc/ipa-cp.c > +++ src/gcc/ipa-cp.c > @@ -1225,19 +1229,19 @@ good_cloning_opportunity_p (struct cgrap > || !optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl))) > return false; > > - gcc_checking_assert (size_cost >= 0); > + gcc_assert (size_cost > 0); > > - /* FIXME: These decisions need tuning. */ > if (max_count) > { > - int evaluation, factor = (count_sum * 1000) / max_count; > - > - evaluation = (time_benefit * factor) / size_cost; > + int factor = (count_sum * 1000) / max_count; > + HOST_WIDEST_INT evaluation = (((HOST_WIDEST_INT) time_benefit * factor) > + / size_cost); > > if (dump_file && (dump_flags & TDF_DETAILS)) > fprintf (dump_file, " good_cloning_opportunity_p (time: %i, " > "size: %i, count_sum: " HOST_WIDE_INT_PRINT_DEC > - ") -> evaluation: %i, threshold: %i\n", > + ") -> evaluation: " HOST_WIDEST_INT_PRINT_DEC > + ", threshold: %i\n", > time_benefit, size_cost, (HOST_WIDE_INT) count_sum, > evaluation, 500); > > @@ -1245,11 +1249,13 @@ good_cloning_opportunity_p (struct cgrap > } > else > { > - int evaluation = (time_benefit * freq_sum) / size_cost; > + HOST_WIDEST_INT evaluation = (((HOST_WIDEST_INT) time_benefit * > freq_sum) > + / size_cost); > > if (dump_file && (dump_flags & TDF_DETAILS)) > fprintf (dump_file, " good_cloning_opportunity_p (time: %i, " > - "size: %i, freq_sum: %i) -> evaluation: %i, threshold: %i\n", > + "size: %i, freq_sum: %i) -> evaluation: " > + HOST_WIDEST_INT_PRINT_DEC ", threshold: %i\n", > time_benefit, size_cost, freq_sum, evaluation, > CGRAPH_FREQ_BASE /2); > > @@ -1574,6 +1580,20 @@ propagate_constants_topo (struct topo_in > } > } > > + > +/* Return the sum of A and B if none of them is bigger than INT_MAX/2, return > + the bigger one otherwise. */ > + > +static int > +safe_add (int a, int b) > +{ > + if (a > INT_MAX/2 || b > INT_MAX/2) > + return a > b ? a : b; > + else > + return a + b; > +} > + > + > /* Propagate the estimated effects of individual values along the topological > from the dependant values to those they depend on. */ > > @@ -1590,8 +1610,9 @@ propagate_effects (void) > > for (val = base; val; val = val->scc_next) > { > - time += val->local_time_benefit + val->prop_time_benefit; > - size += val->local_size_cost + val->prop_size_cost; > + time = safe_add (time, > + val->local_time_benefit + val->prop_time_benefit); > + size = safe_add (size, val->local_size_cost + val->prop_size_cost); > } > > for (val = base; val; val = val->scc_next) > @@ -1599,8 +1620,10 @@ propagate_effects (void) > if (src->val > && cgraph_maybe_hot_edge_p (src->cs)) > { > - src->val->prop_time_benefit += time; > - src->val->prop_size_cost += size; > + src->val->prop_time_benefit = safe_add (time, > + src->val->prop_time_benefit); > + src->val->prop_size_cost = safe_add (size, > + src->val->prop_size_cost); > } > } > }