On Fri, Nov 18, 2011 at 6:47 PM, Martin Jambor <mjam...@suse.cz> wrote:
> Hi,
>
> PR 50744 is an issue with an integer overflow when we propagate the
> estimated size and time effects from callees to callers.  Because such
> paths in the parameter value graph can be arbitrarily long, we simply
> need to introduce an artificial cap on these values.  This is what the
> patch below does.  The cap should be more than enough for any
> reasonable values.
>
> Moreover, I have looked at how we then process the accumulated
> estimates and decided to compute evaluation ratio in
> good_cloning_opportunity_p in HOST_WIDEST_INT.  Call graph frequencies
> are numerators of fractions with denominator 1000 and therefore
> capping the size and cost estimate as well as the frequency sums so
> that their multiplication would not overflow an int seems too
> constraining on 32bit hosts.
>
> Bootstrapped and tested on x86_64-linux without any problems, OK for
> trunk?

This introduces host-dependent code generation differences, right?
You can simply use int64_t for code that is run on the host only.

Richard.

> Thanks,
>
> Martin
>
>
>
> 2011-11-15  Martin Jambor  <mjam...@suse.cz>
>
>        PR tree-optimization/50744
>        * ipa-cp.c (good_cloning_opportunity_p): Assert size_cost is positive,
>        compute evaluation in HOST_WIDEST_INT.
>        (safe_add): New function
>        (propagate_effects): Use safe_add to accumulate effects.
>
>        * testsuite/gcc.dg/ipa/pr50744.c: New test.
>
>
> Index: src/gcc/testsuite/gcc.dg/ipa/pr50744.c
> ===================================================================
> --- /dev/null
> +++ src/gcc/testsuite/gcc.dg/ipa/pr50744.c
> @@ -0,0 +1,119 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fno-optimize-sibling-calls" } */
> +
> +extern int use_data (void *p_01, void *p_02, void *p_03, void *p_04, void 
> *p_05,
> +                    void *p_06, void *p_07, void *p_08, void *p_09, void 
> *p_10,
> +                    void *p_11, void *p_12, void *p_13, void *p_14, void 
> *p_15,
> +                    void *p_16, void *p_17, void *p_18, void *p_19, void 
> *p_20,
> +                    void *p_21, void *p_22, void *p_23, void *p_24, void 
> *p_25,
> +                    void *p_26, void *p_27, void *p_28, void *p_29,
> +                    void *p_30);
> +
> +extern int idx (int i, int j, int n);
> +
> +struct stuff
> +{
> +  int decision;
> +  int *a, *b, *c;
> +  int res;
> +};
> +
> +
> +#define some_large_stuff(stuff, n) { \
> +  int i, j, k; \
> +  for (i = 0; i < n; i++) \
> +    for (j = 0; j < n; j++) \
> +      { \
> +       int v = stuff->c[idx(i, j, n)]; \
> +       for (k = 0; k < n; k++) \
> +         v += stuff->a[idx(i, k, n)] * stuff->b[idx(k,j,n)]; \
> +       stuff->c[idx(i, j, n)] = v; \
> +      } \
> +}
> +
> +#define recursion if (iter > 0) \
> +    foo (stuff, iter - 1, (void *) -1, p_01, p_02, p_03, p_04, p_05, p_06, \
> +      p_07, p_08, p_09, p_10, p_11, p_12, p_13, p_14, p_15, p_16, p_17, \
> +     p_18, p_19, p_20, p_21, p_22, p_23, p_24, p_25, p_26, p_27, p_28, 
> p_29); \
> +    else \
> +      foo (stuff, iter, p_01, p_02, p_03, p_04, p_05, p_06, p_07, p_08, 
> p_09, \
> +       p_10, p_11, p_12, p_13, p_14, p_15, p_16, p_17, p_18, p_19, p_20, \
> +        p_21,p_22, p_23, p_24, p_25, p_26, p_27, p_28, p_29, p_30)
> +
> +void
> +foo (struct stuff *stuff,
> +     int iter,
> +     void *p_01, void *p_02, void *p_03, void *p_04, void *p_05,
> +     void *p_06, void *p_07, void *p_08, void *p_09, void *p_10,
> +     void *p_11, void *p_12, void *p_13, void *p_14, void *p_15,
> +     void *p_16, void *p_17, void *p_18, void *p_19, void *p_20,
> +     void *p_21, void *p_22, void *p_23, void *p_24, void *p_25,
> +     void *p_26, void *p_27, void *p_28, void *p_29, void *p_30)
> +{
> + switch (stuff->decision)
> +   {
> +   case 0:
> +     some_large_stuff (stuff, 83);
> +     stuff->res =
> +       use_data (p_01, p_02, p_03, p_04, p_05, p_06, p_07, p_08, p_09, p_10,
> +                p_11, p_12, p_13, p_14, p_15, p_16, p_17, p_18, p_19, p_20,
> +                p_21, p_22, p_23, p_24, p_25, p_26, p_27, p_28, p_29, p_30);
> +     recursion;
> +     break;
> +
> +   case 1:
> +     some_large_stuff (stuff, 25);
> +     stuff->res =
> +       use_data (p_11, p_02, p_03, p_04, p_05, p_06, p_07, p_08, p_09, p_10,
> +                p_21, p_12, p_13, p_14, p_15, p_16, p_17, p_18, p_19, p_20,
> +                p_01, p_22, p_23, p_24, p_25, p_26, p_27, p_28, p_29, p_30);
> +     recursion;
> +     break;
> +
> +   case 3:
> +     some_large_stuff (stuff, 139);
> +     stuff->res =
> +       use_data (p_01, p_12, p_03, p_04, p_05, p_06, p_07, p_08, p_09, p_10,
> +                p_11, p_22, p_13, p_14, p_15, p_16, p_17, p_18, p_19, p_20,
> +                p_21, p_02, p_23, p_24, p_25, p_26, p_27, p_28, p_29, p_30);
> +     recursion;
> +     break;
> +
> +   case 4:
> +     some_large_stuff (stuff, 32);
> +     stuff->res =
> +       use_data (p_01, p_02, p_13, p_04, p_05, p_06, p_07, p_08, p_09, p_10,
> +                p_11, p_12, p_23, p_14, p_15, p_16, p_17, p_18, p_19, p_20,
> +                p_21, p_22, p_03, p_24, p_25, p_26, p_27, p_28, p_29, p_30);
> +     recursion;
> +     break;
> +
> +   case 5:
> +     some_large_stuff (stuff, 205);
> +     stuff->res =
> +       use_data (p_01, p_02, p_03, p_04, p_15, p_06, p_07, p_08, p_09, p_10,
> +                p_11, p_12, p_13, p_14, p_25, p_16, p_17, p_18, p_19, p_20,
> +                p_21, p_22, p_23, p_24, p_05, p_26, p_27, p_28, p_29, p_30);
> +     recursion;
> +     break;
> +
> +   case 6:
> +     some_large_stuff (stuff, 64);
> +     stuff->res =
> +       use_data (p_01, p_02, p_03, p_04, p_05, p_16, p_07, p_08, p_09, p_10,
> +                p_11, p_12, p_13, p_14, p_15, p_26, p_17, p_18, p_19, p_20,
> +                p_21, p_22, p_23, p_24, p_25, p_06, p_27, p_28, p_29, p_30);
> +     recursion;
> +     break;
> +   }
> +}
> +
> +#define NULL (void *)0
> +
> +void
> +bar (struct stuff *stuff, int iter)
> +{
> +  foo (stuff, iter, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
> +       NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
> NULL,
> +       NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
> +}
> Index: src/gcc/ipa-cp.c
> ===================================================================
> --- src.orig/gcc/ipa-cp.c
> +++ src/gcc/ipa-cp.c
> @@ -1225,19 +1229,19 @@ good_cloning_opportunity_p (struct cgrap
>       || !optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl)))
>     return false;
>
> -  gcc_checking_assert (size_cost >= 0);
> +  gcc_assert (size_cost > 0);
>
> -  /* FIXME:  These decisions need tuning.  */
>   if (max_count)
>     {
> -      int evaluation, factor = (count_sum * 1000) / max_count;
> -
> -      evaluation = (time_benefit * factor) / size_cost;
> +      int factor = (count_sum * 1000) / max_count;
> +      HOST_WIDEST_INT evaluation = (((HOST_WIDEST_INT) time_benefit * factor)
> +                                   / size_cost);
>
>       if (dump_file && (dump_flags & TDF_DETAILS))
>        fprintf (dump_file, "     good_cloning_opportunity_p (time: %i, "
>                 "size: %i, count_sum: " HOST_WIDE_INT_PRINT_DEC
> -                ") -> evaluation: %i, threshold: %i\n",
> +                ") -> evaluation: " HOST_WIDEST_INT_PRINT_DEC
> +                ", threshold: %i\n",
>                 time_benefit, size_cost, (HOST_WIDE_INT) count_sum,
>                 evaluation, 500);
>
> @@ -1245,11 +1249,13 @@ good_cloning_opportunity_p (struct cgrap
>     }
>   else
>     {
> -      int evaluation = (time_benefit * freq_sum) / size_cost;
> +      HOST_WIDEST_INT evaluation = (((HOST_WIDEST_INT) time_benefit * 
> freq_sum)
> +                                   / size_cost);
>
>       if (dump_file && (dump_flags & TDF_DETAILS))
>        fprintf (dump_file, "     good_cloning_opportunity_p (time: %i, "
> -                "size: %i, freq_sum: %i) -> evaluation: %i, threshold: %i\n",
> +                "size: %i, freq_sum: %i) -> evaluation: "
> +                HOST_WIDEST_INT_PRINT_DEC ", threshold: %i\n",
>                 time_benefit, size_cost, freq_sum, evaluation,
>                 CGRAPH_FREQ_BASE /2);
>
> @@ -1574,6 +1580,20 @@ propagate_constants_topo (struct topo_in
>     }
>  }
>
> +
> +/* Return the sum of A and B if none of them is bigger than INT_MAX/2, return
> +   the bigger one otherwise.  */
> +
> +static int
> +safe_add (int a, int b)
> +{
> +  if (a > INT_MAX/2 || b > INT_MAX/2)
> +    return a > b ? a : b;
> +  else
> +    return a + b;
> +}
> +
> +
>  /* Propagate the estimated effects of individual values along the topological
>    from the dependant values to those they depend on.  */
>
> @@ -1590,8 +1610,9 @@ propagate_effects (void)
>
>       for (val = base; val; val = val->scc_next)
>        {
> -         time += val->local_time_benefit + val->prop_time_benefit;
> -         size += val->local_size_cost + val->prop_size_cost;
> +         time = safe_add (time,
> +                          val->local_time_benefit + val->prop_time_benefit);
> +         size = safe_add (size, val->local_size_cost + val->prop_size_cost);
>        }
>
>       for (val = base; val; val = val->scc_next)
> @@ -1599,8 +1620,10 @@ propagate_effects (void)
>          if (src->val
>              && cgraph_maybe_hot_edge_p (src->cs))
>            {
> -             src->val->prop_time_benefit += time;
> -             src->val->prop_size_cost += size;
> +             src->val->prop_time_benefit = safe_add (time,
> +                                               src->val->prop_time_benefit);
> +             src->val->prop_size_cost = safe_add (size,
> +                                                  src->val->prop_size_cost);
>            }
>     }
>  }

Reply via email to