On Thu, 30 Apr 2020, luoxhu wrote:

> Update the patch with overflow check.  Bootstrap and regression tested PASS 
> on Power8-LE.
> 
> 
> Use determine_value_range to get value range info for fold convert expressions
> with internal operation PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not overflow on
> wrapping overflow inner type.  i.e.:
> 
> (long unsigned int)((unsigned  int)n * 10 + 1)
> =>
> (long unsigned int)n * (long unsigned int)10 + (long unsigned int)1
> 
> With this patch for affine combination, load/store motion could detect
> more address refs independency and promote some memory expressions to
> registers within loop.
> 
> PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST)"
> to "(T1)(X + CST) as (T1)X + (T1)(CST)" for wrapping overflow.

This is OK for trunk if bootstrapped / tested properly.

Thanks,
Richard.

> gcc/ChangeLog
> 
>       2020-04-30  Xiong Hu Luo  <luo...@linux.ibm.com>
> 
>       PR tree-optimization/83403
>       * tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with
>       determine_value_range, Add fold conversion of MULT_EXPR, fix the
>       previous PLUS_EXPR.
> 
> gcc/testsuite/ChangeLog
> 
>       2020-04-30  Xiong Hu Luo  <luo...@linux.ibm.com>
> 
>       PR tree-optimization/83403
>       * gcc.dg/tree-ssa/pr83403-1.c: New test.
>       * gcc.dg/tree-ssa/pr83403-2.c: New test.
>       * gcc.dg/tree-ssa/pr83403.h: New header.
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c |  8 ++++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c |  8 ++++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr83403.h   | 30 +++++++++++++++++++++++
>  gcc/tree-affine.c                         | 24 ++++++++++--------
>  4 files changed, 60 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
> 
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
> new file mode 100644
> index 00000000000..748375b03af
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
> +
> +#define TYPE unsigned int
> +
> +#include "pr83403.h"
> +
> +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
> new file mode 100644
> index 00000000000..ca2e6bbd61c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
> +
> +#define TYPE int
> +
> +#include "pr83403.h"
> +
> +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
> new file mode 100644
> index 00000000000..0da8a835b5f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
> @@ -0,0 +1,30 @@
> +__attribute__ ((noinline)) void
> +calculate (const double *__restrict__ A, const double *__restrict__ B,
> +        double *__restrict__ C)
> +{
> +  TYPE m = 0;
> +  TYPE n = 0;
> +  TYPE k = 0;
> +
> +  A = (const double *) __builtin_assume_aligned (A, 16);
> +  B = (const double *) __builtin_assume_aligned (B, 16);
> +  C = (double *) __builtin_assume_aligned (C, 16);
> +
> +  for (n = 0; n < 9; n++)
> +    {
> +      for (m = 0; m < 10; m++)
> +     {
> +       C[(n * 10) + m] = 0.0;
> +     }
> +
> +      for (k = 0; k < 17; k++)
> +     {
> +#pragma simd
> +       for (m = 0; m < 10; m++)
> +         {
> +           C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k];
> +         }
> +     }
> +    }
> +}
> +
> diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
> index 0eb8db1b086..5620e6bf28f 100644
> --- a/gcc/tree-affine.c
> +++ b/gcc/tree-affine.c
> @@ -343,24 +343,28 @@ expr_to_aff_combination (aff_tree *comb, tree_code 
> code, tree type,
>           wide_int minv, maxv;
>           /* If inner type has wrapping overflow behavior, fold conversion
>              for below case:
> -              (T1)(X - CST) -> (T1)X - (T1)CST
> -            if X - CST doesn't overflow by range information.  Also handle
> -            (T1)(X + CST) as (T1)(X - (-CST)).  */
> +              (T1)(X *+- CST) -> (T1)X *+- (T1)CST
> +            if X *+- CST doesn't overflow by range information.  */
>           if (TYPE_UNSIGNED (itype)
>               && TYPE_OVERFLOW_WRAPS (itype)
> -             && TREE_CODE (op0) == SSA_NAME
>               && TREE_CODE (op1) == INTEGER_CST
> -             && icode != MULT_EXPR
> -             && get_range_info (op0, &minv, &maxv) == VR_RANGE)
> +             && determine_value_range (op0, &minv, &maxv) == VR_RANGE)
>             {
> +             wi::overflow_type overflow = wi::OVF_NONE;
> +             signop sign = UNSIGNED;
>               if (icode == PLUS_EXPR)
> -               op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
> -             if (wi::geu_p (minv, wi::to_wide (op1)))
> +               wi::add (maxv, wi::to_wide (op1), sign, &overflow);
> +             else if (icode == MULT_EXPR)
> +               wi::mul (maxv, wi::to_wide (op1), sign, &overflow);
> +             else
> +               wi::sub (minv, wi::to_wide (op1), sign, &overflow);
> +
> +             if (overflow == wi::OVF_NONE)
>                 {
>                   op0 = fold_convert (otype, op0);
>                   op1 = fold_convert (otype, op1);
> -                 return expr_to_aff_combination (comb, MINUS_EXPR, otype,
> -                                                 op0, op1);
> +                 return expr_to_aff_combination (comb, icode, otype, op0,
> +                                                 op1);
>                 }
>             }
>         }
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Reply via email to