On Fri, May 23, 2025 at 10:12 PM Andrew Pinski <quic_apin...@quicinc.com> wrote:
>
> This improves copy prop for aggregates by working over statements that don't 
> modify the access
> just like how it is done for copying zeros.
> To speed up things, we should only have one loop back on the vuse instead of 
> doing it twice for
> each load/store assignments.

I will be submitting this after the first patch is approved. Note
there is a bug with the check of stmt_may_clobber_ref_p_1 when it
comes to checking stores, we can't use TBAA.
Otherwise you miscompile the following:
```
struct f{unsigned t;};
struct p{unsigned t;};
unsigned int tt;
static inline
void g(struct f *a, struct p *b)
{
  struct f c = *a;
  b->t = 1;
  *a = c;
}
struct f a = {};
int h(void)
{
  g(&a, (struct p*)&a);
  return a.t;
}
```

Thanks,
Andrew

>
>         PR tree-optimization/14295
>
> gcc/ChangeLog:
>
>         * tree-ssa-forwprop.cc (optimize_memcpy_to_memset): Split rewriting 
> part out into ...
>         (optimize_memcpy_to_memset_1): This function.
>         (optimize_agr_copyprop):  Walk back until we get a
>         statement that may clobber the read. Call optimize_memcpy_to_memset_1.
>         Check that the src variable is not clobbered between the 2 statements.
>         (pass_forwprop::execute): Call optimize_agr_copyprop only instead of
>         optimize_memcpy_to_memset too.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/copy-prop-arg-1.c: New test.
>         * gcc.dg/tree-ssa/copy-prop-arg-2.c: New test.
>         * gcc.dg/tree-ssa/copy-prop-arg-3.c: New test.
>
> Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
> ---
>  .../gcc.dg/tree-ssa/copy-prop-arg-1.c         |  37 +++++
>  .../gcc.dg/tree-ssa/copy-prop-arg-2.c         |  35 ++++
>  .../gcc.dg/tree-ssa/copy-prop-arg-3.c         |  41 +++++
>  gcc/tree-ssa-forwprop.cc                      | 149 ++++++++++++------
>  4 files changed, 212 insertions(+), 50 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-3.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c
> new file mode 100644
> index 00000000000..876556fc464
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c
> @@ -0,0 +1,37 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized 
> -fdump-tree-forwprop1-details" } */
> +
> +/* PR tree-optimization/14295 */
> +
> +/* Check for copyprop on structs.  */
> +
> +struct s
> +{
> +  char d;
> +  int a, b;
> +  double m;
> +  int t[1024];
> +};
> +
> +void g();
> +struct s foo (struct s r)
> +{
> +  struct s temp_struct1;
> +  struct s temp_struct2;
> +  struct s temp_struct3;
> +  temp_struct1 = r;
> +  temp_struct2 = temp_struct1;
> +  temp_struct3 = temp_struct2;
> +  /* The call here should not make
> +     a difference in removing all of the copies,
> +     this is true as we end up with r = r; */
> +  g();
> +  r = temp_struct3;
> +  return r;
> +}
> +
> +/* There should be no references to any of "temp_struct*"
> +   temporaries.  */
> +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */
> +/* Also check that forwprop pass did the copy prop. */
> +/* { dg-final { scan-tree-dump-times "after previous" 3 "forwprop1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c
> new file mode 100644
> index 00000000000..b1a76b48953
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c
> @@ -0,0 +1,35 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized 
> -fdump-tree-forwprop1-details" } */
> +
> +/* PR tree-optimization/14295 */
> +
> +/* Check for copyprop on structs.  */
> +
> +struct s
> +{
> +  char d;
> +  int a, b;
> +  double m;
> +  int t[1024];
> +};
> +
> +void g();
> +struct s foo (struct s r)
> +{
> +  struct s temp_struct1;
> +  struct s temp_struct2;
> +  struct s temp_struct3;
> +  temp_struct1 = r;
> +  temp_struct2 = temp_struct1;
> +  temp_struct3 = temp_struct2;
> +  /* The call here should not make
> +     a difference in removing all of the copies. */
> +  g();
> +  return temp_struct3;
> +}
> +
> +/* There should be no references to any of "temp_struct*"
> +   temporaries.  */
> +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */
> +/* Also check that forwprop pass did the copy prop. */
> +/* { dg-final { scan-tree-dump-times "after previous" 3 "forwprop1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-3.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-3.c
> new file mode 100644
> index 00000000000..96efafbe20c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-3.c
> @@ -0,0 +1,41 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-tree-sra -fdump-tree-optimized 
> -fdump-tree-forwprop1-details" } */
> +
> +/* PR tree-optimization/14295 */
> +
> +/* Check for copyprop on structs.  */
> +
> +struct s
> +{
> +  int t[1024];
> +};
> +
> +[[gnu::noinline]]
> +void g(float *a)
> +{
> +  *a = 1.0;
> +}
> +
> +struct s r;
> +struct s foo (float *a)
> +{
> +  struct s temp_struct1;
> +  struct s temp_struct2;
> +  struct s temp_struct3;
> +  temp_struct1 = r;
> +  temp_struct2 = temp_struct1;
> +  temp_struct3 = temp_struct2;
> +  /* The call here should not make
> +     a difference in removing all of the copies,
> +     this is true as we end up with r = r; g should be known only to change
> +     float types so can be ignored with TBAA. */
> +  g(a);
> +  r = temp_struct3;
> +  return r;
> +}
> +
> +/* There should be no references to any of "temp_struct*"
> +   temporaries.  */
> +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */
> +/* Also check that forwprop pass did the copy prop. */
> +/* { dg-final { scan-tree-dump-times "after previous" 3 "forwprop1" } } */
> diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
> index 242538ccc62..ec03a93da6d 100644
> --- a/gcc/tree-ssa-forwprop.cc
> +++ b/gcc/tree-ssa-forwprop.cc
> @@ -1191,55 +1191,17 @@ constant_pointer_difference (tree p1, tree p2)
>    return NULL_TREE;
>  }
>
> -
> -/* Optimize
> -   a = {};
> -   b = a;
> -   into
> -   a = {};
> -   b = {};
> -   Similarly for memset (&a, ..., sizeof (a)); instead of a = {};
> -   and/or memcpy (&b, &a, sizeof (a)); instead of b = a;  */
> -
>  static bool
> -optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, tree dest, tree src, 
> tree len)
> +optimize_memcpy_to_memset_1 (gimple_stmt_iterator *gsip, tree dest, tree 
> src, tree len, gimple *stmt, gimple *defstmt)
>  {
> -  ao_ref read;
> -  gimple *stmt = gsi_stmt (*gsip);
> -  if (gimple_has_volatile_ops (stmt))
> -    return false;
> -
> -  tree src2 = NULL_TREE, len2 = NULL_TREE;
>    poly_int64 offset, offset2;
>    tree val = integer_zero_node;
> -  bool len_was_null = len == NULL_TREE;
> -  if (len == NULL_TREE)
> -    len = (TREE_CODE (src) == COMPONENT_REF
> -          ? DECL_SIZE_UNIT (TREE_OPERAND (src, 1))
> -          : TYPE_SIZE_UNIT (TREE_TYPE (src)));
> +  tree len2 = NULL_TREE;
> +  tree src2 = NULL_TREE;
>    if (len == NULL_TREE
>        || !poly_int_tree_p (len))
>      return false;
>
> -  ao_ref_init (&read, src);
> -  tree vuse = gimple_vuse (stmt);
> -  gimple *defstmt;
> -  unsigned limit = param_sccvn_max_alias_queries_per_access;
> -  do {
> -    if (vuse == NULL || TREE_CODE (vuse) != SSA_NAME)
> -      return false;
> -    defstmt = SSA_NAME_DEF_STMT (vuse);
> -    if (is_a <gphi*>(defstmt))
> -      return false;
> -    if (limit-- == 0)
> -      return false;
> -    /* If the len was null, then we can use TBBA. */
> -    if (stmt_may_clobber_ref_p_1 (defstmt, &read,
> -                                 /* tbaa_p = */ len_was_null))
> -      break;
> -    vuse = gimple_vuse (defstmt);
> -  } while (true);
> -
>    if (gimple_store_p (defstmt)
>        && gimple_assign_single_p (defstmt)
>        && TREE_CODE (gimple_assign_rhs1 (defstmt)) == STRING_CST
> @@ -1341,6 +1303,50 @@ optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, 
> tree dest, tree src, tree
>      }
>    return true;
>  }
> +
> +/* Optimize
> +   a = {};
> +   b = a;
> +   into
> +   a = {};
> +   b = {};
> +   Similarly for memset (&a, ..., sizeof (a)); instead of a = {};
> +   and/or memcpy (&b, &a, sizeof (a)); instead of b = a;  */
> +
> +static bool
> +optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, tree dest, tree src, 
> tree len)
> +{
> +  gcc_assert (len);
> +  ao_ref read;
> +  gimple *stmt = gsi_stmt (*gsip);
> +  if (gimple_has_volatile_ops (stmt))
> +    return false;
> +
> +  if (!poly_int_tree_p (len))
> +    return false;
> +
> +  ao_ref_init (&read, src);
> +  tree vuse = gimple_vuse (stmt);
> +  gimple *defstmt;
> +  unsigned limit = param_sccvn_max_alias_queries_per_access;
> +  do {
> +    if (vuse == NULL || TREE_CODE (vuse) != SSA_NAME)
> +      return false;
> +    defstmt = SSA_NAME_DEF_STMT (vuse);
> +    if (is_a <gphi*>(defstmt))
> +      return false;
> +    if (limit-- == 0)
> +      return false;
> +    if (stmt_may_clobber_ref_p_1 (defstmt, &read,
> +                                 /* tbaa_p = */ false))
> +      break;
> +    vuse = gimple_vuse (defstmt);
> +  } while (true);
> +
> +  return optimize_memcpy_to_memset_1 (gsip, dest, src, len, stmt, defstmt);
> +}
> +
> +
>  /* Optimizes
>     a = c;
>     b = a;
> @@ -1374,7 +1380,33 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip, 
> tree dest, tree src)
>        gsi_replace (gsip, gimple_build_nop (), false);
>        return true;
>      }
> -  gimple *defstmt = SSA_NAME_DEF_STMT (vuse);
> +  gimple *defstmt;
> +  bool callinbetween = false;
> +
> +  ao_ref read;
> +  ao_ref_init (&read, src);
> +  vuse = gimple_vuse (stmt);
> +  unsigned limit = param_sccvn_max_alias_queries_per_access;
> +  do {
> +    if (vuse == NULL || TREE_CODE (vuse) != SSA_NAME)
> +      return false;
> +    defstmt = SSA_NAME_DEF_STMT (vuse);
> +    if (is_a <gphi*>(defstmt))
> +      return false;
> +    if (limit-- == 0)
> +      return false;
> +    callinbetween |= is_a <gcall *>(defstmt);
> +    if (stmt_may_clobber_ref_p_1 (defstmt, &read, true))
> +      break;
> +    vuse = gimple_vuse (defstmt);
> +  } while (true);
> +
> +  tree len = (TREE_CODE (src) == COMPONENT_REF
> +             ? DECL_SIZE_UNIT (TREE_OPERAND (src, 1))
> +             : TYPE_SIZE_UNIT (TREE_TYPE (src)));
> +  if (optimize_memcpy_to_memset_1 (gsip, dest, src, len, stmt, defstmt))
> +    return true;
> +
>    if (!gimple_assign_load_p (defstmt)
>        || !gimple_store_p (defstmt))
>      return false;
> @@ -1390,6 +1422,30 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip, 
> tree dest, tree src)
>    if (!operand_equal_p (src, dest2, 0))
>      return false;
>
> +  /* Check that the src2 has not changed. */
> +  ao_ref_init (&read, src2);
> +  vuse = gimple_vuse (stmt);
> +  limit = param_sccvn_max_alias_queries_per_access;
> +  gimple *defstmt1;
> +  do {
> +    if (vuse == NULL || TREE_CODE (vuse) != SSA_NAME)
> +      return false;
> +    defstmt1 = SSA_NAME_DEF_STMT (vuse);
> +    if (defstmt == defstmt1)
> +      break;
> +    if (is_a <gphi*>(defstmt1))
> +      return false;
> +    if (limit-- == 0)
> +      return false;
> +    if (stmt_may_clobber_ref_p_1 (defstmt1, &read, true))
> +      {
> +       if (defstmt != defstmt1)
> +         return false;
> +       break;
> +      }
> +    vuse = gimple_vuse (defstmt1);
> +  } while (true);
> +
>    /* If replacing with the same thing, then change the statement
>       into a NOP. */
>    if (operand_equal_p (src2, dest, 0))
> @@ -1407,6 +1463,7 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip, tree 
> dest, tree src)
>        gsi_replace (gsip, gimple_build_nop (), false);
>        return true;
>      }
> +
>    /* For 2 memory refences and using a temporary to do the copy,
>       don't remove the temporary as the 2 memory references might overlap.
>       Note t does not need to be decl as it could be field.
> @@ -4806,14 +4863,6 @@ pass_forwprop::execute (function *fun)
>                     enum tree_code code = gimple_assign_rhs_code (stmt);
>                     if (gimple_assign_load_p (stmt) && gimple_store_p (stmt))
>                       {
> -                       if (optimize_memcpy_to_memset (&gsi,
> -                                                      gimple_assign_lhs 
> (stmt),
> -                                                      gimple_assign_rhs1 
> (stmt),
> -                                                      /* len = */NULL_TREE))
> -                         {
> -                           changed = true;
> -                           break;
> -                         }
>                         if (optimize_agr_copyprop (&gsi, gimple_assign_lhs 
> (stmt),
>                                                    gimple_assign_rhs1 (stmt)))
>                           {
> --
> 2.43.0
>

Reply via email to