On Fri, Jun 6, 2025 at 11:50 AM Andrew Pinski <quic_apin...@quicinc.com> wrote:
>
> This improves copy prop for aggregates by stepping over statements that
> don't modify the access, just like it is done for copying zeros.
> To speed things up, we should only have one walk back over the vuse chain
> instead of doing it twice for each load/store assignment.
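
To illustrate, here is a stripped-down variant of the new testcases (the
struct layout and names are just illustrative) showing the kind of sequence
that can now be cleaned up:

    struct s { int a; int t[1024]; };
    void g (void);

    struct s
    foo (struct s r)
    {
      struct s temp = r;
      /* The call clobbers neither r nor temp (both are local and do
         not escape), so the walk can step over it.  */
      g ();
      /* The copy is propagated, so this becomes "return r;" and temp
         is dead.  */
      return temp;
    }

Before the patch, optimize_agr_copyprop only looked at the immediate vuse
definition, so the intervening call blocked the propagation.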

This needs one minor testsuite change for aarch64
(gcc.target/aarch64/vld2-1.c), which I will do if this patch gets
approved.

Thanks,
Andrew

>
>         PR tree-optimization/14295
>
> gcc/ChangeLog:
>
>         * tree-ssa-forwprop.cc (find_clobbering_stmt): New function split out
>         from optimize_memcpy_to_memset.
>         (optimize_memcpy_to_memset): Split rewriting part out into ...
>         (optimize_memcpy_to_memset_1): This function.
>         (optimize_agr_copyprop): Call find_clobbering_stmt.
>         Call optimize_memcpy_to_memset_1. Check that the src
>         variable is not clobbered between the 2 statements.
>         (pass_forwprop::execute): Call only optimize_agr_copyprop;
>         no longer call optimize_memcpy_to_memset here.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/copy-prop-arg-1.c: New test.
>         * gcc.dg/tree-ssa/copy-prop-arg-2.c: New test.
>
> Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
> ---
>  .../gcc.dg/tree-ssa/copy-prop-arg-1.c         |  37 ++++
>  .../gcc.dg/tree-ssa/copy-prop-arg-2.c         |  35 ++++
>  gcc/tree-ssa-forwprop.cc                      | 158 ++++++++++++------
>  3 files changed, 180 insertions(+), 50 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c
> new file mode 100644
> index 00000000000..876556fc464
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c
> @@ -0,0 +1,37 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized -fdump-tree-forwprop1-details" } */
> +
> +/* PR tree-optimization/14295 */
> +
> +/* Check for copyprop on structs.  */
> +
> +struct s
> +{
> +  char d;
> +  int a, b;
> +  double m;
> +  int t[1024];
> +};
> +
> +void g();
> +struct s foo (struct s r)
> +{
> +  struct s temp_struct1;
> +  struct s temp_struct2;
> +  struct s temp_struct3;
> +  temp_struct1 = r;
> +  temp_struct2 = temp_struct1;
> +  temp_struct3 = temp_struct2;
> +  /* The call here should not make
> +     a difference in removing all of the copies;
> +     this is true as we end up with r = r.  */
> +  g();
> +  r = temp_struct3;
> +  return r;
> +}
> +
> +/* There should be no references to any of "temp_struct*"
> +   temporaries.  */
> +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */
> +/* Also check that the forwprop pass did the copy prop.  */
> +/* { dg-final { scan-tree-dump-times "after previous" 3 "forwprop1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c
> new file mode 100644
> index 00000000000..b1a76b48953
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c
> @@ -0,0 +1,35 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized -fdump-tree-forwprop1-details" } */
> +
> +/* PR tree-optimization/14295 */
> +
> +/* Check for copyprop on structs.  */
> +
> +struct s
> +{
> +  char d;
> +  int a, b;
> +  double m;
> +  int t[1024];
> +};
> +
> +void g();
> +struct s foo (struct s r)
> +{
> +  struct s temp_struct1;
> +  struct s temp_struct2;
> +  struct s temp_struct3;
> +  temp_struct1 = r;
> +  temp_struct2 = temp_struct1;
> +  temp_struct3 = temp_struct2;
> +  /* The call here should not make
> +     a difference in removing all of the copies. */
> +  g();
> +  return temp_struct3;
> +}
> +
> +/* There should be no references to any of "temp_struct*"
> +   temporaries.  */
> +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */
> +/* Also check that the forwprop pass did the copy prop.  */
> +/* { dg-final { scan-tree-dump-times "after previous" 3 "forwprop1" } } */
> diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
> index 27197bbc076..7a9a6357f7f 100644
> --- a/gcc/tree-ssa-forwprop.cc
> +++ b/gcc/tree-ssa-forwprop.cc
> @@ -1190,56 +1190,21 @@ constant_pointer_difference (tree p1, tree p2)
>    return NULL_TREE;
>  }
>
> -
> -/* Optimize
> -   a = {};
> -   b = a;
> -   into
> -   a = {};
> -   b = {};
> -   Similarly for memset (&a, ..., sizeof (a)); instead of a = {};
> -   and/or memcpy (&b, &a, sizeof (a)); instead of b = a;  */
> +/* Tries to propagate a zero store from DEFSTMT into STMT (GSIP).
> +   Returns true if the propagation happened.  */
>
>  static bool
> -optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, tree dest, tree src, tree len)
> +optimize_memcpy_to_memset_1 (gimple_stmt_iterator *gsip, tree dest, tree src,
> +                            tree len, gimple *stmt, gimple *defstmt)
>  {
> -  ao_ref read;
> -  gimple *stmt = gsi_stmt (*gsip);
> -  if (gimple_has_volatile_ops (stmt))
> -    return false;
> -
> -  tree src2 = NULL_TREE, len2 = NULL_TREE;
>    poly_int64 offset, offset2;
>    tree val = integer_zero_node;
> -  bool len_was_null = len == NULL_TREE;
> -  if (len == NULL_TREE)
> -    len = (TREE_CODE (src) == COMPONENT_REF
> -          ? DECL_SIZE_UNIT (TREE_OPERAND (src, 1))
> -          : TYPE_SIZE_UNIT (TREE_TYPE (src)));
> +  tree len2 = NULL_TREE;
> +  tree src2 = NULL_TREE;
>    if (len == NULL_TREE
>        || !poly_int_tree_p (len))
>      return false;
>
> -  ao_ref_init (&read, src);
> -  tree vuse = gimple_vuse (stmt);
> -  gimple *defstmt;
> -  unsigned limit = param_sccvn_max_alias_queries_per_access;
> -  do {
> -    /* If the vuse is the default definition, then there is no stores beforhand. */
> -    if (SSA_NAME_IS_DEFAULT_DEF (vuse))
> -      return false;
> -    defstmt = SSA_NAME_DEF_STMT (vuse);
> -    if (is_a <gphi*>(defstmt))
> -      return false;
> -    if (limit-- == 0)
> -      return false;
> -    /* If the len was null, then we can use TBBA. */
> -    if (stmt_may_clobber_ref_p_1 (defstmt, &read,
> -                                 /* tbaa_p = */ len_was_null))
> -      break;
> -    vuse = gimple_vuse (defstmt);
> -  } while (true);
> -
>    if (gimple_store_p (defstmt)
>        && gimple_assign_single_p (defstmt)
>        && TREE_CODE (gimple_assign_rhs1 (defstmt)) == STRING_CST
> @@ -1343,6 +1308,64 @@ optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, tree dest, tree src, tree
>      }
>    return true;
>  }
> +
> +
> +/* Finds the first statement walking backwards from STMT that clobbers SRC.
> +   Returns false if there is none (due to a PHI or otherwise).  */
> +
> +static bool
> +find_clobbering_stmt (tree src, gimple *stmt, gimple **clobber_stmt)
> +{
> +  gimple *defstmt;
> +  ao_ref read;
> +  ao_ref_init (&read, src);
> +  tree vuse = gimple_vuse (stmt);
> +  unsigned limit = param_sccvn_max_alias_queries_per_access;
> +  do {
> +    /* If the vuse is the default definition, then there are no stores beforehand.  */
> +    if (SSA_NAME_IS_DEFAULT_DEF (vuse))
> +      return false;
> +    defstmt = SSA_NAME_DEF_STMT (vuse);
> +    if (is_a <gphi*>(defstmt))
> +      return false;
> +    if (limit-- == 0)
> +      return false;
> +    if (stmt_may_clobber_ref_p_1 (defstmt, &read, true))
> +      break;
> +    vuse = gimple_vuse (defstmt);
> +  } while (true);
> +  *clobber_stmt = defstmt;
> +  return true;
> +}
> +
> +
> +/* Optimize
> +   a = {};
> +   memcpy (&b, &a, sizeof (a));
> +   into
> +   a = {};
> +   b = {};
> +   Similarly for memset (&a, ..., sizeof (a)); instead of a = {};  */
> +
> +static bool
> +optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, tree dest, tree src, tree len)
> +{
> +  gcc_assert (len);
> +  gimple *stmt = gsi_stmt (*gsip);
> +  if (gimple_has_volatile_ops (stmt))
> +    return false;
> +
> +  if (!poly_int_tree_p (len))
> +    return false;
> +
> +  gimple *defstmt;
> +  if (!find_clobbering_stmt (src, stmt, &defstmt))
> +    return false;
> +
> +  return optimize_memcpy_to_memset_1 (gsip, dest, src, len, stmt, defstmt);
> +}
> +
> +
>  /* Optimizes
>     a = c;
>     b = a;
> @@ -1351,6 +1374,10 @@ optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, tree dest, tree src, tree
>     b = c;
>     GSIP is the second statement and SRC is the common
>     between the statements.
> +   Also handles the case where the store to a is:
> +   a = {};
> +   or
> +   memset (&a, ..., sizeof (a));
>  */
>  static bool
>  optimize_agr_copyprop (gimple_stmt_iterator *gsip)
> @@ -1369,7 +1396,19 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip)
>    /* If the vuse is the default definition, then there is no store beforehand.  */
>    if (SSA_NAME_IS_DEFAULT_DEF (vuse))
>      return false;
> -  gimple *defstmt = SSA_NAME_DEF_STMT (vuse);
> +
> +  gimple *defstmt;
> +  if (!find_clobbering_stmt (src, stmt, &defstmt))
> +    return false;
> +
> +  /* See if the first store is storing zero; if so,
> +     optimize STMT that way.  */
> +  tree len = (TREE_CODE (src) == COMPONENT_REF
> +             ? DECL_SIZE_UNIT (TREE_OPERAND (src, 1))
> +             : TYPE_SIZE_UNIT (TREE_TYPE (src)));
> +  if (optimize_memcpy_to_memset_1 (gsip, dest, src, len, stmt, defstmt))
> +    return true;
> +
>    if (!gimple_assign_load_p (defstmt)
>        || !gimple_store_p (defstmt))
>      return false;
> @@ -1385,6 +1424,33 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip)
>    if (!operand_equal_p (src, dest2, 0))
>      return false;
>
> +  /* Check that SRC2 is not clobbered between DEFSTMT and STMT.  */
> +  ao_ref read;
> +  ao_ref_init (&read, src2);
> +  vuse = gimple_vuse (stmt);
> +  unsigned limit = param_sccvn_max_alias_queries_per_access;
> +  gimple *defstmt1;
> +  do {
> +    /* If the vuse is the default definition, then there are no stores beforehand.  */
> +    if (SSA_NAME_IS_DEFAULT_DEF (vuse))
> +      return false;
> +    defstmt1 = SSA_NAME_DEF_STMT (vuse);
> +    if (defstmt == defstmt1)
> +      break;
> +    if (is_a <gphi*>(defstmt1))
> +      return false;
> +    if (limit-- == 0)
> +      return false;
> +    /* Even though this is moving a load past the statement, this needs to
> +       be treated like moving a store.  */
> +    if (stmt_may_clobber_ref_p_1 (defstmt1, &read, false))
> +      {
> +       if (defstmt != defstmt1)
> +         return false;
> +       break;
> +      }
> +    vuse = gimple_vuse (defstmt1);
> +  } while (true);
>
>    /* For 2 memory refences and using a temporary to do the copy,
>       don't remove the temporary as the 2 memory references might overlap.
> @@ -4798,14 +4864,6 @@ pass_forwprop::execute (function *fun)
>                     enum tree_code code = gimple_assign_rhs_code (stmt);
>                     if (gimple_assign_load_p (stmt) && gimple_store_p (stmt))
>                       {
> -                       if (optimize_memcpy_to_memset (&gsi,
> -                                                      gimple_assign_lhs (stmt),
> -                                                      gimple_assign_rhs1 (stmt),
> -                                                      /* len = */NULL_TREE))
> -                         {
> -                           changed = true;
> -                           break;
> -                         }
>                         if (optimize_agr_copyprop (&gsi))
>                           {
>                             changed = true;
> --
> 2.43.0
>
