On Tue, Sep 9, 2025 at 6:17 AM Andrew Pinski
<andrew.pin...@oss.qualcomm.com> wrote:
>
> It turns out easy to add support for memcpy copy prop when the memcpy
> has changed into `MEM<char[N]>` copy.
> Instead of rejecting it outright we need to figure out that
> `a` and `MEM<char[N]>[&a]` are equivalent in terms of address and size.
> And then create a VIEW_CONVERT_EXPR from the original src to the new type.
>
> Note this also allows for `a.b` and `a` being considered equivalent if b is 
> the
> only field (PR 121751).

OK.

Thanks,
Richard.

> Changes since v1:
> * v2: Move check for IMAG/REAL and BFR earlier.
>       Add a wrapping function around get_inner_reference and use that instead
>      of get_addr_base_and_unit_offset.
>
> Bootstrapped and tested on x86_64-linux-gnu.
>
>         PR tree-optimization/121751
>         PR tree-optimization/121418
>         PR tree-optimization/121417
> gcc/ChangeLog:
>
>         * tree-ssa-forwprop.cc (split_core_and_offset_size): New function.
>         (optimize_agr_copyprop_1): Allow for the same
>         address but different type accesses via a VCE.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/copy-prop-aggregate-1.c: New test.
>         * gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-1.c: New test.
>         * gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-2.c: New test.
>
> Signed-off-by: Andrew Pinski <andrew.pin...@oss.qualcomm.com>
> ---
>  .../gcc.dg/tree-ssa/copy-prop-aggregate-1.c   |  33 ++++++
>  .../tree-ssa/copy-prop-aggregate-memcpy-1.c   |  18 ++++
>  .../tree-ssa/copy-prop-aggregate-memcpy-2.c   |  20 ++++
>  gcc/tree-ssa-forwprop.cc                      | 100 +++++++++++++++++-
>  4 files changed, 170 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-1.c
>  create mode 100644 
> gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-1.c
>  create mode 100644 
> gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-2.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-1.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-1.c
> new file mode 100644
> index 00000000000..1094c4d768b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-1.c
> @@ -0,0 +1,33 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fdump-tree-forwprop1-details -fdump-tree-optimized" } 
> */
> +/* PR tree-optimization/121751 */
> +
> +
> +struct s1
> +{
> +  int t[1024];
> +};
> +
> +struct s2 {
> +  struct s1 t;
> +};
> +
> +struct s3
> +{
> +  struct s2 t;
> +};
> +
> +void g(struct s3*);
> +
> +void f(struct s1 s)
> +{
> +  struct s2 removeme;
> +  removeme.t = s;
> +  struct s3 t1;
> +  t1.t = removeme;
> +  g(&t1);
> +}
> +
> +
> +/* { dg-final { scan-tree-dump-times "after previous" 1 "forwprop1" } } */
> +/* { dg-final { scan-tree-dump-not "removeme " "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-1.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-1.c
> new file mode 100644
> index 00000000000..5faf6d0bf9b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-1.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fdump-tree-forwprop1-details -fdump-tree-optimized" } 
> */
> +/* PR tree-optimization/121418 */
> +
> +struct s1
> +{
> +  unsigned char t[1024];
> +};
> +
> +struct s1 f(struct s1 a)
> +{
> +  struct s1 removeme1 = a;
> +  __builtin_memcpy (&removeme1, &a, sizeof(struct s1));
> +  return removeme1;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "after previous" 1 "forwprop1" } } */
> +/* { dg-final { scan-tree-dump-not "removeme1 " "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-2.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-2.c
> new file mode 100644
> index 00000000000..b1ba30d0aba
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-memcpy-2.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fdump-tree-forwprop1-details -fdump-tree-optimized" } 
> */
> +/* PR tree-optimization/121417 */
> +
> +struct s1
> +{
> +  unsigned char t[1024];
> +};
> +
> +struct s1 f(struct s1 a)
> +{
> +  struct s1 removeme1 = a;
> +  struct s1 removeme2;
> +  __builtin_memcpy (&removeme2, &removeme1, sizeof(struct s1));
> +  return removeme2;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "after previous" 2 "forwprop1" } } */
> +/* { dg-final { scan-tree-dump-not "removeme1 " "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "removeme2 " "optimized" } } */
> diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
> index 9c6f4b355d6..1eacff01587 100644
> --- a/gcc/tree-ssa-forwprop.cc
> +++ b/gcc/tree-ssa-forwprop.cc
> @@ -1418,6 +1418,46 @@ optimize_aggr_zeroprop (gimple_stmt_iterator *gsip, 
> bool full_walk)
>    return changed;
>  }
>
> +/* Returns the pointer to the base of the object of the
> +   reference EXPR and extracts the information about
> +   the offset of the access, storing it to PBYTESIZE,
> +   PBYTEPOS and PREVERSEP.
> +   If the access is not byte sized or the position is not
> +   on a byte boundary, return NULL.  */
> +static tree
> +split_core_and_offset_size (tree expr,
> +                           poly_int64 *pbytesize, poly_int64 *pbytepos,
> +                           tree *poffset, int *preversep)
> +{
> +  tree core;
> +  machine_mode mode;
> +  int unsignedp, volatilep;
> +  poly_int64 bitsize;
> +  poly_int64 bitpos;
> +  location_t loc = EXPR_LOCATION (expr);
> +
> +  core = get_inner_reference (expr, &bitsize, &bitpos,
> +                             poffset, &mode, &unsignedp, preversep,
> +                             &volatilep);
> +  if (!multiple_p (bitsize, BITS_PER_UNIT, pbytesize))
> +    return NULL_TREE;
> +  if (!multiple_p (bitpos, BITS_PER_UNIT, pbytepos))
> +    return NULL_TREE;
> +  /* If we are left with MEM[a + CST] strip that and add it to the
> +     pbytepos and return a. */
> +  if (TREE_CODE (core) == MEM_REF)
> +    {
> +      poly_offset_int tem;
> +      tem = wi::to_poly_offset (TREE_OPERAND (core, 1));
> +      tem += *pbytepos;
> +      if (tem.to_shwi (pbytepos))
> +       return TREE_OPERAND (core, 0);
> +    }
> +  core = build_fold_addr_expr_loc (loc, core);
> +  STRIP_NOPS (core);
> +  return core;
> +}
> +
>  /* Helper function for optimize_agr_copyprop.
>     For aggregate copies in USE_STMT, see if DEST
>     is on the lhs of USE_STMT and replace it with SRC. */
> @@ -1434,8 +1474,66 @@ optimize_agr_copyprop_1 (gimple *stmt, gimple 
> *use_stmt,
>    /* If the new store is `src2 = src2;` skip over it. */
>    if (operand_equal_p (src2, dest2, 0))
>      return false;
> +  /* If the second src is not exactly the same as dest,
> +     try to handle it separately; see if it is address/size equivalent.
> +     Handles `a` and `a.b` and `MEM<char[N]>(&a)` which all have
> +     the same size and offsets as address/size equivalent.
> +     This allows copying over a memcpy and also one for copying
> +     where one field is the same size as the whole struct.  */
>    if (!operand_equal_p (dest, src2, 0))
> -    return false;
> +    {
> +      /* A VCE can't be used with imag/real or BFR so reject them early. */
> +      if (TREE_CODE (src) == IMAGPART_EXPR
> +         || TREE_CODE (src) == REALPART_EXPR
> +         || TREE_CODE (src) == BIT_FIELD_REF)
> +       return false;
> +      tree core1, core2;
> +      poly_int64 bytepos1, bytepos2;
> +      poly_int64 bytesize1, bytesize2;
> +      tree toffset1, toffset2;
> +      int reversep1 = 0;
> +      int reversep2 = 0;
> +      poly_int64 diff = 0;
> +      core1 = split_core_and_offset_size (dest, &bytesize1, &bytepos1,
> +                                         &toffset1, &reversep1);
> +      core2 = split_core_and_offset_size (src2, &bytesize2, &bytepos2,
> +                                         &toffset2, &reversep2);
> +      if (!core1 || !core2)
> +       return false;
> +      if (reversep1 != reversep2)
> +       return false;
> +      /* The sizes of the 2 accesses need to be the same. */
> +      if (!known_eq (bytesize1, bytesize2))
> +       return false;
> +      if (!operand_equal_p (core1, core2, 0))
> +       return false;
> +
> +      if (toffset1 && toffset2)
> +       {
> +         tree type = TREE_TYPE (toffset1);
> +         if (type != TREE_TYPE (toffset2))
> +           toffset2 = fold_convert (type, toffset2);
> +
> +         tree tdiff = fold_build2 (MINUS_EXPR, type, toffset1, toffset2);
> +         if (!cst_and_fits_in_hwi (tdiff))
> +           return false;
> +
> +         diff = int_cst_value (tdiff);
> +       }
> +      else if (toffset1 || toffset2)
> +       {
> +         /* If only one of the offsets is non-constant, the difference cannot
> +            be a constant.  */
> +         return false;
> +       }
> +      diff += bytepos1 - bytepos2;
> +      /* The offset between the 2 needs to be 0. */
> +      if (!known_eq (diff, 0))
> +       return false;
> +      src = fold_build1_loc (gimple_location (use_stmt),
> +                            VIEW_CONVERT_EXPR,
> +                            TREE_TYPE (src2), src);
> +    }
>    /* For 2 memory refences and using a temporary to do the copy,
>       don't remove the temporary as the 2 memory references might overlap.
>       Note t does not need to be decl as it could be field.
> --
> 2.43.0
>

Reply via email to