On Fri, Aug 8, 2025 at 11:56 PM Andrew Pinski <pins...@gmail.com> wrote:
>
> On Thu, Aug 7, 2025 at 4:29 AM Richard Biener
> <richard.guent...@gmail.com> wrote:
> >
> > On Wed, Aug 6, 2025 at 7:30 PM Andrew Pinski <quic_apin...@quicinc.com> 
> > wrote:
> > >
> > > While looking into the gimple level after optimization of the highway code
> > > from google, I noticed in .optimized we still have:
> > > ```
> > >   MEM <vector(8) short int> [(short int *)&a] = { 0, 0, 0, 0, 0, 0, 0, 0 
> > > };
> > >   D.4398 = a;
> > >   a ={v} {CLOBBER(eos)};
> > >   D.4389 = D.4398;
> > >   D.4390 = D.4389;
> > >   D.4361 = D.4390;
> > >   D.4195 = D.4361;
> > >   return D.4195;
> > > ```
> > > Note this is with SRA disabled since I noticed there is better code 
> > > generation with
> > > SRA disabled but that is a different story and I will get to that later 
> > > on.
> > >
> > > Which could be just optimized to a single store of `{}` .
> > >
> > > The reason why the optimize_agr_copyprop does not handle the above is 
> > > there was clobbers
> > > inbetween the store in the last forwprop pass and currently don't copy 
> > > after the first use.
> > > While optimize_aggr_zeroprop does handle copying over clobbers just fine.
> > >
> > > So this allows the recognization of the store to a to be like a memset to 
> > > optimize_aggr_zeroprop
> > > and then the result just falls through.
> > >
> > > Bootstrapped and tested on x86_64-linux-gnu.
> > >
> > > gcc/ChangeLog:
> > >
> > >         * tree-ssa-forwprop.cc (optimize_aggr_zeroprop): Recognize stores
> > >         of integer_zerop as memset of 0.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >         * gcc.dg/torture/copy-prop-aggr-zero-1.c: New test.
> > >         * gcc.dg/torture/copy-prop-aggr-zero-2.c: New test.
> > >         * gcc.dg/tree-ssa/copy-prop-aggregate-zero-1.c: New test.
> > >         * gcc.dg/tree-ssa/copy-prop-aggregate-zero-2.c: New test.
> > >         * gcc.dg/tree-ssa/copy-prop-aggregate-zero-3.c: New test.
> > >
> > > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com>
> > > ---
> > >  .../gcc.dg/torture/copy-prop-aggr-zero-1.c    | 28 +++++++++++++++++++
> > >  .../gcc.dg/torture/copy-prop-aggr-zero-2.c    | 28 +++++++++++++++++++
> > >  .../tree-ssa/copy-prop-aggregate-zero-1.c     | 28 +++++++++++++++++++
> > >  .../tree-ssa/copy-prop-aggregate-zero-2.c     | 25 +++++++++++++++++
> > >  .../tree-ssa/copy-prop-aggregate-zero-3.c     | 25 +++++++++++++++++
> > >  gcc/tree-ssa-forwprop.cc                      | 15 ++++++++++
> > >  6 files changed, 149 insertions(+)
> > >  create mode 100644 gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-1.c
> > >  create mode 100644 gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-2.c
> > >  create mode 100644 
> > > gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-1.c
> > >  create mode 100644 
> > > gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-2.c
> > >  create mode 100644 
> > > gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-3.c
> > >
> > > diff --git a/gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-1.c 
> > > b/gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-1.c
> > > new file mode 100644
> > > index 00000000000..5c457b9bc2f
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-1.c
> > > @@ -0,0 +1,28 @@
> > > +/* { dg-do run } */
> > > +
> > > +/* Make sure a bit-field store of 0 cause the whole assignment become 0. 
> > > */
> > > +
> > > +struct s1
> > > +{
> > > +  unsigned char c:1;
> > > +  unsigned char d:7;
> > > +};
> > > +
> > > +__attribute__((noinline))
> > > +struct s1 f(struct s1 a)
> > > +{
> > > +  a.c = 0;
> > > +  struct s1 t = a;
> > > +  return t;
> > > +}
> > > +
> > > +int main()
> > > +{
> > > +  struct s1 a = {1, 2};
> > > +  struct s1 b = f(a);
> > > +  if (b.c != 0)
> > > +    __builtin_abort();
> > > +  if (b.d != 2)
> > > +    __builtin_abort();
> > > +  return 0;
> > > +}
> > > diff --git a/gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-2.c 
> > > b/gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-2.c
> > > new file mode 100644
> > > index 00000000000..f1da1615e8e
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-2.c
> > > @@ -0,0 +1,28 @@
> > > +/* { dg-do run } */
> > > +
> > > +/* Make sure a bit-field store of 0 cause the whole assignment become 0. 
> > > */
> > > +
> > > +struct s1
> > > +{
> > > +  unsigned char d:7;
> > > +  unsigned char c:1;
> > > +};
> > > +
> > > +__attribute__((noinline))
> > > +struct s1 f(struct s1 a)
> > > +{
> > > +  a.c = 0;
> > > +  struct s1 t = a;
> > > +  return t;
> > > +}
> > > +
> > > +int main()
> > > +{
> > > +  struct s1 a = {2, 1};
> > > +  struct s1 b = f(a);
> > > +  if (b.c != 0)
> > > +    __builtin_abort();
> > > +  if (b.d != 2)
> > > +    __builtin_abort();
> > > +  return 0;
> > > +}
> > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-1.c 
> > > b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-1.c
> > > new file mode 100644
> > > index 00000000000..577a5b5817c
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-1.c
> > > @@ -0,0 +1,28 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized 
> > > -fdump-tree-forwprop1-details" } */
> > > +
> > > +extern void link_error (void);
> > > +
> > > +/* Check for copyprop on structs with zeroing.  */
> > > +#define vector16 __attribute__((vector_size(64)))
> > > +
> > > +struct g
> > > +{
> > > +  vector16 unsigned char t;
> > > +};
> > > +
> > > +struct g f(void)
> > > +{
> > > +  struct g temp_struct1 ;
> > > +  temp_struct1.t = (vector16 unsigned char){};
> > > +  struct g temp_struct2 = temp_struct1;
> > > +  struct g temp_struct3 = temp_struct2;
> > > +  struct g temp_struct4 = temp_struct3;
> > > +  return temp_struct4;
> > > +}
> > > +
> > > +/* There should be no references to any of "temp_struct*"
> > > +   temporaries.  */
> > > +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */
> > > +/* Also check that forwprop pass did the copy prop. */
> > > +/* { dg-final { scan-tree-dump-times "after previous" 4 "forwprop1" } } 
> > > */
> > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-2.c 
> > > b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-2.c
> > > new file mode 100644
> > > index 00000000000..ce3c6129b1a
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-2.c
> > > @@ -0,0 +1,25 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized 
> > > -fdump-tree-forwprop1-details" } */
> > > +
> > > +extern void link_error (void);
> > > +
> > > +struct g
> > > +{
> > > +  unsigned int t;
> > > +};
> > > +
> > > +struct g f(void)
> > > +{
> > > +  struct g temp_struct1 ;
> > > +  temp_struct1.t = 0;
> > > +  struct g temp_struct2 = temp_struct1;
> > > +  struct g temp_struct3 = temp_struct2;
> > > +  struct g temp_struct4 = temp_struct3;
> > > +  return temp_struct4;
> > > +}
> > > +
> > > +/* There should be no references to any of "temp_struct*"
> > > +   temporaries.  */
> > > +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */
> > > +/* Also check that forwprop pass did the copy prop. */
> > > +/* { dg-final { scan-tree-dump-times "after previous" 4 "forwprop1" } } 
> > > */
> > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-3.c 
> > > b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-3.c
> > > new file mode 100644
> > > index 00000000000..94ce965e7ce
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-3.c
> > > @@ -0,0 +1,25 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized 
> > > -fdump-tree-forwprop1-details" } */
> > > +
> > > +extern void link_error (void);
> > > +
> > > +struct g
> > > +{
> > > +  _Complex unsigned int t;
> > > +};
> > > +
> > > +struct g f(void)
> > > +{
> > > +  struct g temp_struct1 ;
> > > +  temp_struct1.t = 0;
> > > +  struct g temp_struct2 = temp_struct1;
> > > +  struct g temp_struct3 = temp_struct2;
> > > +  struct g temp_struct4 = temp_struct3;
> > > +  return temp_struct4;
> > > +}
> > > +
> > > +/* There should be no references to any of "temp_struct*"
> > > +   temporaries.  */
> > > +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */
> > > +/* Also check that forwprop pass did the copy prop. */
> > > +/* { dg-final { scan-tree-dump-times "after previous" 4 "forwprop1" } } 
> > > */
> > > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
> > > index 3d38d88844b..1cde5f85150 100644
> > > --- a/gcc/tree-ssa-forwprop.cc
> > > +++ b/gcc/tree-ssa-forwprop.cc
> > > @@ -1340,6 +1340,21 @@ optimize_aggr_zeroprop (gimple_stmt_iterator *gsip)
> > >             }
> > >         }
> > >      }
> > > +  /* A store of integer (scalar, vector or complex) zeros is
> > > +     a zero store. */
> > > +  else if (gimple_store_p (stmt)
> > > +          && gimple_assign_single_p (stmt)
> > > +          && integer_zerop (gimple_assign_rhs1 (stmt)))
> >
> > Do we do the correct things for bitfield stores or should we restrict
> > this to mode-size or BLKmode types?
>
> Yes we handle bitfield stores as long as they long as they are
> multiple of BITS_PER_UNIT.
> e.g. (assume little-endian):
> ```
> struct s1
> {
>   unsigned int c:24;
>   unsigned int d:8;
>   unsigned int e;
>   unsigned int f;
> };
>
> __attribute__((noinline))
> void f(struct s1 a, unsigned char *data)
> {
>   a.c = 0;
>   __builtin_memcpy(data, &a, 3);
> }
> ```
> Is able to handle a 24bit (3 byte) integer store and changes
> (correctly) the memcpy into a memset.
> copy-prop-aggr-zero-[12].c tests the non-multiple case which I ran
> into when I originally was writing this patch.
>
> >
> > > +    {
> > > +      tree rhs = gimple_assign_rhs1 (stmt);
> > > +      tree type = TREE_TYPE (rhs);
> > > +      dest = gimple_assign_lhs (stmt);
> > > +      ao_ref_init (&read, dest);
> > > +      /* For integral types, the type precision needs to be a multiply 
> > > of BITS_PER_UNIT. */
> > > +      if (INTEGRAL_TYPE_P (type)
> > > +         && (TYPE_PRECISION (type) % BITS_PER_UNIT) != 0)
> > > +       dest = NULL_TREE;
>
> This is what limits to the precision to be a multiple of a byte sized
> integer type.

The patch is OK then.

Richard.

>
> Thanks,
> Andrew
>
> > > +    }
> > >    else if (gimple_store_p (stmt)
> > >            && gimple_assign_single_p (stmt)
> > >            && TREE_CODE (gimple_assign_rhs1 (stmt)) == CONSTRUCTOR
> > > --
> > > 2.43.0
> > >

Reply via email to