On Fri, Aug 8, 2025 at 11:56 PM Andrew Pinski <pins...@gmail.com> wrote: > > On Thu, Aug 7, 2025 at 4:29 AM Richard Biener > <richard.guent...@gmail.com> wrote: > > > > On Wed, Aug 6, 2025 at 7:30 PM Andrew Pinski <quic_apin...@quicinc.com> > > wrote: > > > > > > While looking into the gimple level after optimization of the highway code > > > from google, I noticed in .optimized we still have: > > > ``` > > > MEM <vector(8) short int> [(short int *)&a] = { 0, 0, 0, 0, 0, 0, 0, 0 > > > }; > > > D.4398 = a; > > > a ={v} {CLOBBER(eos)}; > > > D.4389 = D.4398; > > > D.4390 = D.4389; > > > D.4361 = D.4390; > > > D.4195 = D.4361; > > > return D.4195; > > > ``` > > > Note this is with SRA disabled since I noticed there is better code > > > generation with > > > SRA disabled but that is a different story and I will get to that later > > > on. > > > > > > Which could be just optimized to a single store of `{}` . > > > > > > The reason why the optimize_agr_copyprop does not handle the above is > > > there was clobbers > > > inbetween the store in the last forwprop pass and currently don't copy > > > after the first use. > > > While optimize_aggr_zeroprop does handle copying over clobbers just fine. > > > > > > So this allows the recognization of the store to a to be like a memset to > > > optimize_aggr_zeroprop > > > and then the result just falls through. > > > > > > Bootstrapped and tested on x86_64-linux-gnu. > > > > > > gcc/ChangeLog: > > > > > > * tree-ssa-forwprop.cc (optimize_aggr_zeroprop): Recognize stores > > > of integer_zerop as memset of 0. > > > > > > gcc/testsuite/ChangeLog: > > > > > > * gcc.dg/torture/copy-prop-aggr-zero-1.c: New test. > > > * gcc.dg/torture/copy-prop-aggr-zero-2.c: New test. > > > * gcc.dg/tree-ssa/copy-prop-aggregate-zero-1.c: New test. > > > * gcc.dg/tree-ssa/copy-prop-aggregate-zero-2.c: New test. > > > * gcc.dg/tree-ssa/copy-prop-aggregate-zero-3.c: New test. > > > > > > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> > > > --- > > > .../gcc.dg/torture/copy-prop-aggr-zero-1.c | 28 +++++++++++++++++++ > > > .../gcc.dg/torture/copy-prop-aggr-zero-2.c | 28 +++++++++++++++++++ > > > .../tree-ssa/copy-prop-aggregate-zero-1.c | 28 +++++++++++++++++++ > > > .../tree-ssa/copy-prop-aggregate-zero-2.c | 25 +++++++++++++++++ > > > .../tree-ssa/copy-prop-aggregate-zero-3.c | 25 +++++++++++++++++ > > > gcc/tree-ssa-forwprop.cc | 15 ++++++++++ > > > 6 files changed, 149 insertions(+) > > > create mode 100644 gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-1.c > > > create mode 100644 gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-2.c > > > create mode 100644 > > > gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-1.c > > > create mode 100644 > > > gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-2.c > > > create mode 100644 > > > gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-3.c > > > > > > diff --git a/gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-1.c > > > b/gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-1.c > > > new file mode 100644 > > > index 00000000000..5c457b9bc2f > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-1.c > > > @@ -0,0 +1,28 @@ > > > +/* { dg-do run } */ > > > + > > > +/* Make sure a bit-field store of 0 cause the whole assignment become 0. > > > */ > > > + > > > +struct s1 > > > +{ > > > + unsigned char c:1; > > > + unsigned char d:7; > > > +}; > > > + > > > +__attribute__((noinline)) > > > +struct s1 f(struct s1 a) > > > +{ > > > + a.c = 0; > > > + struct s1 t = a; > > > + return t; > > > +} > > > + > > > +int main() > > > +{ > > > + struct s1 a = {1, 2}; > > > + struct s1 b = f(a); > > > + if (b.c != 0) > > > + __builtin_abort(); > > > + if (b.d != 2) > > > + __builtin_abort(); > > > + return 0; > > > +} > > > diff --git a/gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-2.c > > > b/gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-2.c > > > new file mode 100644 > > > index 00000000000..f1da1615e8e > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-2.c > > > @@ -0,0 +1,28 @@ > > > +/* { dg-do run } */ > > > + > > > +/* Make sure a bit-field store of 0 cause the whole assignment become 0. > > > */ > > > + > > > +struct s1 > > > +{ > > > + unsigned char d:7; > > > + unsigned char c:1; > > > +}; > > > + > > > +__attribute__((noinline)) > > > +struct s1 f(struct s1 a) > > > +{ > > > + a.c = 0; > > > + struct s1 t = a; > > > + return t; > > > +} > > > + > > > +int main() > > > +{ > > > + struct s1 a = {2, 1}; > > > + struct s1 b = f(a); > > > + if (b.c != 0) > > > + __builtin_abort(); > > > + if (b.d != 2) > > > + __builtin_abort(); > > > + return 0; > > > +} > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-1.c > > > b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-1.c > > > new file mode 100644 > > > index 00000000000..577a5b5817c > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-1.c > > > @@ -0,0 +1,28 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized > > > -fdump-tree-forwprop1-details" } */ > > > + > > > +extern void link_error (void); > > > + > > > +/* Check for copyprop on structs with zeroing. */ > > > +#define vector16 __attribute__((vector_size(64))) > > > + > > > +struct g > > > +{ > > > + vector16 unsigned char t; > > > +}; > > > + > > > +struct g f(void) > > > +{ > > > + struct g temp_struct1 ; > > > + temp_struct1.t = (vector16 unsigned char){}; > > > + struct g temp_struct2 = temp_struct1; > > > + struct g temp_struct3 = temp_struct2; > > > + struct g temp_struct4 = temp_struct3; > > > + return temp_struct4; > > > +} > > > + > > > +/* There should be no references to any of "temp_struct*" > > > + temporaries. */ > > > +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */ > > > +/* Also check that forwprop pass did the copy prop. */ > > > +/* { dg-final { scan-tree-dump-times "after previous" 4 "forwprop1" } } > > > */ > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-2.c > > > b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-2.c > > > new file mode 100644 > > > index 00000000000..ce3c6129b1a > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-2.c > > > @@ -0,0 +1,25 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized > > > -fdump-tree-forwprop1-details" } */ > > > + > > > +extern void link_error (void); > > > + > > > +struct g > > > +{ > > > + unsigned int t; > > > +}; > > > + > > > +struct g f(void) > > > +{ > > > + struct g temp_struct1 ; > > > + temp_struct1.t = 0; > > > + struct g temp_struct2 = temp_struct1; > > > + struct g temp_struct3 = temp_struct2; > > > + struct g temp_struct4 = temp_struct3; > > > + return temp_struct4; > > > +} > > > + > > > +/* There should be no references to any of "temp_struct*" > > > + temporaries. */ > > > +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */ > > > +/* Also check that forwprop pass did the copy prop. */ > > > +/* { dg-final { scan-tree-dump-times "after previous" 4 "forwprop1" } } > > > */ > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-3.c > > > b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-3.c > > > new file mode 100644 > > > index 00000000000..94ce965e7ce > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-3.c > > > @@ -0,0 +1,25 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized > > > -fdump-tree-forwprop1-details" } */ > > > + > > > +extern void link_error (void); > > > + > > > +struct g > > > +{ > > > + _Complex unsigned int t; > > > +}; > > > + > > > +struct g f(void) > > > +{ > > > + struct g temp_struct1 ; > > > + temp_struct1.t = 0; > > > + struct g temp_struct2 = temp_struct1; > > > + struct g temp_struct3 = temp_struct2; > > > + struct g temp_struct4 = temp_struct3; > > > + return temp_struct4; > > > +} > > > + > > > +/* There should be no references to any of "temp_struct*" > > > + temporaries. */ > > > +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */ > > > +/* Also check that forwprop pass did the copy prop. */ > > > +/* { dg-final { scan-tree-dump-times "after previous" 4 "forwprop1" } } > > > */ > > > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > > > index 3d38d88844b..1cde5f85150 100644 > > > --- a/gcc/tree-ssa-forwprop.cc > > > +++ b/gcc/tree-ssa-forwprop.cc > > > @@ -1340,6 +1340,21 @@ optimize_aggr_zeroprop (gimple_stmt_iterator *gsip) > > > } > > > } > > > } > > > + /* A store of integer (scalar, vector or complex) zeros is > > > + a zero store. */ > > > + else if (gimple_store_p (stmt) > > > + && gimple_assign_single_p (stmt) > > > + && integer_zerop (gimple_assign_rhs1 (stmt))) > > > > Do we do the correct things for bitfield stores or should we restrict > > this to mode-size or BLKmode types? > > Yes we handle bitfield stores as long as they long as they are > multiple of BITS_PER_UNIT. > e.g. (assume little-endian): > ``` > struct s1 > { > unsigned int c:24; > unsigned int d:8; > unsigned int e; > unsigned int f; > }; > > __attribute__((noinline)) > void f(struct s1 a, unsigned char *data) > { > a.c = 0; > __builtin_memcpy(data, &a, 3); > } > ``` > Is able to handle a 24bit (3 byte) integer store and changes > (correctly) the memcpy into a memset. > copy-prop-aggr-zero-[12].c tests the non-multiple case which I ran > into when I originally was writing this patch. > > > > > > + { > > > + tree rhs = gimple_assign_rhs1 (stmt); > > > + tree type = TREE_TYPE (rhs); > > > + dest = gimple_assign_lhs (stmt); > > > + ao_ref_init (&read, dest); > > > + /* For integral types, the type precision needs to be a multiply > > > of BITS_PER_UNIT. */ > > > + if (INTEGRAL_TYPE_P (type) > > > + && (TYPE_PRECISION (type) % BITS_PER_UNIT) != 0) > > > + dest = NULL_TREE; > > This is what limits to the precision to be a multiple of a byte sized > integer type.
The patch is OK then. Richard. > > Thanks, > Andrew > > > > + } > > > else if (gimple_store_p (stmt) > > > && gimple_assign_single_p (stmt) > > > && TREE_CODE (gimple_assign_rhs1 (stmt)) == CONSTRUCTOR > > > -- > > > 2.43.0 > > >