While looking into the gimple level after optimization of the highway code from google, I noticed in .optimized we still have: ``` MEM <vector(8) short int> [(short int *)&a] = { 0, 0, 0, 0, 0, 0, 0, 0 }; D.4398 = a; a ={v} {CLOBBER(eos)}; D.4389 = D.4398; D.4390 = D.4389; D.4361 = D.4390; D.4195 = D.4361; return D.4195; ``` Note this is with SRA disabled since I noticed there is better code generation with SRA disabled but that is a different story and I will get to that later on.
This could just be optimized to a single store of `{}`. The reason why optimize_agr_copyprop does not handle the above is that there were clobbers in between the stores in the last forwprop pass, and it currently doesn't copy after the first use, whereas optimize_aggr_zeroprop does handle copying over clobbers just fine. So this allows optimize_aggr_zeroprop to recognize the store to `a` as being like a memset, and then the result just falls through. Bootstrapped and tested on x86_64-linux-gnu. gcc/ChangeLog: * tree-ssa-forwprop.cc (optimize_aggr_zeroprop): Recognize stores of integer_zerop as memset of 0. gcc/testsuite/ChangeLog: * gcc.dg/torture/copy-prop-aggr-zero-1.c: New test. * gcc.dg/torture/copy-prop-aggr-zero-2.c: New test. * gcc.dg/tree-ssa/copy-prop-aggregate-zero-1.c: New test. * gcc.dg/tree-ssa/copy-prop-aggregate-zero-2.c: New test. * gcc.dg/tree-ssa/copy-prop-aggregate-zero-3.c: New test. Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> --- .../gcc.dg/torture/copy-prop-aggr-zero-1.c | 28 +++++++++++++++++++ .../gcc.dg/torture/copy-prop-aggr-zero-2.c | 28 +++++++++++++++++++ .../tree-ssa/copy-prop-aggregate-zero-1.c | 28 +++++++++++++++++++ .../tree-ssa/copy-prop-aggregate-zero-2.c | 25 +++++++++++++++++ .../tree-ssa/copy-prop-aggregate-zero-3.c | 25 +++++++++++++++++ gcc/tree-ssa-forwprop.cc | 15 ++++++++++ 6 files changed, 149 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-1.c create mode 100644 gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-2.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-1.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-2.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-3.c diff --git a/gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-1.c b/gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-1.c new file mode 100644 index 00000000000..5c457b9bc2f --- /dev/null +++ 
b/gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-1.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ + +/* Make sure a bit-field store of 0 does not cause the whole assignment to become 0. */ + +struct s1 +{ + unsigned char c:1; + unsigned char d:7; +}; + +__attribute__((noinline)) +struct s1 f(struct s1 a) +{ + a.c = 0; + struct s1 t = a; + return t; +} + +int main() +{ + struct s1 a = {1, 2}; + struct s1 b = f(a); + if (b.c != 0) + __builtin_abort(); + if (b.d != 2) + __builtin_abort(); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-2.c b/gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-2.c new file mode 100644 index 00000000000..f1da1615e8e --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/copy-prop-aggr-zero-2.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ + +/* Make sure a bit-field store of 0 does not cause the whole assignment to become 0. */ + +struct s1 +{ + unsigned char d:7; + unsigned char c:1; +}; + +__attribute__((noinline)) +struct s1 f(struct s1 a) +{ + a.c = 0; + struct s1 t = a; + return t; +} + +int main() +{ + struct s1 a = {2, 1}; + struct s1 b = f(a); + if (b.c != 0) + __builtin_abort(); + if (b.d != 2) + __builtin_abort(); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-1.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-1.c new file mode 100644 index 00000000000..577a5b5817c --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-1.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized -fdump-tree-forwprop1-details" } */ + +extern void link_error (void); + +/* Check for copyprop on structs with zeroing. 
*/ +#define vector16 __attribute__((vector_size(64))) + +struct g +{ + vector16 unsigned char t; +}; + +struct g f(void) +{ + struct g temp_struct1 ; + temp_struct1.t = (vector16 unsigned char){}; + struct g temp_struct2 = temp_struct1; + struct g temp_struct3 = temp_struct2; + struct g temp_struct4 = temp_struct3; + return temp_struct4; +} + +/* There should be no references to any of "temp_struct*" + temporaries. */ +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */ +/* Also check that forwprop pass did the copy prop. */ +/* { dg-final { scan-tree-dump-times "after previous" 4 "forwprop1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-2.c new file mode 100644 index 00000000000..ce3c6129b1a --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-2.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized -fdump-tree-forwprop1-details" } */ + +extern void link_error (void); + +struct g +{ + unsigned int t; +}; + +struct g f(void) +{ + struct g temp_struct1 ; + temp_struct1.t = 0; + struct g temp_struct2 = temp_struct1; + struct g temp_struct3 = temp_struct2; + struct g temp_struct4 = temp_struct3; + return temp_struct4; +} + +/* There should be no references to any of "temp_struct*" + temporaries. */ +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */ +/* Also check that forwprop pass did the copy prop. 
*/ +/* { dg-final { scan-tree-dump-times "after previous" 4 "forwprop1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-3.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-3.c new file mode 100644 index 00000000000..94ce965e7ce --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-zero-3.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized -fdump-tree-forwprop1-details" } */ + +extern void link_error (void); + +struct g +{ + _Complex unsigned int t; +}; + +struct g f(void) +{ + struct g temp_struct1 ; + temp_struct1.t = 0; + struct g temp_struct2 = temp_struct1; + struct g temp_struct3 = temp_struct2; + struct g temp_struct4 = temp_struct3; + return temp_struct4; +} + +/* There should be no references to any of "temp_struct*" + temporaries. */ +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */ +/* Also check that forwprop pass did the copy prop. */ +/* { dg-final { scan-tree-dump-times "after previous" 4 "forwprop1" } } */ diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index 3d38d88844b..1cde5f85150 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -1340,6 +1340,21 @@ optimize_aggr_zeroprop (gimple_stmt_iterator *gsip) } } } + /* A store of integer (scalar, vector or complex) zeros is + a zero store. */ + else if (gimple_store_p (stmt) + && gimple_assign_single_p (stmt) + && integer_zerop (gimple_assign_rhs1 (stmt))) + { + tree rhs = gimple_assign_rhs1 (stmt); + tree type = TREE_TYPE (rhs); + dest = gimple_assign_lhs (stmt); + ao_ref_init (&read, dest); + /* For integral types, the type precision needs to be a multiple of BITS_PER_UNIT. */ + if (INTEGRAL_TYPE_P (type) + && (TYPE_PRECISION (type) % BITS_PER_UNIT) != 0) + dest = NULL_TREE; + } else if (gimple_store_p (stmt) && gimple_assign_single_p (stmt) && TREE_CODE (gimple_assign_rhs1 (stmt)) == CONSTRUCTOR -- 2.43.0