On Fri, May 23, 2025 at 10:12 PM Andrew Pinski <quic_apin...@quicinc.com> wrote: > > This improves copy prop for aggregates by working over statements that don't > modify the access > just like how it is done for copying zeros. > To speed up things, we should only have one loop back on the vuse instead of > doing it twice for > each load/store assignments.
I will be submitting this after the first patch is approved. Note there is a bug with the check of stmt_may_clobber_ref_p_1 when it comes to checking stores, we can't use TBAA. Otherwise you miscompile the following: ``` struct f{unsigned t;}; struct p{unsigned t;}; unsigned int tt; static inline void g(struct f *a, struct p *b) { struct f c = *a; b->t = 1; *a = c; } struct f a = {}; int h(void) { g(&a, (struct p*)&a); return a.t; } ``` Thanks, Andrew > > PR tree-optimization/14295 > > gcc/ChangeLog: > > * tree-ssa-forwprop.cc (optimize_memcpy_to_memset): Split rewriting > part out into ... > (optimize_memcpy_to_memset_1): This function. > (optimize_agr_copyprop): Walk back until we get a > statement that may clobber the read. Call optimize_memcpy_to_memset_1. > Check that the src variable is not clobbered between the 2 statements. > (pass_forwprop::execute): Call optimize_agr_copyprop only instead of > optimize_memcpy_to_memset too. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/copy-prop-arg-1.c: New test. > * gcc.dg/tree-ssa/copy-prop-arg-2.c: New test. > * gcc.dg/tree-ssa/copy-prop-arg-3.c: New test. > > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> > --- > .../gcc.dg/tree-ssa/copy-prop-arg-1.c | 37 +++++ > .../gcc.dg/tree-ssa/copy-prop-arg-2.c | 35 ++++ > .../gcc.dg/tree-ssa/copy-prop-arg-3.c | 41 +++++ > gcc/tree-ssa-forwprop.cc | 149 ++++++++++++------ > 4 files changed, 212 insertions(+), 50 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-3.c > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c > b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c > new file mode 100644 > index 00000000000..876556fc464 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c > @@ -0,0 +1,37 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized > -fdump-tree-forwprop1-details" } */ > + > +/* PR tree-optimization/14295 */ > + > +/* Check for copyprop on structs. */ > + > +struct s > +{ > + char d; > + int a, b; > + double m; > + int t[1024]; > +}; > + > +void g(); > +struct s foo (struct s r) > +{ > + struct s temp_struct1; > + struct s temp_struct2; > + struct s temp_struct3; > + temp_struct1 = r; > + temp_struct2 = temp_struct1; > + temp_struct3 = temp_struct2; > + /* The call here should not make > + a difference in removing all of the copies, > + this is true as we end up with r = r; */ > + g(); > + r = temp_struct3; > + return r; > +} > + > +/* There should be no references to any of "temp_struct*" > + temporaries. */ > +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */ > +/* Also check that forwprop pass did the copy prop. */ > +/* { dg-final { scan-tree-dump-times "after previous" 3 "forwprop1" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c > b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c > new file mode 100644 > index 00000000000..b1a76b48953 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c > @@ -0,0 +1,35 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized > -fdump-tree-forwprop1-details" } */ > + > +/* PR tree-optimization/14295 */ > + > +/* Check for copyprop on structs. */ > + > +struct s > +{ > + char d; > + int a, b; > + double m; > + int t[1024]; > +}; > + > +void g(); > +struct s foo (struct s r) > +{ > + struct s temp_struct1; > + struct s temp_struct2; > + struct s temp_struct3; > + temp_struct1 = r; > + temp_struct2 = temp_struct1; > + temp_struct3 = temp_struct2; > + /* The call here should not make > + a difference in removing all of the copies. */ > + g(); > + return temp_struct3; > +} > + > +/* There should be no references to any of "temp_struct*" > + temporaries. */ > +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */ > +/* Also check that forwprop pass did the copy prop. */ > +/* { dg-final { scan-tree-dump-times "after previous" 3 "forwprop1" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-3.c > b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-3.c > new file mode 100644 > index 00000000000..96efafbe20c > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-3.c > @@ -0,0 +1,41 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fno-tree-sra -fdump-tree-optimized > -fdump-tree-forwprop1-details" } */ > + > +/* PR tree-optimization/14295 */ > + > +/* Check for copyprop on structs. */ > + > +struct s > +{ > + int t[1024]; > +}; > + > +[[gnu::noinline]] > +void g(float *a) > +{ > + *a = 1.0; > +} > + > +struct s r; > +struct s foo (float *a) > +{ > + struct s temp_struct1; > + struct s temp_struct2; > + struct s temp_struct3; > + temp_struct1 = r; > + temp_struct2 = temp_struct1; > + temp_struct3 = temp_struct2; > + /* The call here should not make > + a difference in removing all of the copies, > + this is true as we end up with r = r; g should be known only to change > + float types so can be ignored with TBAA. */ > + g(a); > + r = temp_struct3; > + return r; > +} > + > +/* There should be no references to any of "temp_struct*" > + temporaries. */ > +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */ > +/* Also check that forwprop pass did the copy prop. */ > +/* { dg-final { scan-tree-dump-times "after previous" 3 "forwprop1" } } */ > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > index 242538ccc62..ec03a93da6d 100644 > --- a/gcc/tree-ssa-forwprop.cc > +++ b/gcc/tree-ssa-forwprop.cc > @@ -1191,55 +1191,17 @@ constant_pointer_difference (tree p1, tree p2) > return NULL_TREE; > } > > - > -/* Optimize > - a = {}; > - b = a; > - into > - a = {}; > - b = {}; > - Similarly for memset (&a, ..., sizeof (a)); instead of a = {}; > - and/or memcpy (&b, &a, sizeof (a)); instead of b = a; */ > - > static bool > -optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, tree dest, tree src, > tree len) > +optimize_memcpy_to_memset_1 (gimple_stmt_iterator *gsip, tree dest, tree > src, tree len, gimple *stmt, gimple *defstmt) > { > - ao_ref read; > - gimple *stmt = gsi_stmt (*gsip); > - if (gimple_has_volatile_ops (stmt)) > - return false; > - > - tree src2 = NULL_TREE, len2 = NULL_TREE; > poly_int64 offset, offset2; > tree val = integer_zero_node; > - bool len_was_null = len == NULL_TREE; > - if (len == NULL_TREE) > - len = (TREE_CODE (src) == COMPONENT_REF > - ? DECL_SIZE_UNIT (TREE_OPERAND (src, 1)) > - : TYPE_SIZE_UNIT (TREE_TYPE (src))); > + tree len2 = NULL_TREE; > + tree src2 = NULL_TREE; > if (len == NULL_TREE > || !poly_int_tree_p (len)) > return false; > > - ao_ref_init (&read, src); > - tree vuse = gimple_vuse (stmt); > - gimple *defstmt; > - unsigned limit = param_sccvn_max_alias_queries_per_access; > - do { > - if (vuse == NULL || TREE_CODE (vuse) != SSA_NAME) > - return false; > - defstmt = SSA_NAME_DEF_STMT (vuse); > - if (is_a <gphi*>(defstmt)) > - return false; > - if (limit-- == 0) > - return false; > - /* If the len was null, then we can use TBBA. */ > - if (stmt_may_clobber_ref_p_1 (defstmt, &read, > - /* tbaa_p = */ len_was_null)) > - break; > - vuse = gimple_vuse (defstmt); > - } while (true); > - > if (gimple_store_p (defstmt) > && gimple_assign_single_p (defstmt) > && TREE_CODE (gimple_assign_rhs1 (defstmt)) == STRING_CST > @@ -1341,6 +1303,50 @@ optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, > tree dest, tree src, tree > } > return true; > } > + > +/* Optimize > + a = {}; > + b = a; > + into > + a = {}; > + b = {}; > + Similarly for memset (&a, ..., sizeof (a)); instead of a = {}; > + and/or memcpy (&b, &a, sizeof (a)); instead of b = a; */ > + > +static bool > +optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, tree dest, tree src, > tree len) > +{ > + gcc_assert (len); > + ao_ref read; > + gimple *stmt = gsi_stmt (*gsip); > + if (gimple_has_volatile_ops (stmt)) > + return false; > + > + if (!poly_int_tree_p (len)) > + return false; > + > + ao_ref_init (&read, src); > + tree vuse = gimple_vuse (stmt); > + gimple *defstmt; > + unsigned limit = param_sccvn_max_alias_queries_per_access; > + do { > + if (vuse == NULL || TREE_CODE (vuse) != SSA_NAME) > + return false; > + defstmt = SSA_NAME_DEF_STMT (vuse); > + if (is_a <gphi*>(defstmt)) > + return false; > + if (limit-- == 0) > + return false; > + if (stmt_may_clobber_ref_p_1 (defstmt, &read, > + /* tbaa_p = */ false)) > + break; > + vuse = gimple_vuse (defstmt); > + } while (true); > + > + return optimize_memcpy_to_memset_1 (gsip, dest, src, len, stmt, defstmt); > +} > + > + > /* Optimizes > a = c; > b = a; > @@ -1374,7 +1380,33 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip, > tree dest, tree src) > gsi_replace (gsip, gimple_build_nop (), false); > return true; > } > - gimple *defstmt = SSA_NAME_DEF_STMT (vuse); > + gimple *defstmt; > + bool callinbetween = false; > + > + ao_ref read; > + ao_ref_init (&read, src); > + vuse = gimple_vuse (stmt); > + unsigned limit = param_sccvn_max_alias_queries_per_access; > + do { > + if (vuse == NULL || TREE_CODE (vuse) != SSA_NAME) > + return false; > + defstmt = SSA_NAME_DEF_STMT (vuse); > + if (is_a <gphi*>(defstmt)) > + return false; > + if (limit-- == 0) > + return false; > + callinbetween |= is_a <gcall *>(defstmt); > + if (stmt_may_clobber_ref_p_1 (defstmt, &read, true)) > + break; > + vuse = gimple_vuse (defstmt); > + } while (true); > + > + tree len = (TREE_CODE (src) == COMPONENT_REF > + ? DECL_SIZE_UNIT (TREE_OPERAND (src, 1)) > + : TYPE_SIZE_UNIT (TREE_TYPE (src))); > + if (optimize_memcpy_to_memset_1 (gsip, dest, src, len, stmt, defstmt)) > + return true; > + > if (!gimple_assign_load_p (defstmt) > || !gimple_store_p (defstmt)) > return false; > @@ -1390,6 +1422,30 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip, > tree dest, tree src) > if (!operand_equal_p (src, dest2, 0)) > return false; > > + /* Check that the src2 has not changed. */ > + ao_ref_init (&read, src2); > + vuse = gimple_vuse (stmt); > + limit = param_sccvn_max_alias_queries_per_access; > + gimple *defstmt1; > + do { > + if (vuse == NULL || TREE_CODE (vuse) != SSA_NAME) > + return false; > + defstmt1 = SSA_NAME_DEF_STMT (vuse); > + if (defstmt == defstmt1) > + break; > + if (is_a <gphi*>(defstmt1)) > + return false; > + if (limit-- == 0) > + return false; > + if (stmt_may_clobber_ref_p_1 (defstmt1, &read, true)) > + { > + if (defstmt != defstmt1) > + return false; > + break; > + } > + vuse = gimple_vuse (defstmt1); > + } while (true); > + > /* If replacing with the same thing, then change the statement > into a NOP. */ > if (operand_equal_p (src2, dest, 0)) > @@ -1407,6 +1463,7 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip, tree > dest, tree src) > gsi_replace (gsip, gimple_build_nop (), false); > return true; > } > + > /* For 2 memory refences and using a temporary to do the copy, > don't remove the temporary as the 2 memory references might overlap. > Note t does not need to be decl as it could be field. > @@ -4806,14 +4863,6 @@ pass_forwprop::execute (function *fun) > enum tree_code code = gimple_assign_rhs_code (stmt); > if (gimple_assign_load_p (stmt) && gimple_store_p (stmt)) > { > - if (optimize_memcpy_to_memset (&gsi, > - gimple_assign_lhs > (stmt), > - gimple_assign_rhs1 > (stmt), > - /* len = */NULL_TREE)) > - { > - changed = true; > - break; > - } > if (optimize_agr_copyprop (&gsi, gimple_assign_lhs > (stmt), > gimple_assign_rhs1 (stmt))) > { > -- > 2.43.0 >