This improves copy prop for aggregates by working over statements that don't modify the access just like how it is done for copying zeros. To speed up things, we should only have one loop back on the vuse instead of doing it twice for each load/store assignments.
PR tree-optimization/14295 gcc/ChangeLog: * tree-ssa-forwprop.cc (optimize_memcpy_to_memset): Split rewriting part out into ... (optimize_memcpy_to_memset_1): This function. (optimize_agr_copyprop): Walk back until we get a statement that may clobber the read. Call optimize_memcpy_to_memset_1. Check that the src variable is not clobbered between the 2 statements. (pass_forwprop::execute): Call optimize_agr_copyprop only instead of optimize_memcpy_to_memset too. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/copy-prop-arg-1.c: New test. * gcc.dg/tree-ssa/copy-prop-arg-2.c: New test. * gcc.dg/tree-ssa/copy-prop-arg-3.c: New test. Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> --- .../gcc.dg/tree-ssa/copy-prop-arg-1.c | 37 +++++ .../gcc.dg/tree-ssa/copy-prop-arg-2.c | 35 ++++ .../gcc.dg/tree-ssa/copy-prop-arg-3.c | 41 +++++ gcc/tree-ssa-forwprop.cc | 149 ++++++++++++------ 4 files changed, 212 insertions(+), 50 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-3.c diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c new file mode 100644 index 00000000000..876556fc464 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-1.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized -fdump-tree-forwprop1-details" } */ + +/* PR tree-optimization/14295 */ + +/* Check for copyprop on structs. */ + +struct s +{ + char d; + int a, b; + double m; + int t[1024]; +}; + +void g(); +struct s foo (struct s r) +{ + struct s temp_struct1; + struct s temp_struct2; + struct s temp_struct3; + temp_struct1 = r; + temp_struct2 = temp_struct1; + temp_struct3 = temp_struct2; + /* The call here should not make + a difference in removing all of the copies, + this is true as we end up with r = r; */ + g(); + r = temp_struct3; + return r; +} + +/* There should be no references to any of "temp_struct*" + temporaries. */ +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */ +/* Also check that forwprop pass did the copy prop. */ +/* { dg-final { scan-tree-dump-times "after previous" 3 "forwprop1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c new file mode 100644 index 00000000000..b1a76b48953 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-2.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fno-tree-sra -fdump-tree-optimized -fdump-tree-forwprop1-details" } */ + +/* PR tree-optimization/14295 */ + +/* Check for copyprop on structs. */ + +struct s +{ + char d; + int a, b; + double m; + int t[1024]; +}; + +void g(); +struct s foo (struct s r) +{ + struct s temp_struct1; + struct s temp_struct2; + struct s temp_struct3; + temp_struct1 = r; + temp_struct2 = temp_struct1; + temp_struct3 = temp_struct2; + /* The call here should not make + a difference in removing all of the copies. */ + g(); + return temp_struct3; +} + +/* There should be no references to any of "temp_struct*" + temporaries. */ +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */ +/* Also check that forwprop pass did the copy prop. */ +/* { dg-final { scan-tree-dump-times "after previous" 3 "forwprop1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-3.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-3.c new file mode 100644 index 00000000000..96efafbe20c --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-arg-3.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-tree-sra -fdump-tree-optimized -fdump-tree-forwprop1-details" } */ + +/* PR tree-optimization/14295 */ + +/* Check for copyprop on structs. */ + +struct s +{ + int t[1024]; +}; + +[[gnu::noinline]] +void g(float *a) +{ + *a = 1.0; +} + +struct s r; +struct s foo (float *a) +{ + struct s temp_struct1; + struct s temp_struct2; + struct s temp_struct3; + temp_struct1 = r; + temp_struct2 = temp_struct1; + temp_struct3 = temp_struct2; + /* The call here should not make + a difference in removing all of the copies, + this is true as we end up with r = r; g should be known only to change + float types so can be ignored with TBAA. */ + g(a); + r = temp_struct3; + return r; +} + +/* There should be no references to any of "temp_struct*" + temporaries. */ +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */ +/* Also check that forwprop pass did the copy prop. */ +/* { dg-final { scan-tree-dump-times "after previous" 3 "forwprop1" } } */ diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index 242538ccc62..ec03a93da6d 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -1191,55 +1191,17 @@ constant_pointer_difference (tree p1, tree p2) return NULL_TREE; } - -/* Optimize - a = {}; - b = a; - into - a = {}; - b = {}; - Similarly for memset (&a, ..., sizeof (a)); instead of a = {}; - and/or memcpy (&b, &a, sizeof (a)); instead of b = a; */ - static bool -optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, tree dest, tree src, tree len) +optimize_memcpy_to_memset_1 (gimple_stmt_iterator *gsip, tree dest, tree src, tree len, gimple *stmt, gimple *defstmt) { - ao_ref read; - gimple *stmt = gsi_stmt (*gsip); - if (gimple_has_volatile_ops (stmt)) - return false; - - tree src2 = NULL_TREE, len2 = NULL_TREE; poly_int64 offset, offset2; tree val = integer_zero_node; - bool len_was_null = len == NULL_TREE; - if (len == NULL_TREE) - len = (TREE_CODE (src) == COMPONENT_REF - ? DECL_SIZE_UNIT (TREE_OPERAND (src, 1)) - : TYPE_SIZE_UNIT (TREE_TYPE (src))); + tree len2 = NULL_TREE; + tree src2 = NULL_TREE; if (len == NULL_TREE || !poly_int_tree_p (len)) return false; - ao_ref_init (&read, src); - tree vuse = gimple_vuse (stmt); - gimple *defstmt; - unsigned limit = param_sccvn_max_alias_queries_per_access; - do { - if (vuse == NULL || TREE_CODE (vuse) != SSA_NAME) - return false; - defstmt = SSA_NAME_DEF_STMT (vuse); - if (is_a <gphi*>(defstmt)) - return false; - if (limit-- == 0) - return false; - /* If the len was null, then we can use TBBA. */ - if (stmt_may_clobber_ref_p_1 (defstmt, &read, - /* tbaa_p = */ len_was_null)) - break; - vuse = gimple_vuse (defstmt); - } while (true); - if (gimple_store_p (defstmt) && gimple_assign_single_p (defstmt) && TREE_CODE (gimple_assign_rhs1 (defstmt)) == STRING_CST @@ -1341,6 +1303,50 @@ optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, tree dest, tree src, tree } return true; } + +/* Optimize + a = {}; + b = a; + into + a = {}; + b = {}; + Similarly for memset (&a, ..., sizeof (a)); instead of a = {}; + and/or memcpy (&b, &a, sizeof (a)); instead of b = a; */ + +static bool +optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, tree dest, tree src, tree len) +{ + gcc_assert (len); + ao_ref read; + gimple *stmt = gsi_stmt (*gsip); + if (gimple_has_volatile_ops (stmt)) + return false; + + if (!poly_int_tree_p (len)) + return false; + + ao_ref_init (&read, src); + tree vuse = gimple_vuse (stmt); + gimple *defstmt; + unsigned limit = param_sccvn_max_alias_queries_per_access; + do { + if (vuse == NULL || TREE_CODE (vuse) != SSA_NAME) + return false; + defstmt = SSA_NAME_DEF_STMT (vuse); + if (is_a <gphi*>(defstmt)) + return false; + if (limit-- == 0) + return false; + if (stmt_may_clobber_ref_p_1 (defstmt, &read, + /* tbaa_p = */ false)) + break; + vuse = gimple_vuse (defstmt); + } while (true); + + return optimize_memcpy_to_memset_1 (gsip, dest, src, len, stmt, defstmt); +} + + /* Optimizes a = c; b = a; @@ -1374,7 +1380,33 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip, tree dest, tree src) gsi_replace (gsip, gimple_build_nop (), false); return true; } - gimple *defstmt = SSA_NAME_DEF_STMT (vuse); + gimple *defstmt; + bool callinbetween = false; + + ao_ref read; + ao_ref_init (&read, src); + vuse = gimple_vuse (stmt); + unsigned limit = param_sccvn_max_alias_queries_per_access; + do { + if (vuse == NULL || TREE_CODE (vuse) != SSA_NAME) + return false; + defstmt = SSA_NAME_DEF_STMT (vuse); + if (is_a <gphi*>(defstmt)) + return false; + if (limit-- == 0) + return false; + callinbetween |= is_a <gcall *>(defstmt); + if (stmt_may_clobber_ref_p_1 (defstmt, &read, true)) + break; + vuse = gimple_vuse (defstmt); + } while (true); + + tree len = (TREE_CODE (src) == COMPONENT_REF + ? DECL_SIZE_UNIT (TREE_OPERAND (src, 1)) + : TYPE_SIZE_UNIT (TREE_TYPE (src))); + if (optimize_memcpy_to_memset_1 (gsip, dest, src, len, stmt, defstmt)) + return true; + if (!gimple_assign_load_p (defstmt) || !gimple_store_p (defstmt)) return false; @@ -1390,6 +1422,30 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip, tree dest, tree src) if (!operand_equal_p (src, dest2, 0)) return false; + /* Check that the src2 has not changed. */ + ao_ref_init (&read, src2); + vuse = gimple_vuse (stmt); + limit = param_sccvn_max_alias_queries_per_access; + gimple *defstmt1; + do { + if (vuse == NULL || TREE_CODE (vuse) != SSA_NAME) + return false; + defstmt1 = SSA_NAME_DEF_STMT (vuse); + if (defstmt == defstmt1) + break; + if (is_a <gphi*>(defstmt1)) + return false; + if (limit-- == 0) + return false; + if (stmt_may_clobber_ref_p_1 (defstmt1, &read, true)) + { + if (defstmt != defstmt1) + return false; + break; + } + vuse = gimple_vuse (defstmt1); + } while (true); + /* If replacing with the same thing, then change the statement into a NOP. */ if (operand_equal_p (src2, dest, 0)) @@ -1407,6 +1463,7 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip, tree dest, tree src) gsi_replace (gsip, gimple_build_nop (), false); return true; } + /* For 2 memory refences and using a temporary to do the copy, don't remove the temporary as the 2 memory references might overlap. Note t does not need to be decl as it could be field. @@ -4806,14 +4863,6 @@ pass_forwprop::execute (function *fun) enum tree_code code = gimple_assign_rhs_code (stmt); if (gimple_assign_load_p (stmt) && gimple_store_p (stmt)) { - if (optimize_memcpy_to_memset (&gsi, - gimple_assign_lhs (stmt), - gimple_assign_rhs1 (stmt), - /* len = */NULL_TREE)) - { - changed = true; - break; - } if (optimize_agr_copyprop (&gsi, gimple_assign_lhs (stmt), gimple_assign_rhs1 (stmt))) { -- 2.43.0