On Thu, Aug 7, 2025 at 1:22 PM Richard Biener <richard.guent...@gmail.com> wrote: > > On Wed, Aug 6, 2025 at 7:29 PM Andrew Pinski <quic_apin...@quicinc.com> wrote: > > > > One thing I noticed while working on copy prop for aggregates is that we > > start with > > a memcpy like statement and then walk backwards. This means we could have a > > few walks > > backwards to see there was no statement for zeroing. Instead this changes > > the walk > > backwards into a true forwprop. In the future we can expand to forwprop the > > zeroing > > into say an function argument or something more than memcpy like statement. > > > > This should speed up slightly the compile time performance since there will > > be less > > memsets like statements than memcpy and there is only one walk forwards for > > memset like > > staments instead of multiple walk backwards to find the memset. > > > > Note this does add one extra improvement, the memcpy now does not need to > > have an address > > as its dest argument; this could have been done before too but it was even > > more noticable > > now because of the variable became only set so it was removed and the check > > was removed > > as well. > > > > There is also a fix on how ao_ref for the memset/memcpy is done, before it > > was just using > > ao_ref_init which is wrong since it should instead of used > > ao_ref_init_from_ptr_and_size. > > This part fixes PR 121422. > > OK.
Oh, as we now do alias walks in forwprop maybe we should make this conditional and do this not for all pass instances, since it makes forwprop possibly a lot slower? > Thanks, > Richard. > > > Changes since v1: > > * v2: Add back limit on the walk which was missed in v1. > > Move the call to get_addr_base_and_unit_offset outside > > of the vuse loop. > > * v3: Remove extra check before the call to optimize_aggr_zeroprop_1. > > Fix setting up of ao_ref for memset (PR121422). > > > > PR tree-optimization/118946 > > PR tree-optimization/121422 > > > > gcc/ChangeLog: > > > > * tree-ssa-forwprop.cc (optimize_memcpy_to_memset): Remove. > > (optimize_aggr_zeroprop_1): New function. > > (optimize_aggr_zeroprop): New function. > > (simplify_builtin_call): Don't call optimize_memcpy_to_memset > > for memcpy but call optimize_aggr_zeroprop for memset. > > (pass_forwprop::execute): Don't call optimize_memcpy_to_memset > > for aggregate copies but rather call optimize_aggr_zeroprop > > for aggregate stores. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.dg/pr118946-1.c: New test. > > * gcc.dg/torture/pr121422-1.c: New test. > > * gcc.dg/torture/pr121422-2.c: New test. > > > > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> > > --- > > gcc/testsuite/gcc.dg/pr118946-1.c | 15 ++ > > gcc/testsuite/gcc.dg/torture/pr121422-1.c | 35 +++ > > gcc/testsuite/gcc.dg/torture/pr121422-2.c | 36 +++ > > gcc/tree-ssa-forwprop.cc | 300 +++++++++++++--------- > > 4 files changed, 258 insertions(+), 128 deletions(-) > > create mode 100644 gcc/testsuite/gcc.dg/pr118946-1.c > > create mode 100644 gcc/testsuite/gcc.dg/torture/pr121422-1.c > > create mode 100644 gcc/testsuite/gcc.dg/torture/pr121422-2.c > > > > diff --git a/gcc/testsuite/gcc.dg/pr118946-1.c > > b/gcc/testsuite/gcc.dg/pr118946-1.c > > new file mode 100644 > > index 00000000000..6cf2661f286 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/pr118946-1.c > > @@ -0,0 +1,15 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-forwprop1-details" > > } */ > > + > > +/* PR tree-optimization/118946 */ > > + > > +void f(char *a) > > +{ > > + char t[1024] = {}; > > + __builtin_memcpy(a, t, 10); > > +} > > + > > +/* We should be able to optimize the memcpy into a memset here. */ > > +/* { dg-final { scan-tree-dump-times "after previous" 1 "forwprop1"} } */ > > +/* { dg-final { scan-tree-dump-times "memset " 1 "optimized" } } */ > > +/* { dg-final { scan-tree-dump-not "memcpy " "optimized" } } */ > > diff --git a/gcc/testsuite/gcc.dg/torture/pr121422-1.c > > b/gcc/testsuite/gcc.dg/torture/pr121422-1.c > > new file mode 100644 > > index 00000000000..136f80d3ead > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/torture/pr121422-1.c > > @@ -0,0 +1,35 @@ > > +/* { dg-do run } */ > > +/* PR tree-optimization/121422 */ > > + > > +struct s1 > > +{ > > + char a[4]; > > +}; > > +struct s1 b; > > +char t[4]; > > + > > +/* if both t and b startout zero initialized before this function, > > + t should end up being: > > + {0, 0, 1, 0} > > + while b.a should end up being: > > + {0, 0, 0, 1} > > +*/ > > +__attribute__((noipa,noinline)) > > +void f(void) > > +{ > > + b = (struct s1){}; > > + b.a[3] = 1; > > + /* This memcpy should stay a memcpy and not become memset. */ > > + __builtin_memcpy(&t[0], &b.a[1], 3*sizeof(t[0])); > > +} > > + > > + > > +int main() > > +{ > > + f(); > > + for(int i = 0; i < 4; i++) > > + { > > + if (t[i] != (i == 2 ? 1 : 0)) > > + __builtin_abort(); > > + } > > +} > > diff --git a/gcc/testsuite/gcc.dg/torture/pr121422-2.c > > b/gcc/testsuite/gcc.dg/torture/pr121422-2.c > > new file mode 100644 > > index 00000000000..570559c6c73 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/torture/pr121422-2.c > > @@ -0,0 +1,36 @@ > > +/* { dg-do run } */ > > +/* PR tree-optimization/121422 */ > > + > > +struct s1 > > +{ > > + char a[4]; > > +}; > > +struct s1 b; > > +char t[4]; > > + > > +/* if both t and b startout zero initialized before this function, > > + t should end up being: > > + {0, 0, 1, 0} > > + while b.a should end up being: > > + {0, 0, 0, 1} > > +*/ > > +__attribute__((noipa,noinline)) > > +void f(void) > > +{ > > + __builtin_memset(&b.a[1], 0, 2); > > + b.a[3] = 1; > > + /* This memcpy should stay a memcpy and not become memset. */ > > + __builtin_memcpy(&t[0], &b.a[1], 3); > > +} > > + > > + > > +int main() > > +{ > > + f(); > > + for(int i = 0; i < 4; i++) > > + { > > + if (t[i] != (i == 2 ? 1 : 0)) > > + __builtin_abort(); > > + } > > +} > > + > > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > > index 2dc77ccba1d..156ea322867 100644 > > --- a/gcc/tree-ssa-forwprop.cc > > +++ b/gcc/tree-ssa-forwprop.cc > > @@ -1190,117 +1190,55 @@ constant_pointer_difference (tree p1, tree p2) > > return NULL_TREE; > > } > > > > - > > -/* Optimize > > - a = {}; > > - b = a; > > - into > > - a = {}; > > - b = {}; > > - Similarly for memset (&a, ..., sizeof (a)); instead of a = {}; > > - and/or memcpy (&b, &a, sizeof (a)); instead of b = a; */ > > - > > +/* Helper function for optimize_aggr_zeroprop. > > + Props the zeroing (memset, VAL) that was done in DEST+OFFSET:LEN > > + (DEFSTMT) into the STMT. Returns true if the STMT was updated. */ > > static bool > > -optimize_memcpy_to_memset (gimple_stmt_iterator *gsip, tree dest, tree > > src, tree len) > > +optimize_aggr_zeroprop_1 (gimple *defstmt, gimple *stmt, > > + tree dest, poly_int64 offset, tree val, > > + poly_offset_int len) > > { > > - ao_ref read; > > - gimple *stmt = gsi_stmt (*gsip); > > - if (gimple_has_volatile_ops (stmt)) > > - return false; > > - > > - tree src2 = NULL_TREE, len2 = NULL_TREE; > > - poly_int64 offset, offset2; > > - tree val = integer_zero_node; > > - bool len_was_null = len == NULL_TREE; > > - if (len == NULL_TREE) > > - len = (TREE_CODE (src) == COMPONENT_REF > > - ? DECL_SIZE_UNIT (TREE_OPERAND (src, 1)) > > - : TYPE_SIZE_UNIT (TREE_TYPE (src))); > > - if (len == NULL_TREE > > - || !poly_int_tree_p (len)) > > - return false; > > + tree src2; > > + tree len2 = NULL_TREE; > > + poly_int64 offset2; > > > > - ao_ref_init (&read, src); > > - tree vuse = gimple_vuse (stmt); > > - gimple *defstmt; > > - unsigned limit = param_sccvn_max_alias_queries_per_access; > > - do { > > - /* If the vuse is the default definition, then there is no stores > > beforhand. */ > > - if (SSA_NAME_IS_DEFAULT_DEF (vuse)) > > - return false; > > - defstmt = SSA_NAME_DEF_STMT (vuse); > > - if (is_a <gphi*>(defstmt)) > > - return false; > > - if (limit-- == 0) > > - return false; > > - /* If the len was null, then we can use TBBA. */ > > - if (stmt_may_clobber_ref_p_1 (defstmt, &read, > > - /* tbaa_p = */ len_was_null)) > > - break; > > - vuse = gimple_vuse (defstmt); > > - } while (true); > > - > > - if (gimple_store_p (defstmt) > > - && gimple_assign_single_p (defstmt) > > - && TREE_CODE (gimple_assign_rhs1 (defstmt)) == STRING_CST > > - && !gimple_clobber_p (defstmt)) > > + if (gimple_call_builtin_p (stmt, BUILT_IN_MEMCPY) > > + && TREE_CODE (gimple_call_arg (stmt, 1)) == ADDR_EXPR > > + && poly_int_tree_p (gimple_call_arg (stmt, 2))) > > { > > - tree str = gimple_assign_rhs1 (defstmt); > > - src2 = gimple_assign_lhs (defstmt); > > - /* The string must contain all null char's for now. */ > > - for (int i = 0; i < TREE_STRING_LENGTH (str); i++) > > - { > > - if (TREE_STRING_POINTER (str)[i] != 0) > > - { > > - src2 = NULL_TREE; > > - break; > > - } > > - } > > - } > > - else if (gimple_store_p (defstmt) > > - && gimple_assign_single_p (defstmt) > > - && TREE_CODE (gimple_assign_rhs1 (defstmt)) == CONSTRUCTOR > > - && !gimple_clobber_p (defstmt)) > > - src2 = gimple_assign_lhs (defstmt); > > - else if (gimple_call_builtin_p (defstmt, BUILT_IN_MEMSET) > > - && TREE_CODE (gimple_call_arg (defstmt, 0)) == ADDR_EXPR > > - && TREE_CODE (gimple_call_arg (defstmt, 1)) == INTEGER_CST) > > - { > > - src2 = TREE_OPERAND (gimple_call_arg (defstmt, 0), 0); > > - len2 = gimple_call_arg (defstmt, 2); > > - val = gimple_call_arg (defstmt, 1); > > - /* For non-0 val, we'd have to transform stmt from assignment > > - into memset (only if dest is addressable). */ > > - if (!integer_zerop (val) && is_gimple_assign (stmt)) > > - src2 = NULL_TREE; > > + src2 = TREE_OPERAND (gimple_call_arg (stmt, 1), 0); > > + len2 = gimple_call_arg (stmt, 2); > > } > > + else if (gimple_assign_load_p (stmt) && gimple_store_p (stmt)) > > + { > > + src2 = gimple_assign_rhs1 (stmt); > > + len2 = (TREE_CODE (src2) == COMPONENT_REF > > + ? DECL_SIZE_UNIT (TREE_OPERAND (src2, 1)) > > + : TYPE_SIZE_UNIT (TREE_TYPE (src2))); > > + /* Can only handle zero memsets. */ > > + if (!integer_zerop (val)) > > + return false; > > + } > > + else > > + return false; > > > > - if (src2 == NULL_TREE) > > - return false; > > - > > - if (len2 == NULL_TREE) > > - len2 = (TREE_CODE (src2) == COMPONENT_REF > > - ? DECL_SIZE_UNIT (TREE_OPERAND (src2, 1)) > > - : TYPE_SIZE_UNIT (TREE_TYPE (src2))); > > if (len2 == NULL_TREE > > || !poly_int_tree_p (len2)) > > return false; > > > > - src = get_addr_base_and_unit_offset (src, &offset); > > src2 = get_addr_base_and_unit_offset (src2, &offset2); > > - if (src == NULL_TREE > > - || src2 == NULL_TREE > > - || maybe_lt (offset, offset2)) > > + if (src2 == NULL_TREE > > + || maybe_lt (offset2, offset)) > > return false; > > > > - if (!operand_equal_p (src, src2, 0)) > > + if (!operand_equal_p (dest, src2, 0)) > > return false; > > > > - /* [ src + offset2, src + offset2 + len2 - 1 ] is set to val. > > + /* [ dest + offset, dest + offset + len - 1 ] is set to val. > > Make sure that > > - [ src + offset, src + offset + len - 1 ] is a subset of that. */ > > - if (maybe_gt (wi::to_poly_offset (len) + (offset - offset2), > > - wi::to_poly_offset (len2))) > > + [ dest + offset2, dest + offset2 + len2 - 1 ] is a subset of that. */ > > + if (maybe_gt (wi::to_poly_offset (len2) + (offset2 - offset), > > + len)) > > return false; > > > > if (dump_file && (dump_flags & TDF_DETAILS)) > > @@ -1310,7 +1248,7 @@ optimize_memcpy_to_memset (gimple_stmt_iterator > > *gsip, tree dest, tree src, tree > > fprintf (dump_file, "after previous\n "); > > print_gimple_stmt (dump_file, defstmt, 0, dump_flags); > > } > > - > > + gimple *orig_stmt = stmt; > > /* For simplicity, don't change the kind of the stmt, > > turn dest = src; into dest = {}; and memcpy (&dest, &src, len); > > into memset (&dest, val, len); > > @@ -1320,8 +1258,10 @@ optimize_memcpy_to_memset (gimple_stmt_iterator > > *gsip, tree dest, tree src, tree > > of dest, dest isn't volatile. */ > > if (is_gimple_assign (stmt)) > > { > > - tree ctor = build_constructor (TREE_TYPE (dest), NULL); > > - gimple_assign_set_rhs_from_tree (gsip, ctor); > > + tree ctor_type = TREE_TYPE (gimple_assign_lhs (stmt)); > > + tree ctor = build_constructor (ctor_type, NULL); > > + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); > > + gimple_assign_set_rhs_from_tree (&gsi, ctor); > > update_stmt (stmt); > > statistics_counter_event (cfun, "copy zeroing propagation of > > aggregate", 1); > > } > > @@ -1341,8 +1281,126 @@ optimize_memcpy_to_memset (gimple_stmt_iterator > > *gsip, tree dest, tree src, tree > > fprintf (dump_file, "into\n "); > > print_gimple_stmt (dump_file, stmt, 0, dump_flags); > > } > > + > > + /* Mark the bb for eh cleanup if needed. */ > > + if (maybe_clean_or_replace_eh_stmt (orig_stmt, stmt)) > > + bitmap_set_bit (to_purge, gimple_bb (stmt)->index); > > + > > return true; > > } > > + > > +/* Optimize > > + a = {}; // DEST = value ;; LEN(nullptr) > > + b = a; > > + into > > + a = {}; > > + b = {}; > > + Similarly for memset (&a, ..., sizeof (a)); instead of a = {}; > > + and/or memcpy (&b, &a, sizeof (a)); instead of b = a; */ > > + > > +static bool > > +optimize_aggr_zeroprop (gimple_stmt_iterator *gsip) > > +{ > > + ao_ref read; > > + gimple *stmt = gsi_stmt (*gsip); > > + if (gimple_has_volatile_ops (stmt)) > > + return false; > > + > > + tree dest = NULL_TREE; > > + tree val = integer_zero_node; > > + tree len = NULL_TREE; > > + bool can_use_tbba = true; > > + bool changed = false; > > + > > + if (gimple_call_builtin_p (stmt, BUILT_IN_MEMSET) > > + && TREE_CODE (gimple_call_arg (stmt, 0)) == ADDR_EXPR > > + && TREE_CODE (gimple_call_arg (stmt, 1)) == INTEGER_CST > > + && poly_int_tree_p (gimple_call_arg (stmt, 2))) > > + { > > + dest = TREE_OPERAND (gimple_call_arg (stmt, 0), 0); > > + len = gimple_call_arg (stmt, 2); > > + val = gimple_call_arg (stmt, 1); > > + ao_ref_init_from_ptr_and_size (&read, gimple_call_arg (stmt, 0), > > len); > > + can_use_tbba = false; > > + } > > + else if (gimple_store_p (stmt) > > + && gimple_assign_single_p (stmt) > > + && TREE_CODE (gimple_assign_rhs1 (stmt)) == STRING_CST) > > + { > > + tree str = gimple_assign_rhs1 (stmt); > > + dest = gimple_assign_lhs (stmt); > > + ao_ref_init (&read, dest); > > + /* The string must contain all null char's for now. */ > > + for (int i = 0; i < TREE_STRING_LENGTH (str); i++) > > + { > > + if (TREE_STRING_POINTER (str)[i] != 0) > > + { > > + dest = NULL_TREE; > > + break; > > + } > > + } > > + } > > + else if (gimple_store_p (stmt) > > + && gimple_assign_single_p (stmt) > > + && TREE_CODE (gimple_assign_rhs1 (stmt)) == CONSTRUCTOR > > + && !gimple_clobber_p (stmt)) > > + { > > + dest = gimple_assign_lhs (stmt); > > + ao_ref_init (&read, dest); > > + } > > + > > + if (dest == NULL_TREE) > > + return false; > > + > > + if (len == NULL_TREE) > > + len = (TREE_CODE (dest) == COMPONENT_REF > > + ? DECL_SIZE_UNIT (TREE_OPERAND (dest, 1)) > > + : TYPE_SIZE_UNIT (TREE_TYPE (dest))); > > + if (len == NULL_TREE > > + || !poly_int_tree_p (len)) > > + return false; > > + > > + /* This store needs to be on the byte boundary and pointing to an > > object. */ > > + poly_int64 offset; > > + tree dest_base = get_addr_base_and_unit_offset (dest, &offset); > > + if (dest_base == NULL_TREE) > > + return false; > > + > > + /* Setup the worklist. */ > > + auto_vec<std::pair<tree, unsigned>> worklist; > > + unsigned limit = param_sccvn_max_alias_queries_per_access; > > + worklist.safe_push (std::make_pair (gimple_vdef (stmt), limit)); > > + > > + while (!worklist.is_empty ()) > > + { > > + std::pair<tree, unsigned> top = worklist.pop (); > > + tree vdef = top.first; > > + limit = top.second; > > + gimple *use_stmt; > > + imm_use_iterator iter; > > + FOR_EACH_IMM_USE_STMT (use_stmt, iter, vdef) > > + { > > + /* Handling PHI nodes might not be worth it so don't. */ > > + if (is_a <gphi*> (use_stmt)) > > + continue; > > + > > + /* If this statement does not clobber add the vdef stmt to the > > + worklist. */ > > + if (gimple_vdef (use_stmt) > > + && !stmt_may_clobber_ref_p_1 (use_stmt, &read, > > + /* tbaa_p = */ can_use_tbba) > > + && limit != 0) > > + worklist.safe_push (std::make_pair (gimple_vdef (use_stmt), > > + limit - 1)); > > + > > + if (optimize_aggr_zeroprop_1 (stmt, use_stmt, dest_base, offset, > > + val, wi::to_poly_offset (len))) > > + changed = true; > > + } > > + } > > + > > + return changed; > > +} > > /* Optimizes > > DEST = SRC; > > DEST2 = DEST; # DEST2 = SRC2; > > @@ -1462,22 +1520,6 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p, > > tree callee2) > > > > switch (DECL_FUNCTION_CODE (callee2)) > > { > > - case BUILT_IN_MEMCPY: > > - if (gimple_call_num_args (stmt2) == 3) > > - { > > - tree dest = gimple_call_arg (stmt2, 0); > > - tree src = gimple_call_arg (stmt2, 1); > > - tree len = gimple_call_arg (stmt2, 2); > > - /* Try to optimize the memcpy to memset if src > > - and dest are addresses. */ > > - if (TREE_CODE (dest) == ADDR_EXPR > > - && TREE_CODE (src) == ADDR_EXPR > > - && TREE_CODE (len) == INTEGER_CST > > - && optimize_memcpy_to_memset (gsi_p, TREE_OPERAND (dest, 0), > > - TREE_OPERAND (src, 0), len)) > > - return true; > > - } > > - break; > > case BUILT_IN_MEMCHR: > > if (gimple_call_num_args (stmt2) == 3 > > && (res = gimple_call_lhs (stmt2)) != nullptr > > @@ -1539,6 +1581,13 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p, > > tree callee2) > > break; > > > > case BUILT_IN_MEMSET: > > + if (gimple_call_num_args (stmt2) == 3) > > + { > > + /* Try to prop the zeroing/value of the memset to memcpy > > + if the dest is an address and the value is a constant. */ > > + if (optimize_aggr_zeroprop (gsi_p)) > > + return true; > > + } > > if (gimple_call_num_args (stmt2) != 3 > > || gimple_call_lhs (stmt2) > > || CHAR_BIT != 8 > > @@ -4857,21 +4906,16 @@ pass_forwprop::execute (function *fun) > > { > > tree rhs1 = gimple_assign_rhs1 (stmt); > > enum tree_code code = gimple_assign_rhs_code (stmt); > > - if (gimple_assign_load_p (stmt) && gimple_store_p > > (stmt)) > > + if (gimple_store_p (stmt) && optimize_aggr_zeroprop > > (&gsi)) > > { > > - if (optimize_memcpy_to_memset (&gsi, > > - gimple_assign_lhs > > (stmt), > > - gimple_assign_rhs1 > > (stmt), > > - /* len = > > */NULL_TREE)) > > - { > > - changed = true; > > - break; > > - } > > - if (optimize_agr_copyprop (&gsi)) > > - { > > - changed = true; > > - break; > > - } > > + changed = true; > > + break; > > + } > > + if (gimple_assign_load_p (stmt) && gimple_store_p (stmt) > > + && optimize_agr_copyprop (&gsi)) > > + { > > + changed = true; > > + break; > > } > > > > if (TREE_CODE_CLASS (code) == tcc_comparison) > > -- > > 2.43.0 > >