On Wed, Aug 6, 2025 at 7:34 PM Andrew Pinski <quic_apin...@quicinc.com> wrote: > > This implements the simple copy prop of aggregates into > arguments of function calls. This can reduce the number of copies > done. Just like removing of an extra copy in general, this can and > will help out SRA; since we might not need to do a full scalarization > of the aggregate now. > > This is the simpliest form of this copy prop of aggregates.
Hmm, I wonder about x = foo (x); so cannot both cases apply? In particular ... > gcc/ChangeLog: > > * tree-ssa-forwprop.cc (optimize_agr_copyprop_1): New function split > out of ... > (optimize_agr_copyprop): Here. Also try calling > optimize_agr_copyprop_arg. > (optimize_agr_copyprop_arg): New function. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/copy-prop-aggregate-arg-1.c: New test. > > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> > --- > .../tree-ssa/copy-prop-aggregate-arg-1.c | 21 +++ > gcc/tree-ssa-forwprop.cc | 163 ++++++++++++------ > 2 files changed, 134 insertions(+), 50 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-arg-1.c > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-arg-1.c > b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-arg-1.c > new file mode 100644 > index 00000000000..2ce68918e57 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-arg-1.c > @@ -0,0 +1,21 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O1 -fdump-tree-optimized -fdump-tree-forwprop1-details" } > */ > + > +struct s1 > +{ > + int t[1024]; > +}; > + > +void f(struct s1); > + > +void g(struct s1 a) > +{ > + struct s1 temp_struct0 = a; > + f(temp_struct0); > +} > + > +/* There should be no references to any of "temp_struct*" > + temporaries. */ > +/* { dg-final { scan-tree-dump-times "temp_struct" 0 "optimized" } } */ > +/* Also check that forwprop pass did the copy prop. */ > +/* { dg-final { scan-tree-dump-times "after previous" 1 "forwprop1" } } */ > diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc > index 1cde5f85150..d6f867bae7c 100644 > --- a/gcc/tree-ssa-forwprop.cc > +++ b/gcc/tree-ssa-forwprop.cc > @@ -1416,6 +1416,107 @@ optimize_aggr_zeroprop (gimple_stmt_iterator *gsip) > > return changed; > } > + > +/* Helper function for optimize_agr_copyprop. > + For aggregate copies in USE_STMT, see if DEST > + is on the lhs of USE_STMT and replace it with SRC. */ > +static bool > +optimize_agr_copyprop_1 (gimple *stmt, gimple *use_stmt, > + tree dest, tree src) > +{ > + if (!gimple_assign_load_p (use_stmt) > + || !gimple_store_p (use_stmt)) > + return false; > + if (gimple_has_volatile_ops (use_stmt)) > + return false; > + tree dest2 = gimple_assign_lhs (use_stmt); > + tree src2 = gimple_assign_rhs1 (use_stmt); > + /* If the new store is `src2 = src2;` skip over it. */ > + if (operand_equal_p (src2, dest2, 0)) > + return false; > + if (!operand_equal_p (dest, src2, 0)) > + return false; > + /* For 2 memory refences and using a temporary to do the copy, > + don't remove the temporary as the 2 memory references might overlap. > + Note t does not need to be decl as it could be field. > + See PR 22237 for full details. > + E.g. > + t = *a; #DEST = SRC; > + *b = t; #DEST2 = SRC2; > + Cannot be convert into > + t = *a; > + *b = *a; > + Though the following is allowed to be done: > + t = *a; > + *a = t; > + And convert it into: > + t = *a; > + *a = *a; > + */ > + if (!operand_equal_p (dest2, src, 0) > + && !DECL_P (dest2) && !DECL_P (src)) > + return false; > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + fprintf (dump_file, "Simplified\n "); > + print_gimple_stmt (dump_file, use_stmt, 0, dump_flags); > + fprintf (dump_file, "after previous\n "); > + print_gimple_stmt (dump_file, stmt, 0, dump_flags); > + } > + gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); > + gimple_assign_set_rhs_from_tree (&gsi, unshare_expr (src)); > + update_stmt (use_stmt); > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + fprintf (dump_file, "into\n "); > + print_gimple_stmt (dump_file, use_stmt, 0, dump_flags); > + } > + statistics_counter_event (cfun, "copy prop for aggregate", 1); > + return true; > +} > + > +/* Helper function for optimize_agr_copyprop_1, propagate aggregates > + into the arguments of USE_STMT if the argument matches with DEST; > + replacing it with SRC. */ > +static bool > +optimize_agr_copyprop_arg (gimple *defstmt, gimple *use_stmt, > + tree dest, tree src) > +{ > + gcall *call = dyn_cast<gcall*>(use_stmt); > + if (!call) > + return false; > + bool changed = false; > + for (unsigned arg = 0; arg < gimple_call_num_args (call); arg++) > + { > + tree *argptr = gimple_call_arg_ptr (call, arg); > + if (TREE_CODE (*argptr) == SSA_NAME > + || is_gimple_min_invariant (*argptr) > + || TYPE_VOLATILE (TREE_TYPE (*argptr))) > + continue; > + if (!operand_equal_p (*argptr, dest, 0)) > + continue; > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + fprintf (dump_file, "Simplified\n "); > + print_gimple_stmt (dump_file, call, 0, dump_flags); > + fprintf (dump_file, "after previous\n "); > + print_gimple_stmt (dump_file, defstmt, 0, dump_flags); > + } > + *argptr = unshare_expr (src); > + changed = true; > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + fprintf (dump_file, "into\n "); > + print_gimple_stmt (dump_file, call, 0, dump_flags); > + } > + } > + if (changed) > + update_stmt (call); > + return changed; > +} > + > /* Optimizes > DEST = SRC; > DEST2 = DEST; # DEST2 = SRC2; > @@ -1424,6 +1525,14 @@ optimize_aggr_zeroprop (gimple_stmt_iterator *gsip) > DEST2 = SRC; > GSIP is the first statement and SRC is the common > between the statements. > + > + Also optimizes: > + DEST = SRC; > + call_func(..., DEST, ...); > + into: > + DEST = SRC; > + call_func(..., SRC, ...); > + > */ > static bool > optimize_agr_copyprop (gimple_stmt_iterator *gsip) > @@ -1448,56 +1557,10 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip) > bool changed = false; > FOR_EACH_IMM_USE_STMT (use_stmt, iter, vdef) > { > - if (!gimple_assign_load_p (use_stmt) > - || !gimple_store_p (use_stmt)) > - continue; > - if (gimple_has_volatile_ops (use_stmt)) > - continue; > - tree dest2 = gimple_assign_lhs (use_stmt); > - tree src2 = gimple_assign_rhs1 (use_stmt); > - /* If the new store is `src2 = src2;` skip over it. */ > - if (operand_equal_p (src2, dest2, 0)) > - continue; > - if (!operand_equal_p (dest, src2, 0)) > - continue; > - /* For 2 memory refences and using a temporary to do the copy, > - don't remove the temporary as the 2 memory references might overlap. > - Note t does not need to be decl as it could be field. > - See PR 22237 for full details. > - E.g. > - t = *a; #DEST = SRC; > - *b = t; #DEST2 = SRC2; > - Cannot be convert into > - t = *a; > - *b = *a; > - Though the following is allowed to be done: > - t = *a; > - *a = t; > - And convert it into: > - t = *a; > - *a = *a; > - */ > - if (!operand_equal_p (dest2, src, 0) > - && !DECL_P (dest2) && !DECL_P (src)) > - continue; > - if (dump_file && (dump_flags & TDF_DETAILS)) > - { > - fprintf (dump_file, "Simplified\n "); > - print_gimple_stmt (dump_file, use_stmt, 0, dump_flags); > - fprintf (dump_file, "after previous\n "); > - print_gimple_stmt (dump_file, stmt, 0, dump_flags); > - } > - gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); > - gimple_assign_set_rhs_from_tree (&gsi, unshare_expr (src)); > - update_stmt (use_stmt); > - > - if (dump_file && (dump_flags & TDF_DETAILS)) > - { > - fprintf (dump_file, "into\n "); > - print_gimple_stmt (dump_file, use_stmt, 0, dump_flags); > - } > - statistics_counter_event (cfun, "copy prop for aggregate", 1); > - changed = true; > + if (optimize_agr_copyprop_1 (stmt, use_stmt, dest, src)) ... it's not obvious this only applies to gassign use_stmt and > + changed = true; > + else if (optimize_agr_copyprop_arg (stmt, use_stmt, dest, src)) ... this to gcall use_stmt. Having those checks here would be better IMO. Richard. > + changed = true; > } > return changed; > } > -- > 2.43.0 >