> On 13.08.2025 at 19:04, Andrew Pinski <andrew.pin...@oss.qualcomm.com> wrote:
>
> https://gcc.gnu.org/pipermail/gcc-patches/2025-August/692091.html
> pointed out:
> '''
> Oh, as we now do alias walks in forwprop maybe we should make this
> conditional and do
> this not for all pass instances, since it makes forwprop possibly a lot
> slower?
> '''
>
> This patch limits the walk in a few different ways.
> First, only allow a full walk in the first two forwprop instances (the
> one before inlining and the one after inlining). The other two forwprop
> instances are less likely to find any extra zero prop, so limit them to
> no walk at all.
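>
> As a concrete illustration (a hypothetical testcase, not one from the
> patch), the walk is what lets the zero from a memset reach a later copy
> even when unrelated stores sit between the two on the virtual def-use
> chain:
>
> struct S { int a[16]; };
> int counter;
>
> void
> f (struct S *d)
> {
>   struct S s;
>   __builtin_memset (&s, 0, sizeof (s)); /* Source of the zero.  */
>   counter = 1;                          /* Unrelated store the walk steps over.  */
>   __builtin_memcpy (d, &s, sizeof (s)); /* Can become memset (d, 0, sizeof (s)).  */
> }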
>
> There is one exception to this rule: skipping over clobbers is still
> allowed, since clobbers do not take long to walk over and, from looking
> at benchmarks, they are the only statements forwprop3/4 would need to
> walk past to find a zero prop.
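>
> A sketch of the kind of case the exception keeps working (again a
> hypothetical testcase): after inlining, an empty destructor leaves only
> an end-of-scope clobber between the memset and the copy, and the walk
> can step over the clobber even once its limit has reached 0:
>
> struct S { int a[16]; };
> struct Guard { ~Guard () {} };
>
> void
> g (struct S *d)
> {
>   struct S s;
>   __builtin_memset (&s, 0, sizeof (s));
>   {
>     Guard guard; /* After inlining only an end-of-scope clobber of guard remains.  */
>   }
>   *d = s;        /* Can still become memset (d, 0, sizeof (s)).  */
> }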
>
> The other change is to allow a full walk only when
> flag_expensive_optimizations is true. This limits the walk at -O1,
> since flag_expensive_optimizations is only turned on at -O2 and above.
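>
> Putting the two limits together, the effective walk budget per pass
> instance works out as in the following sketch (a hypothetical helper,
> not code from the patch):
>
> /* forwprop1/2 pass full_walk as true and forwprop3/4 as false; the
>    budget is nonzero only when flag_expensive_optimizations (-O2+) is
>    on.  A budget of 0 still lets the walk pass through clobbers.  */
> static unsigned
> walk_budget (bool instance_full_walk, bool expensive_opts,
>              unsigned max_queries)
> {
>   return (instance_full_walk && expensive_opts) ? max_queries : 0;
> }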
>
> Bootstrapped and tested on x86_64-linux-gnu.
Ok
Thanks,
Richard
> gcc/ChangeLog:
>
> * passes.def: Update forwprop1/2 to pass full_walk as true.
> * tree-ssa-forwprop.cc (optimize_aggr_zeroprop): Add new argument
> full_walk. Take full_walk into account when setting the limit;
> clobbers can always be walked over, even once the limit is hit.
> (simplify_builtin_call): Add new argument, full_walk.
> Update call to optimize_aggr_zeroprop.
> (pass_forwprop): Add m_full_walk field.
> (pass_forwprop::set_pass_param): Update for m_full_walk.
> (pass_forwprop::execute): Update call to simplify_builtin_call
> and optimize_aggr_zeroprop.
>
> Signed-off-by: Andrew Pinski <andrew.pin...@oss.qualcomm.com>
> ---
> gcc/passes.def | 8 ++++----
> gcc/tree-ssa-forwprop.cc | 42 ++++++++++++++++++++++++++++------------
> 2 files changed, 34 insertions(+), 16 deletions(-)
>
> diff --git a/gcc/passes.def b/gcc/passes.def
> index d528a0477d9..68ce53baa0f 100644
> --- a/gcc/passes.def
> +++ b/gcc/passes.def
> @@ -83,7 +83,7 @@ along with GCC; see the file COPYING3. If not see
> NEXT_PASS (pass_ccp, false /* nonzero_p */);
> /* After CCP we rewrite no longer addressed locals into SSA
> form if possible. */
> - NEXT_PASS (pass_forwprop, /*last=*/false);
> + NEXT_PASS (pass_forwprop, /*full_walk=*/true);
> NEXT_PASS (pass_early_thread_jumps, /*first=*/true);
> NEXT_PASS (pass_sra_early);
> /* pass_build_ealias is a dummy pass that ensures that we
> @@ -221,7 +221,7 @@ along with GCC; see the file COPYING3. If not see
> NEXT_PASS (pass_complete_unrolli);
> NEXT_PASS (pass_backprop);
> NEXT_PASS (pass_phiprop);
> - NEXT_PASS (pass_forwprop, /*last=*/false);
> + NEXT_PASS (pass_forwprop, /*full_walk=*/true);
> /* pass_build_alias is a dummy pass that ensures that we
> execute TODO_rebuild_alias at this point. */
> NEXT_PASS (pass_build_alias);
> @@ -261,7 +261,7 @@ along with GCC; see the file COPYING3. If not see
> NEXT_PASS (pass_isolate_erroneous_paths);
> NEXT_PASS (pass_reassoc, true /* early_p */);
> NEXT_PASS (pass_dce);
> - NEXT_PASS (pass_forwprop, /*last=*/false);
> + NEXT_PASS (pass_forwprop);
> NEXT_PASS (pass_phiopt, false /* early_p */);
> NEXT_PASS (pass_ccp, true /* nonzero_p */);
> /* After CCP we rewrite no longer addressed locals into SSA
> @@ -363,7 +363,7 @@ along with GCC; see the file COPYING3. If not see
> NEXT_PASS (pass_dce, true /* update_address_taken_p */, true /* remove_unused_locals */);
> /* After late DCE we rewrite no longer addressed locals into SSA
> form if possible. */
> - NEXT_PASS (pass_forwprop, /*last=*/true);
> + NEXT_PASS (pass_forwprop, /*full_walk=*/false, /*last=*/true);
> NEXT_PASS (pass_sink_code, true /* unsplit edges */);
> NEXT_PASS (pass_phiopt, false /* early_p */);
> NEXT_PASS (pass_fold_builtins);
> diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
> index ec4fbeb9e54..ebf625f9c7e 100644
> --- a/gcc/tree-ssa-forwprop.cc
> +++ b/gcc/tree-ssa-forwprop.cc
> @@ -1299,7 +1299,7 @@ optimize_aggr_zeroprop_1 (gimple *defstmt, gimple *stmt,
> and/or memcpy (&b, &a, sizeof (a)); instead of b = a; */
>
> static bool
> -optimize_aggr_zeroprop (gimple_stmt_iterator *gsip)
> +optimize_aggr_zeroprop (gimple_stmt_iterator *gsip, bool full_walk)
> {
> ao_ref read;
> gimple *stmt = gsi_stmt (*gsip);
> @@ -1383,7 +1383,7 @@ optimize_aggr_zeroprop (gimple_stmt_iterator *gsip)
>
> /* Setup the worklist. */
> auto_vec<std::pair<tree, unsigned>> worklist;
> - unsigned limit = param_sccvn_max_alias_queries_per_access;
> + unsigned limit = full_walk ? param_sccvn_max_alias_queries_per_access : 0;
> worklist.safe_push (std::make_pair (gimple_vdef (stmt), limit));
>
> while (!worklist.is_empty ())
> @@ -1400,13 +1400,17 @@ optimize_aggr_zeroprop (gimple_stmt_iterator *gsip)
> continue;
>
> /* If this statement does not clobber add the vdef stmt to the
> - worklist. */
> - if (limit != 0
> + worklist.
> + After hitting the limit, still allow clobbers to pass through. */
> + if ((limit != 0 || gimple_clobber_p (use_stmt))
> && gimple_vdef (use_stmt)
> && !stmt_may_clobber_ref_p_1 (use_stmt, &read,
> /* tbaa_p = */ can_use_tbba))
> - worklist.safe_push (std::make_pair (gimple_vdef (use_stmt),
> - limit - 1));
> + {
> + unsigned new_limit = limit == 0 ? 0 : limit - 1;
> + worklist.safe_push (std::make_pair (gimple_vdef (use_stmt),
> + new_limit));
> + }
>
> if (optimize_aggr_zeroprop_1 (stmt, use_stmt, dest_base, offset,
> val, wi::to_poly_offset (len)))
> @@ -1591,7 +1595,7 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip)
> to __atomic_fetch_op (p, x, y) when possible (also __sync). */
>
> static bool
> -simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2)
> +simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2, bool full_walk)
> {
> gimple *stmt1, *stmt2 = gsi_stmt (*gsi_p);
> enum built_in_function other_atomic = END_BUILTINS;
> @@ -1670,7 +1674,7 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2)
> {
> /* Try to prop the zeroing/value of the memset to memcpy
> if the dest is an address and the value is a constant. */
> - if (optimize_aggr_zeroprop (gsi_p))
> + if (optimize_aggr_zeroprop (gsi_p, full_walk))
> return true;
> }
> if (gimple_call_num_args (stmt2) != 3
> @@ -4460,8 +4464,17 @@ public:
> opt_pass * clone () final override { return new pass_forwprop (m_ctxt); }
> void set_pass_param (unsigned int n, bool param) final override
> {
> - gcc_assert (n == 0);
> - last_p = param;
> + switch (n)
> + {
> + case 0:
> + m_full_walk = param;
> + break;
> + case 1:
> + last_p = param;
> + break;
> + default:
> + gcc_unreachable ();
> + }
> }
> bool gate (function *) final override { return flag_tree_forwprop; }
> unsigned int execute (function *) final override;
> @@ -4469,12 +4482,17 @@ public:
> private:
> /* Determines whether the pass instance should set PROP_last_full_fold. */
> bool last_p;
> +
> + /* True if the aggregate propagations should do a full walk.  */
> + bool m_full_walk = false;
> }; // class pass_forwprop
>
> unsigned int
> pass_forwprop::execute (function *fun)
> {
> unsigned int todoflags = 0;
> + /* Allow a full walk only when expensive optimizations are on. */
> + bool full_walk = m_full_walk && flag_expensive_optimizations;
>
> cfg_changed = false;
> if (last_p)
> @@ -4991,7 +5009,7 @@ pass_forwprop::execute (function *fun)
> {
> tree rhs1 = gimple_assign_rhs1 (stmt);
> enum tree_code code = gimple_assign_rhs_code (stmt);
> - if (gimple_store_p (stmt) && optimize_aggr_zeroprop (&gsi))
> + if (gimple_store_p (stmt) && optimize_aggr_zeroprop (&gsi, full_walk))
> {
> changed = true;
> break;
> @@ -5051,7 +5069,7 @@ pass_forwprop::execute (function *fun)
> tree callee = gimple_call_fndecl (stmt);
> if (callee != NULL_TREE
> && fndecl_built_in_p (callee, BUILT_IN_NORMAL))
> - changed |= simplify_builtin_call (&gsi, callee);
> + changed |= simplify_builtin_call (&gsi, callee, full_walk);
> break;
> }
>
> --
> 2.43.0
>