On Wed, Dec 8, 2021 at 7:32 AM Xionghu Luo <[email protected]> wrote:
>
>
>
> On 2021/12/7 20:17, Richard Biener wrote:
> >>> + class loop *coldest_loop = coldest_outermost_loop[loop->num];
> >>> + if (loop_depth (coldest_loop) < loop_depth (outermost_loop))
> >>> + {
> >>> + class loop *hotter_loop = hotter_than_inner_loop[loop->num];
> >>> + if (!hotter_loop
> >>> + || loop_depth (hotter_loop) < loop_depth (outermost_loop))
> >>> + return outermost_loop;
> >>> +
> >>> + /* hotter_loop is between OUTERMOST_LOOP and LOOP like:
> >>> + [loop tree root, ..., coldest_loop, ..., outermost_loop, ...,
> >>> + hotter_loop, second_coldest_loop, ..., loop]
> >>> + return second_coldest_loop to be the hoist target. */
> >>> + class loop *aloop;
> >>> + for (aloop = hotter_loop->inner; aloop; aloop = aloop->next)
> >>> + if (flow_loop_nested_p (aloop, loop))
> >> should be:
> >>
> >> if (aloop == loop || flow_loop_nested_p (aloop, loop))
> > OK with that fixed.
> >
> > Are necessary prerequesites committed to avoid regressions?
> > I guess we need to keep a watchful eye and eventually revert
> > (or gate with a --param disabled by default) the new behavior if
> > severe regressions are discovered.
> >
> > Thanks and sorry for the repeated delays.
> > Richard.
> >
>
> Thanks for your review, I learned quite a lot and gained very useful
> comments & help through the period :) There are still 3 patches required
> to avoid regression or so, I've reorganized them and sent it out.
>
> https://gcc.gnu.org/pipermail/gcc-patches/2021-December/586371.html
>
>
> In addition, cooked the patch to add option for disable/enable it.
> Is it OK to merge it to current patch?
Hmm, let's go without this flag for now, we can add something like this
when we see a testcase where that makes sense (and where profile info
is not wrecked by other bugs).
Adding a --param would be a no brainer, but for new user-facing options
we should be more careful.
Richard.
>
> [PATCH] Add option -fhoist-to-cold-loop
>
>
> gcc/ChangeLog:
>
> * common.opt: New.
> * loop-invariant.c (find_invariants_bb):
> * tree-ssa-loop-im.c (get_coldest_out_loop):
> (can_sm_ref_p):
> (loop_invariant_motion_in_fun):
> ---
> gcc/common.opt | 4 ++++
> gcc/loop-invariant.c | 2 +-
> gcc/tree-ssa-loop-im.c | 33 ++++++++++++++++++++++-----------
> 3 files changed, 27 insertions(+), 12 deletions(-)
>
> diff --git a/gcc/common.opt b/gcc/common.opt
> index b921f5e3b25..62b82bd8b95 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -1171,6 +1171,10 @@ fcode-hoisting
> Common Var(flag_code_hoisting) Optimization
> Enable code hoisting.
>
> +fhoist-to-cold-loop
> +Common Var(flag_hoist_to_cold_loop) Init(1) Optimization
> +Enable hoisting code to cold loop.
> +
> fcombine-stack-adjustments
> Common Var(flag_combine_stack_adjustments) Optimization
> Looks for opportunities to reduce stack adjustments and stack references.
> diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c
> index 5c3be7bf0eb..75b9dd47cd7 100644
> --- a/gcc/loop-invariant.c
> +++ b/gcc/loop-invariant.c
> @@ -1189,7 +1189,7 @@ find_invariants_bb (class loop *loop, basic_block bb,
> bool always_reached,
> rtx_insn *insn;
> basic_block preheader = loop_preheader_edge (loop)->src;
>
> - if (preheader->count > bb->count)
> + if (flag_hoist_to_cold_loop && preheader->count > bb->count)
> return;
>
> FOR_BB_INSNS (bb, insn)
> diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
> index 565ee62d3f7..d745f66851b 100644
> --- a/gcc/tree-ssa-loop-im.c
> +++ b/gcc/tree-ssa-loop-im.c
> @@ -450,6 +450,9 @@ static class loop *
> get_coldest_out_loop (class loop *outermost_loop, class loop *loop,
> basic_block curr_bb)
> {
> + if (!flag_hoist_to_cold_loop)
> + return outermost_loop;
> +
> gcc_assert (outermost_loop == loop
> || flow_loop_nested_p (outermost_loop, loop));
>
> @@ -3031,8 +3034,9 @@ can_sm_ref_p (class loop *loop, im_mem_ref *ref)
> /* Verify whether the candidate is hot for LOOP. Only do store motion if
> the
> candidate's profile count is hot. Statement in cold BB shouldn't be
> moved
> out of it's loop_father. */
> - if (!for_all_locs_in_loop (loop, ref, ref_in_loop_hot_body (loop)))
> - return false;
> + if (flag_hoist_to_cold_loop)
> + if (!for_all_locs_in_loop (loop, ref, ref_in_loop_hot_body (loop)))
> + return false;
>
> return true;
> }
> @@ -3373,8 +3377,11 @@ tree_ssa_lim_finalize (void)
>
> free (bb_loop_postorder);
>
> - coldest_outermost_loop.release ();
> - hotter_than_inner_loop.release ();
> + if (flag_hoist_to_cold_loop)
> + {
> + coldest_outermost_loop.release ();
> + hotter_than_inner_loop.release ();
> + }
> }
>
> /* Moves invariants from loops. Only "expensive" invariants are moved out --
> @@ -3396,13 +3403,17 @@ loop_invariant_motion_in_fun (function *fun, bool
> store_motion)
>
> /* Pre-compute coldest outermost loop and nearest hotter loop of each loop.
> */
> - class loop *loop;
> - coldest_outermost_loop.create (number_of_loops (cfun));
> - coldest_outermost_loop.safe_grow_cleared (number_of_loops (cfun));
> - hotter_than_inner_loop.create (number_of_loops (cfun));
> - hotter_than_inner_loop.safe_grow_cleared (number_of_loops (cfun));
> - for (loop = current_loops->tree_root->inner; loop != NULL; loop =
> loop->next)
> - fill_coldest_and_hotter_out_loop (loop, NULL, loop);
> + if (flag_hoist_to_cold_loop)
> + {
> + class loop *loop;
> + coldest_outermost_loop.create (number_of_loops (cfun));
> + coldest_outermost_loop.safe_grow_cleared (number_of_loops (cfun));
> + hotter_than_inner_loop.create (number_of_loops (cfun));
> + hotter_than_inner_loop.safe_grow_cleared (number_of_loops (cfun));
> + for (loop = current_loops->tree_root->inner; loop != NULL;
> + loop = loop->next)
> + fill_coldest_and_hotter_out_loop (loop, NULL, loop);
> + }
>
> int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
> int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
> --
> 2.27.0.90.geebb51ba8c
>
>