On Sat, Apr 11, 2026 at 2:44 AM Pengxuan Zheng
<[email protected]> wrote:
>
> Currently, cselim requires the middle_bb to have only a single statement. This
> patch relaxes this restriction if the following pattern is found and also when
> it is safe to do the optimization.
>
> SPLIT_BB:
> ...
> STORE = X
> if (cond) goto MIDDLE_BB; else goto JOIN_BB (edge E1)
> MIDDLE_BB:
> ...
> STORE = Y;
> ...
> fallthrough (edge E0)
> JOIN_BB:
> some more
>
> Bootstrapped and tested on x86_64-linux-gnu and aarch64-linux-gnu.
>
> PR tree-optimization/124405
>
> gcc/ChangeLog:
>
> * tree-ssa-phiopt.cc (trailing_store_in_bb): Allow vphi to be NULL to
> support more general scenarios.
> (cselim_candidate): New.
> (cond_store_replacement): Add split_bb argument and call
> cselim_candidate instead of last_and_only_stmt.
> (pass_cselim::execute): Update call to cond_store_replacement.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/pr124405.c: New test.
>
> Signed-off-by: Pengxuan Zheng <[email protected]>
> ---
> gcc/testsuite/gcc.dg/tree-ssa/pr124405.c | 12 ++
> gcc/tree-ssa-phiopt.cc | 151 +++++++++++++++++------
> 2 files changed, 124 insertions(+), 39 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr124405.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr124405.c
> b/gcc/testsuite/gcc.dg/tree-ssa/pr124405.c
> new file mode 100644
> index 00000000000..9ba230d2b56
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr124405.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -fdump-tree-cselim-details" } */
> +
> +void
> +f (int *a, int b)
> +{
> + *a &= ~3;
> + if (b)
> + *a |= 1;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Conditional store replacement
> happened" 1 "cselim"} } */
> diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
> index 0bf7e58b8f0..1db20ce53e8 100644
> --- a/gcc/tree-ssa-phiopt.cc
> +++ b/gcc/tree-ssa-phiopt.cc
> @@ -2980,6 +2980,113 @@ get_non_trapping (void)
> return nontrap;
> }
>
> +/* Return the last store in BB with VDEF or NULL if there are loads following
> + the store. VPHI if non-NULL is where the only use of the vdef should be.
> If
> + VPHI is NULL, return NULL if there is any load or store following the
> store.
> + If ONLYONESTORE is true, then the store is the only store in the BB. */
> +
> +static gimple *
> +trailing_store_in_bb (basic_block bb, tree vdef, gphi *vphi, bool
> onlyonestore)
> +{
> + if (SSA_NAME_IS_DEFAULT_DEF (vdef))
> + return NULL;
> + gimple *store = SSA_NAME_DEF_STMT (vdef);
> + if (gimple_bb (store) != bb
> + || gimple_code (store) == GIMPLE_PHI)
> + return NULL;
> +
> + /* Verify there is no other store in this BB if requested. */
> + if (onlyonestore
> + && !SSA_NAME_IS_DEFAULT_DEF (gimple_vuse (store))
> + && gimple_bb (SSA_NAME_DEF_STMT (gimple_vuse (store))) == bb
> + && gimple_code (SSA_NAME_DEF_STMT (gimple_vuse (store))) != GIMPLE_PHI)
> + return NULL;
> +
> + if (vphi)
> + {
> + /* Verify the vdef of the store should only be used by VPHI. */
> + use_operand_p use_p;
> + gimple *use_stmt;
> + if (!single_imm_use (gimple_vdef (store), &use_p, &use_stmt))
> + return NULL;
> + if (use_stmt != vphi)
> + return NULL;
> + }
> + else
> + {
> + /* Verify there is no load or store after the store. */
> + use_operand_p use_p;
> + imm_use_iterator imm_iter;
> + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_vdef (store))
> + if (USE_STMT (use_p) != store && gimple_bb (USE_STMT (use_p)) == bb)
The VDEF of a store is never used in the store itself, so the BB check
is enough.
Note I don't see why we'd ever have no virtual PHI when there's a store in
middle-bb.
> + return NULL;
> + }
> +
> + return store;
> +}
> +
> +/* Return the candidate store for cselim. If there is only a single
> statement
> + in MIDDLE_BB, return that statement. Otherwise, try to find the following
> + pattern and return the only store in MIDDLE_BB.
> +
> + SPLIT_BB:
> + ...
> + STORE = X
> + if (cond) goto MIDDLE_BB; else goto JOIN_BB (edge E1)
> + MIDDLE_BB:
> + ...
> + STORE = Y;
> + ...
> + fallthrough (edge E0)
> + JOIN_BB:
> + some more
> +*/
> +
> +static gimple *
> +cselim_candidate (basic_block split_bb, basic_block middle_bb,
> + basic_block join_bb, edge e0, edge e1)
> +{
> + gimple *middle_assign = last_and_only_stmt (middle_bb);
> + if (middle_assign)
> + return middle_assign;
> +
> + gphi *vphi = get_virtual_phi (join_bb);
> + if (!vphi)
> + return NULL;
> +
> + tree middle_vdef = PHI_ARG_DEF_FROM_EDGE (vphi, e0);
> + middle_assign = trailing_store_in_bb (middle_bb, middle_vdef, vphi, true);
> +
> + if (!middle_assign || !gimple_assign_single_p (middle_assign)
> + || gimple_has_volatile_ops (middle_assign)
> + || stmt_references_abnormal_ssa_name (middle_assign))
> + return NULL;
> +
> + tree split_vdef = PHI_ARG_DEF_FROM_EDGE (vphi, e1);
> + gimple *split_assign
> + = trailing_store_in_bb (split_bb, split_vdef, NULL, true);
> +
So what are you trying to match here? A comment is very much
warranted. Why can't you just look at the definition of the
VUSE of split_vdef? e1->src == split_bb, right?
> + if (!split_assign || !gimple_assign_single_p (split_assign)
> + || gimple_has_volatile_ops (split_assign)
> + || stmt_references_abnormal_ssa_name (split_assign))
> + return NULL;
> +
> + if (gimple_vdef (split_assign) != gimple_vuse (middle_assign)
> + || !operand_equal_p (gimple_assign_lhs (middle_assign),
> + gimple_assign_lhs (split_assign), 0))
> + return NULL;
> +
> + /* The vdef of split_assign should only be used by middle_assign or vphi.
> */
Why's that? Isn't the earlier store solely because we need to know
the conditional
store doesn't trap? For the actual cselim we'll insert a load, no?
So we'd need
to verify there's no (aliasing) store between split_bb and the trailing store
in middle-bb?
> + use_operand_p use_p;
> + imm_use_iterator imm_iter;
> + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_vdef (split_assign))
> + if (USE_STMT (use_p) != middle_assign
> + && USE_STMT (use_p) != vphi)
> + return NULL;
> +
> + return middle_assign;
> +}
> +
> /* Do the main work of conditional store replacement. We already know
> that the recognized pattern looks like so:
>
> @@ -2997,10 +3104,11 @@ get_non_trapping (void)
> object) and that the store has a "simple" RHS. */
>
> static bool
> -cond_store_replacement (basic_block middle_bb, basic_block join_bb,
> - edge e0, edge e1, hash_set<tree> *nontrap)
> +cond_store_replacement (basic_block split_bb, basic_block middle_bb,
> + basic_block join_bb, edge e0, edge e1,
> + hash_set<tree> *nontrap)
> {
> - gimple *assign = last_and_only_stmt (middle_bb);
> + gimple *assign = cselim_candidate (split_bb, middle_bb, join_bb, e0, e1);
I think logically this now belongs as pre-check to ... (*)
> tree lhs, rhs, name, name2;
> gphi *newphi;
> gassign *new_stmt;
> @@ -3254,41 +3362,6 @@ cond_if_else_store_replacement_1 (basic_block then_bb,
> basic_block else_bb,
> return true;
> }
>
> -/* Return the last store in BB with VDEF or NULL if there are
> - loads following the store. VPHI is where the only use of the
> - vdef should be. If ONLYONESTORE is true, then the store is
> - the only store in the BB. */
> -
> -static gimple *
> -trailing_store_in_bb (basic_block bb, tree vdef, gphi *vphi, bool
> onlyonestore)
> -{
> - if (SSA_NAME_IS_DEFAULT_DEF (vdef))
> - return NULL;
> - gimple *store = SSA_NAME_DEF_STMT (vdef);
> - if (gimple_bb (store) != bb
> - || gimple_code (store) == GIMPLE_PHI)
> - return NULL;
> -
> - /* Verify there is no other store in this BB if requested. */
> - if (onlyonestore
> - && !SSA_NAME_IS_DEFAULT_DEF (gimple_vuse (store))
> - && gimple_bb (SSA_NAME_DEF_STMT (gimple_vuse (store))) == bb
> - && gimple_code (SSA_NAME_DEF_STMT (gimple_vuse (store))) != GIMPLE_PHI)
> - return NULL;
> -
> -
> - /* Verify there is no load or store after the store, the vdef of the store
> - should only be used by the vphi joining the 2 bbs. */
> - use_operand_p use_p;
> - gimple *use_stmt;
> - if (!single_imm_use (gimple_vdef (store), &use_p, &use_stmt))
> - return NULL;
> - if (use_stmt != vphi)
> - return NULL;
> -
> - return store;
> -}
> -
> /* Limited Conditional store replacement. We already know
> that the recognized pattern looks like so:
>
> @@ -4259,7 +4332,7 @@ pass_cselim::execute (function *)
> optimization if the join block has more than two predecessors. */
> if (EDGE_COUNT (bb2->preds) > 2)
> return;
> - if (cond_store_replacement (bb1, bb2, e1, e2, nontrap))
> + if (cond_store_replacement (bb, bb1, bb2, e1, e2, nontrap))
(*) here.
> cfgchanged = true;
> };
>
> --
> 2.34.1
>