On Wed, May 3, 2023 at 8:28 AM Andrew Pinski <pins...@gmail.com> wrote: > > On Tue, May 2, 2023 at 11:14 PM Richard Biener > <richard.guent...@gmail.com> wrote: > > > > On Wed, May 3, 2023 at 12:04 AM Andrew Pinski <pins...@gmail.com> wrote: > > > > > > On Tue, May 2, 2023 at 5:26 AM Richard Biener via Gcc-patches > > > <gcc-patches@gcc.gnu.org> wrote: > > > > > > > > On Sun, Apr 30, 2023 at 11:14 PM Andrew Pinski via Gcc-patches > > > > <gcc-patches@gcc.gnu.org> wrote: > > > > > > > > > > While looking at differences between what minmax_replacement > > > > > and match_simplify_replacement does. I noticed that they sometimes > > > > > chose different edges to remove. I decided we should be able to do > > > > > better and be able to remove both empty basic blocks in the > > > > > case of match_simplify_replacement as that moves the statements. > > > > > > > > > > This also updates the testcases as now match_simplify_replacement > > > > > will remove the unused MIN/MAX_EXPR and they were checking for > > > > > those. > > > > > > > > > > OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions. > > > > > > > > > > gcc/ChangeLog: > > > > > > > > > > * tree-ssa-phiopt.cc (copy_phi_args): New function. > > > > > (replace_phi_edge_with_variable): Handle diamond form bb > > > > > with forwarder only empty blocks better. > > > > > > > > > > gcc/testsuite/ChangeLog: > > > > > > > > > > * gcc.dg/tree-ssa/minmax-15.c: Update test. > > > > > * gcc.dg/tree-ssa/minmax-16.c: Update test. > > > > > * gcc.dg/tree-ssa/minmax-3.c: Update test. > > > > > * gcc.dg/tree-ssa/minmax-4.c: Update test. > > > > > * gcc.dg/tree-ssa/minmax-5.c: Update test. > > > > > * gcc.dg/tree-ssa/minmax-8.c: Update test. > > > > > --- > > > > > gcc/testsuite/gcc.dg/tree-ssa/minmax-15.c | 3 +- > > > > > gcc/testsuite/gcc.dg/tree-ssa/minmax-16.c | 9 ++-- > > > > > gcc/testsuite/gcc.dg/tree-ssa/minmax-3.c | 2 +- > > > > > gcc/testsuite/gcc.dg/tree-ssa/minmax-4.c | 2 +- > > > > > gcc/testsuite/gcc.dg/tree-ssa/minmax-5.c | 2 +- > > > > > gcc/testsuite/gcc.dg/tree-ssa/minmax-8.c | 2 +- > > > > > gcc/tree-ssa-phiopt.cc | 51 > > > > > ++++++++++++++++++++++- > > > > > 7 files changed, 59 insertions(+), 12 deletions(-) > > > > > > > > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmax-15.c > > > > > b/gcc/testsuite/gcc.dg/tree-ssa/minmax-15.c > > > > > index 8a39871c938..6731f91e6c3 100644 > > > > > --- a/gcc/testsuite/gcc.dg/tree-ssa/minmax-15.c > > > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/minmax-15.c > > > > > @@ -30,5 +30,6 @@ main (void) > > > > > return 0; > > > > > } > > > > > > > > > > -/* { dg-final { scan-tree-dump-times "MIN_EXPR" 3 "phiopt1" } } */ > > > > > +/* There should only be two MIN_EXPR left, the 3rd one was removed. > > > > > */ > > > > > +/* { dg-final { scan-tree-dump-times "MIN_EXPR" 2 "phiopt1" } } */ > > > > > /* { dg-final { scan-tree-dump-times "MAX_EXPR" 0 "phiopt1" } } */ > > > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmax-16.c > > > > > b/gcc/testsuite/gcc.dg/tree-ssa/minmax-16.c > > > > > index 623b12b3f74..094364e6424 100644 > > > > > --- a/gcc/testsuite/gcc.dg/tree-ssa/minmax-16.c > > > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/minmax-16.c > > > > > @@ -25,11 +25,8 @@ main (void) > > > > > return 0; > > > > > } > > > > > > > > > > -/* After phiopt1, there really should be only 3 MIN_EXPR in the IR > > > > > (including debug statements). > > > > > - But the way phiopt does not cleanup the CFG all the time, the PHI > > > > > might still reference the > > > > > - alternative bb's moved statement. > > > > > - Note in the end, we do dce the statement and other debug > > > > > statements to end up with only 2 MIN_EXPR. > > > > > - So check that too. */ > > > > > -/* { dg-final { scan-tree-dump-times "MIN_EXPR" 4 "phiopt1" } } */ > > > > > +/* After phiopt1, will be only 2 MIN_EXPR in the IR (including debug > > > > > statements). */ > > > > > +/* xk will only have the final result so the extra debug info does > > > > > not change anything. */ > > > > > +/* { dg-final { scan-tree-dump-times "MIN_EXPR" 2 "phiopt1" } } */ > > > > > /* { dg-final { scan-tree-dump-times "MIN_EXPR" 2 "optimized" } } */ > > > > > /* { dg-final { scan-tree-dump-times "MAX_EXPR" 0 "phiopt1" } } */ > > > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmax-3.c > > > > > b/gcc/testsuite/gcc.dg/tree-ssa/minmax-3.c > > > > > index 2af10776346..521afe3e4d9 100644 > > > > > --- a/gcc/testsuite/gcc.dg/tree-ssa/minmax-3.c > > > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/minmax-3.c > > > > > @@ -25,5 +25,5 @@ main (void) > > > > > return 0; > > > > > } > > > > > > > > > > -/* { dg-final { scan-tree-dump-times "MIN_EXPR" 3 "phiopt1" } } */ > > > > > +/* { dg-final { scan-tree-dump-times "MIN_EXPR" 2 "phiopt1" } } */ > > > > > /* { dg-final { scan-tree-dump-times "MAX_EXPR" 0 "phiopt1" } } */ > > > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmax-4.c > > > > > b/gcc/testsuite/gcc.dg/tree-ssa/minmax-4.c > > > > > index 973f39bfed3..49e27185b5e 100644 > > > > > --- a/gcc/testsuite/gcc.dg/tree-ssa/minmax-4.c > > > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/minmax-4.c > > > > > @@ -26,4 +26,4 @@ main (void) > > > > > } > > > > > > > > > > /* { dg-final { scan-tree-dump-times "MIN_EXPR" 0 "phiopt1" } } */ > > > > > -/* { dg-final { scan-tree-dump-times "MAX_EXPR" 3 "phiopt1" } } */ > > > > > +/* { dg-final { scan-tree-dump-times "MAX_EXPR" 2 "phiopt1" } } */ > > > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmax-5.c > > > > > b/gcc/testsuite/gcc.dg/tree-ssa/minmax-5.c > > > > > index 34e4e720511..194c881cc98 100644 > > > > > --- a/gcc/testsuite/gcc.dg/tree-ssa/minmax-5.c > > > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/minmax-5.c > > > > > @@ -25,5 +25,5 @@ main (void) > > > > > return 0; > > > > > } > > > > > > > > > > -/* { dg-final { scan-tree-dump-times "MIN_EXPR" 2 "phiopt1" } } */ > > > > > +/* { dg-final { scan-tree-dump-times "MIN_EXPR" 1 "phiopt1" } } */ > > > > > /* { dg-final { scan-tree-dump-times "MAX_EXPR" 1 "phiopt1" } } */ > > > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmax-8.c > > > > > b/gcc/testsuite/gcc.dg/tree-ssa/minmax-8.c > > > > > index 0160e573fef..d5cb53145ea 100644 > > > > > --- a/gcc/testsuite/gcc.dg/tree-ssa/minmax-8.c > > > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/minmax-8.c > > > > > @@ -26,4 +26,4 @@ main (void) > > > > > } > > > > > > > > > > /* { dg-final { scan-tree-dump-times "MIN_EXPR" 1 "phiopt1" } } */ > > > > > -/* { dg-final { scan-tree-dump-times "MAX_EXPR" 2 "phiopt1" } } */ > > > > > +/* { dg-final { scan-tree-dump-times "MAX_EXPR" 1 "phiopt1" } } */ > > > > > diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc > > > > > index 65b3deea34a..311423efeb5 100644 > > > > > --- a/gcc/tree-ssa-phiopt.cc > > > > > +++ b/gcc/tree-ssa-phiopt.cc > > > > > @@ -82,6 +82,25 @@ single_non_singleton_phi_for_edges (gimple_seq > > > > > seq, edge e0, edge e1) > > > > > return phi; > > > > > } > > > > > > > > > > +/* For each PHI in BB, copy the argument associated with SRC_E to > > > > > TGT_E. */ > > > > > + > > > > > +static void > > > > > +copy_phi_args (basic_block bb, edge src_e, edge tgt_e) > > > > > +{ > > > > > + gphi_iterator gsi; > > > > > + int src_indx = src_e->dest_idx; > > > > > + > > > > > + for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > > > > > + { > > > > > + gphi *phi = gsi.phi (); > > > > > + tree def = gimple_phi_arg_def (phi, src_indx); > > > > > + location_t locus = gimple_phi_arg_location (phi, src_indx); > > > > > + > > > > > + add_phi_arg (phi, def, tgt_e, locus); > > > > > + } > > > > > +} > > > > > > > > Doesn't flush_pending_stmts (tgt_e) do this? > > > > > > No, In fact the above code is very similar to the code from > > > remove_forwarder_block in tree-cfgcleanup.cc (I copied it and changed > > > it from copy_phi_args in tree-ssa-threadupdate.cc though as I don't > > > need a mapping). > > > Let me factor out the code from remove_forwarder_block and put it in > > > some common spot and then use that; it will be the same logic even. > > > > Hmm, but it's odd - if you redirect an edge on GIMPLE then there should > > be helpers available to do all this. I think you're doing something wrong > > (without actually looking too close) > > Maybe some (crude) diagrams are needed to explain why we need to copy > the entries for the phi nodes from one edge to another. > > So the original BB structure is: > > BB > /e1 \e2 > BB1 BB2 > \e3 /e4 > BB3 > BB3 has a few phi nodes (except for one of the phi nodes, the entries > for BB1, BB2 are all the same). > When you redirect e1 (or e2) to BB3, we create new entries in the phi > nodes for that edge now as it was not there before. > So the shape is: > BB > |e1 (or e2) > BB3 > > but since it is a new entry in the PHI node, it will be a nullptr. So > we need to copy them from the e3 or e4 entries. > Does that make sense on why the new function is needed here? This is > not a normal operation done by any other pass either.
Hmm, OK. So why can you elide BB1 here? Indeed the SSA redirect_edge hook only picks the PHI args from the original destination, it would be probably more "natural" to redirect the e3 source to BB (redirect_edge_pred) and remove the e1 edge? That said, properly "decomposing" this CFG manipulation would be nice. > An example of where this API is used in is remove_forwarder_block is > doing something similar but instead of 4 BB, it only has 3: > BB > | e1 > BBF (empty forward BB) > | e2 > BB2 > So it redirects e1 to BB2 (bypassing BBF) and the phi nodes need to be > copied from e2. > > Thanks, > Andrew > > > > > > Richard. > > > > > Thanks, > > > Andrew > > > > > > > > > > > > + > > > > > + > > > > > /* Replace PHI node element whose edge is E in block BB with > > > > > variable NEW. > > > > > Remove the edge from COND_BLOCK which does not lead to BB > > > > > (COND_BLOCK > > > > > is known to have two edges, one of which must reach BB). */ > > > > > @@ -94,6 +113,7 @@ replace_phi_edge_with_variable (basic_block > > > > > cond_block, > > > > > basic_block bb = gimple_bb (phi); > > > > > gimple_stmt_iterator gsi; > > > > > tree phi_result = PHI_RESULT (phi); > > > > > + bool deleteboth = false; > > > > > > > > > > /* Duplicate range info if they are the only things setting the > > > > > target PHI. > > > > > This is needed as later on, the new_tree will be replacing > > > > > @@ -137,7 +157,14 @@ replace_phi_edge_with_variable (basic_block > > > > > cond_block, > > > > > keep_edge = EDGE_SUCC (cond_block, 1); > > > > > } > > > > > else if ((keep_edge = find_edge (cond_block, e->src))) > > > > > - ; > > > > > + { > > > > > + basic_block bb1 = EDGE_SUCC (cond_block, 0)->dest; > > > > > + basic_block bb2 = EDGE_SUCC (cond_block, 1)->dest; > > > > > + if (single_pred_p (bb1) && single_pred_p (bb2) > > > > > + && single_succ_p (bb1) && single_succ_p (bb2) > > > > > + && empty_block_p (bb1) && empty_block_p (bb2)) > > > > > + deleteboth = true; > > > > > + } > > > > > else > > > > > gcc_unreachable (); > > > > > > > > > > @@ -148,6 +175,28 @@ replace_phi_edge_with_variable (basic_block > > > > > cond_block, > > > > > e->probability = profile_probability::always (); > > > > > delete_basic_block (edge_to_remove->dest); > > > > > > > > > > + /* Eliminate the COND_EXPR at the end of COND_BLOCK. */ > > > > > + gsi = gsi_last_bb (cond_block); > > > > > + gsi_remove (&gsi, true); > > > > > + } > > > > > + else if (deleteboth) > > > > > + { > > > > > + basic_block bb1 = EDGE_SUCC (cond_block, 0)->dest; > > > > > + basic_block bb2 = EDGE_SUCC (cond_block, 1)->dest; > > > > > + > > > > > + edge newedge = redirect_edge_and_branch (keep_edge, bb); > > > > > + /* no new edge should be the same. */ > > > > > + gcc_assert (newedge == keep_edge); > > > > > + keep_edge->flags |= EDGE_FALLTHRU; > > > > > + keep_edge->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); > > > > > + keep_edge->probability = profile_probability::always (); > > > > > + /* Copy the edge's phi entry from the old one */ > > > > > + copy_phi_args(bb, e, keep_edge); > > > > > + > > > > > + /* Delete the old 2 empty basic blocks */ > > > > > + delete_basic_block (bb1); > > > > > + delete_basic_block (bb2); > > > > > + > > > > > /* Eliminate the COND_EXPR at the end of COND_BLOCK. */ > > > > > gsi = gsi_last_bb (cond_block); > > > > > gsi_remove (&gsi, true); > > > > > -- > > > > > 2.31.1 > > > > >