Please ignore the change for "gcc.dg/guality/pr54693-2.c". It turns out that the tweak is not needed with the updated solution and we will revert it if this gets accepted.
Thanks, Konstantinos On Tue, Mar 17, 2026 at 2:23 PM Konstantinos Eleftheriou < [email protected]> wrote: > After tail merging combines duplicate blocks, their predecessors branch > to the same successor. On targets that support conditional compares > (ccmp), combine the sequential conditions leading to the merged block > using the ifcombine infrastructure. This enables the generation of ccmp > instructions. > > The candidate selection identifies predecessor blocks of the merged block > that have conditional branches, excluding the immediate dominator. > After the tail-merge loop completes, dominance info is (re)computed, SSA > names that may be undefined are marked, and tree_ssa_ifcombine_bb is > called for each candidate. > > To avoid bogus -Wmaybe-uninitialized warnings, candidates are skipped > when the dominance subtree of a successor has PHI arguments that are > maybe-undef at the dominance frontier, as combining conditions in that > case would change the predicate structure the uninit analysis relies on. > > The xfail condition for gcc.dg/guality/pr54693-2.c is adjusted because > the newly combined conditions change the debug info on AArch64. > > gcc/ChangeLog: > > PR tree-optimization/102793 > > * tree-ssa-ifcombine.h: Remove extra blank line at end. > * tree-ssa-tail-merge.cc: Include target.h, tree-ssa-ifcombine.h, > and tree-ssa.h. > (ifcombine_candidate_bbs): New static bitmap. > (apply_clusters): After replacing a block, identify predecessors > of the merged block as ifcombine candidates. > (maybe_undef_at_dom_frontier_p): New function. > (tail_merge_optimize): Compute dominance info when needed for > ifcombine. Call mark_ssa_maybe_undefs and tree_ssa_ifcombine_bb > for each candidate on ccmp targets, skipping candidates with > maybe-undef PHI args at the dominance frontier. Return > TODO_cleanup_cfg when ifcombine changed the CFG. > > gcc/testsuite/ChangeLog: > > PR tree-optimization/102793 > > * gcc.dg/guality/pr54693-2.c: Adjust xfail condition for > AArch64 to account for the new ccmp-related transformations. > * gcc.dg/tree-ssa/pr102793-1.c: New test. > * gcc.dg/tree-ssa/pr102793-2.c: New test. > > Signed-off-by: Konstantinos Eleftheriou <[email protected] > > > --- > > Changes in v2: > - Use tree_ssa_ifcombine_bb for combining conditions. > > gcc/testsuite/gcc.dg/guality/pr54693-2.c | 2 +- > gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c | 50 ++++++++ > gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c | 51 +++++++++ > gcc/tree-ssa-ifcombine.h | 1 - > gcc/tree-ssa-tail-merge.cc | 126 ++++++++++++++++++++- > 5 files changed, 225 insertions(+), 5 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c > > diff --git a/gcc/testsuite/gcc.dg/guality/pr54693-2.c > b/gcc/testsuite/gcc.dg/guality/pr54693-2.c > index 229ef0efbea0..9d0080782410 100644 > --- a/gcc/testsuite/gcc.dg/guality/pr54693-2.c > +++ b/gcc/testsuite/gcc.dg/guality/pr54693-2.c > @@ -18,7 +18,7 @@ foo (int x, int y, int z) > while (x > 3 && y > 3 && z > 3) > { /* { dg-final { gdb-test .+2 "i" "v + 1" } } */ > /* { dg-final { gdb-test .+1 "x" "10 - i" { xfail { > aarch64*-*-* && { any-opts "-fno-fat-lto-objects" } } } } } */ > - bar (i); /* { dg-final { gdb-test . "y" "20 - 2 * i" { xfail { > aarch64*-*-* && { any-opts "-fno-fat-lto-objects" "-Os" } } } } } */ > + bar (i); /* { dg-final { gdb-test . "y" "20 - 2 * i" { xfail { > aarch64*-*-* && { { any-opts "-flto" } && { no-opts > "-fno-use-linker-plugin" } } } } } } */ > /* { dg-final { gdb-test .-1 "z" "30 - 3 * i" { xfail { > aarch64*-*-* && { any-opts "-fno-fat-lto-objects" "-Os" } } } } } */ > i++, x--, y -= 2, z -= 3; > } > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c > new file mode 100644 > index 000000000000..074abdf4936b > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c > @@ -0,0 +1,50 @@ > +/* { dg-do compile } */ > +/* { dg-skip-if "requires ccmp support" { ! { aarch64*-*-* || apxf } } } > */ > +/* { dg-options "-O3 -fdump-tree-pre" } */ > + > +typedef unsigned long uint64_t; > + > +int foo(void); > + > +int ccmp(uint64_t* s1, uint64_t* s2) > +{ > + uint64_t d1, d2, bar; > + d1 = *s1++; > + d2 = *s2++; > + bar = (d1 ^ d2) & 0xabcd; > + if (bar == 0 || d1 != d2) > + return foo(); > + return 0; > +} > + > +int noccmp0(uint64_t* s1, uint64_t* s2) > +{ > + uint64_t d1, d2, bar; > + > + d1 = *s1++; > + d2 = *s2++; > + bar = (d1 ^ d2) & 0xabcd; > + if (bar == 0) > + return foo(); > + if (d1 != d2) > + return foo(); > + return 0; > +} > + > +int noccmp1(uint64_t* s1, uint64_t* s2) > +{ > + uint64_t d1, d2, d3, d4, bar; > + d1 = *s1++; > + d2 = *s2++; > + d3 = *s1++; > + d4 = *s2++; > + bar = (d1 ^ d2) & 0xabcd; > + if (bar == 0) > + return foo(); > + if (d3 != d4) > + return foo(); > + return 0; > +} > + > +/* Check for condition assignments for noccmp0 and noccmp1. */ > +/* { dg-final { scan-tree-dump-times {_\d+ = d\d+_\d+ != d\d+_\d+;\n > _\d+ = bar_\d+ == 0;} 2 "pre" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c > b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c > new file mode 100644 > index 000000000000..99eb52d35f54 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c > @@ -0,0 +1,51 @@ > +/* { dg-do compile } */ > +/* { dg-skip-if "requires ccmp support" { ! { aarch64*-*-* || apxf } } } > */ > +/* { dg-options "-O3 -fdump-tree-pre" } */ > + > +typedef unsigned long uint64_t; > + > +int foo(void); > + > +uint64_t noccmp0(uint64_t* s1, uint64_t* s2) > +{ > + uint64_t d1, d2, d3, d4, bar; > + d1 = *s1++; > + d2 = *s2++; > + d3 = *s1++; > + d4 = *s2++; > + bar = (d1 ^ d2) & 0xabcd; > + if (bar == 0) > + return foo(); > + if (d3 != d4) > + d3++; > + else > + return foo(); > + return d3; > +} > + > +uint64_t noccmp1(uint64_t* s1, uint64_t* s2) > +{ > + uint64_t d1, d2, d3, d4, bar; > + d1 = *s1++; > + d2 = *s2++; > + d3 = *s1++; > + d4 = *s2++; > + bar = (d1 ^ d2) & 0xabcd; > + if (bar == 0) > + d3++; > + else > + return foo(); > + if (d3 > d4) > + d3++; > + else if (d1 != d2) > + return foo (); > + d3 = d3 + d4 + 1; > + return d3; > +} > + > +/* Check for condition assignments in the case that the transformation > + is applied. > + The transformation should not be applied on noccmp1, where the foo > call is > + on the false branch of the first condition. */ > +/* { dg-final { scan-tree-dump-times {_\d+ = d\d+_\d+ != d\d+_\d+;\n > _\d+ = bar_\d+ != 0;} 1 "pre" } } */ > +/* { dg-final { scan-tree-dump-times {if \(bar_\d+ == 0\)} 1 "pre" } } */ > diff --git a/gcc/tree-ssa-ifcombine.h b/gcc/tree-ssa-ifcombine.h > index fc1e3a100cd1..95daac2d942a 100644 > --- a/gcc/tree-ssa-ifcombine.h > +++ b/gcc/tree-ssa-ifcombine.h > @@ -26,4 +26,3 @@ bool tree_ssa_ifcombine_bb (basic_block); > > #endif > > - > diff --git a/gcc/tree-ssa-tail-merge.cc b/gcc/tree-ssa-tail-merge.cc > index 4b9dbd886b6a..f72b30134c54 100644 > --- a/gcc/tree-ssa-tail-merge.cc > +++ b/gcc/tree-ssa-tail-merge.cc > @@ -189,6 +189,7 @@ along with GCC; see the file COPYING3. If not see > #include "system.h" > #include "coretypes.h" > #include "backend.h" > +#include "target.h" > #include "tree.h" > #include "gimple.h" > #include "cfghooks.h" > @@ -202,9 +203,11 @@ along with GCC; see the file COPYING3. If not see > #include "tree-cfg.h" > #include "tree-into-ssa.h" > #include "tree-ssa-sccvn.h" > +#include "tree-ssa-ifcombine.h" > #include "cfgloop.h" > #include "tree-eh.h" > #include "tree-cfgcleanup.h" > +#include "tree-ssa.h" > > const int ignore_edge_flags = EDGE_DFS_BACK | EDGE_EXECUTABLE; > > @@ -1707,6 +1710,8 @@ replace_block_by (basic_block bb1, basic_block bb2) > > static bitmap update_bbs; > > +static bitmap ifcombine_candidate_bbs; > + > /* For each cluster in all_clusters, merge all cluster->bbs. Returns > number of bbs removed. */ > > @@ -1735,6 +1740,27 @@ apply_clusters (void) > bitmap_clear_bit (update_bbs, bb1->index); > > replace_block_by (bb1, bb2); > + > + basic_block imm_dominator = get_immediate_dominator ( > + CDI_DOMINATORS, bb2); > + > + /* Find conditions in if-statements that lead to bb. */ > + edge e; > + edge_iterator ei; > + FOR_EACH_EDGE (e, ei, bb2->preds) > + { > + basic_block then_tmp = NULL; > + basic_block else_tmp = NULL; > + if (recognize_if_then_else (e->src, &bb2, &else_tmp) > + || recognize_if_then_else (e->src, &then_tmp, &bb2)) > + { > + gcond *cond = safe_dyn_cast <gcond *> (*gsi_last_bb > (e->src)); > + if (cond) > + if (e->src != imm_dominator) > + bitmap_set_bit (ifcombine_candidate_bbs, > e->src->index); > + } > + } > + > nr_bbs_removed++; > } > } > @@ -1797,6 +1823,55 @@ update_debug_stmts (void) > } > } > > +/* Return true if the dominance subtree rooted at BB has any outgoing edge > + to a block outside the subtree where the PHI at the target has a > + maybe-undef argument on that edge. */ > + > +static bool > +maybe_undef_at_dom_frontier_p (basic_block bb) > +{ > + auto_vec<basic_block, 32> dom_bbs; > + auto_bitmap dominated; > + > + /* Collect all blocks dominated by BB. */ > + dom_bbs.safe_push (bb); > + bitmap_set_bit (dominated, bb->index); > + for (unsigned int wi = 0; wi < dom_bbs.length (); wi++) > + { > + basic_block b = dom_bbs[wi]; > + for (basic_block son = first_dom_son (CDI_DOMINATORS, b); > + son; son = next_dom_son (CDI_DOMINATORS, son)) > + { > + bitmap_set_bit (dominated, son->index); > + dom_bbs.safe_push (son); > + } > + } > + > + /* Check edges from dominated blocks to non-dominated blocks > + for maybe-undef PHI arguments. */ > + for (auto b : dom_bbs) > + { > + edge e; > + edge_iterator ei; > + FOR_EACH_EDGE (e, ei, b->succs) > + { > + if (bitmap_bit_p (dominated, e->dest->index)) > + continue; > + for (gphi_iterator gsi = gsi_start_phis (e->dest); > + !gsi_end_p (gsi); gsi_next (&gsi)) > + { > + gphi *phi = gsi.phi (); > + tree arg = PHI_ARG_DEF_FROM_EDGE (phi, e); > + if (TREE_CODE (arg) == SSA_NAME > + && ssa_name_maybe_undef_p (arg)) > + return true; > + } > + } > + } > + > + return false; > +} > + > /* Runs tail merge optimization. */ > > unsigned int > @@ -1807,6 +1882,7 @@ tail_merge_optimize (bool need_crit_edge_split) > bool loop_entered = false; > int iteration_nr = 0; > int max_iterations = param_max_tail_merge_iterations; > + unsigned int todo = 0; > > if (!flag_tree_tail_merge > || max_iterations == 0) > @@ -1833,6 +1909,7 @@ tail_merge_optimize (bool need_crit_edge_split) > loop_entered = true; > alloc_cluster_vectors (); > update_bbs = BITMAP_ALLOC (NULL); > + ifcombine_candidate_bbs = BITMAP_ALLOC (NULL); > } > else > reset_cluster_vectors (); > @@ -1866,10 +1943,50 @@ tail_merge_optimize (bool need_crit_edge_split) > > if (nr_bbs_removed_total > 0) > { > + bool need_dominance > + = MAY_HAVE_DEBUG_BIND_STMTS > + || (targetm.have_ccmp () > + && !bitmap_empty_p (ifcombine_candidate_bbs)); > + > + if (need_dominance) > + calculate_dominance_info (CDI_DOMINATORS); > + > if (MAY_HAVE_DEBUG_BIND_STMTS) > + update_debug_stmts (); > + > + unsigned int i; > + bitmap_iterator bi; > + bool cfg_changed = false; > + /* On targets that support conditional compares (ccmp), try to > combine > + conditions of blocks that were made to branch to the same > successor > + by tail merging. This is gated on ccmp support because it will > + produce beneficial code when ccmp instructions are available. */ > + if (targetm.have_ccmp () > + && !bitmap_empty_p (ifcombine_candidate_bbs)) > { > - calculate_dominance_info (CDI_DOMINATORS); > - update_debug_stmts (); > + mark_ssa_maybe_undefs (); > + EXECUTE_IF_SET_IN_BITMAP (ifcombine_candidate_bbs, 0, i, bi) > + { > + basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i); > + if (!bb) > + continue; > + > + /* Do not combine conditions when the common successor's > + dominance subtree has PHI arguments that are > maybe-undef. */ > + edge e; > + edge_iterator ei; > + bool dominated_undef = false; > + FOR_EACH_EDGE (e, ei, bb->succs) > + if (maybe_undef_at_dom_frontier_p (e->dest)) > + { > + dominated_undef = true; > + break; > + } > + if (dominated_undef) > + continue; > + > + cfg_changed |= tree_ssa_ifcombine_bb (bb); > + } > } > > if (dump_file && (dump_flags & TDF_DETAILS)) > @@ -1879,6 +1996,8 @@ tail_merge_optimize (bool need_crit_edge_split) > } > > mark_virtual_operands_for_renaming (cfun); > + > + todo |= cfg_changed ? TODO_cleanup_cfg : 0; > } > > delete_worklist (); > @@ -1886,9 +2005,10 @@ tail_merge_optimize (bool need_crit_edge_split) > { > delete_cluster_vectors (); > BITMAP_FREE (update_bbs); > + BITMAP_FREE (ifcombine_candidate_bbs); > } > > timevar_pop (TV_TREE_TAIL_MERGE); > > - return 0; > + return todo; > } > -- > 2.52.0 > >
