On Wed, Mar 18, 2026 at 11:11 AM Konstantinos Eleftheriou
<[email protected]> wrote:
>
> Please ignore the change for "gcc.dg/guality/pr54693-2.c". It turns out that
> the tweak is not needed with the updated
> solution and we will revert it if this gets accepted.
>
> Thanks,
> Konstantinos
>
> On Tue, Mar 17, 2026 at 2:23 PM Konstantinos Eleftheriou
> <[email protected]> wrote:
>>
>> After tail merging combines duplicate blocks, their predecessors branch
>> to the same successor. On targets that support conditional compares
>> (ccmp), combine the sequential conditions leading to the merged block
>> using the ifcombine infrastructure. This enables the generation of ccmp
>> instructions.
>>
>> The candidate selection identifies predecessor blocks of the merged block
>> that have conditional branches, excluding the immediate dominator.
>> After the tail-merge loop completes, dominance info is (re)computed, SSA
>> names that may be undefined are marked, and tree_ssa_ifcombine_bb is
>> called for each candidate.
>>
>> To avoid bogus -Wmaybe-uninitialized warnings, candidates are skipped
>> when the dominance subtree of a successor has PHI arguments that are
>> maybe-undef at the dominance frontier, as combining conditions in that
>> case would change the predicate structure the uninit analysis relies on.
>>
>> The xfail condition for gcc.dg/guality/pr54693-2.c is adjusted because
>> the newly combined conditions change the debug info on AArch64.
>>
>> gcc/ChangeLog:
>>
>> PR tree-optimization/102793
>>
>> * tree-ssa-ifcombine.h: Remove extra blank line at end.
>> * tree-ssa-tail-merge.cc: Include target.h, tree-ssa-ifcombine.h,
>> and tree-ssa.h.
>> (ifcombine_candidate_bbs): New static bitmap.
>> (apply_clusters): After replacing a block, identify predecessors
>> of the merged block as ifcombine candidates.
>> (maybe_undef_at_dom_frontier_p): New function.
>> (tail_merge_optimize): Compute dominance info when needed for
>> ifcombine. Call mark_ssa_maybe_undefs and tree_ssa_ifcombine_bb
>> for each candidate on ccmp targets, skipping candidates with
>> maybe-undef PHI args at the dominance frontier. Return
>> TODO_cleanup_cfg when ifcombine changed the CFG.
>>
>> gcc/testsuite/ChangeLog:
>>
>> PR tree-optimization/102793
>>
>> * gcc.dg/guality/pr54693-2.c: Adjust xfail condition for
>> AArch64 to account for the new ccmp-related transformations.
>> * gcc.dg/tree-ssa/pr102793-1.c: New test.
>> * gcc.dg/tree-ssa/pr102793-2.c: New test.
>>
>> Signed-off-by: Konstantinos Eleftheriou <[email protected]>
>> ---
>>
>> Changes in v2:
>> - Use tree_ssa_ifcombine_bb for combining conditions.
>>
>> gcc/testsuite/gcc.dg/guality/pr54693-2.c | 2 +-
>> gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c | 50 ++++++++
>> gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c | 51 +++++++++
>> gcc/tree-ssa-ifcombine.h | 1 -
>> gcc/tree-ssa-tail-merge.cc | 126 ++++++++++++++++++++-
>> 5 files changed, 225 insertions(+), 5 deletions(-)
>> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c
>> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c
>>
>> diff --git a/gcc/testsuite/gcc.dg/guality/pr54693-2.c
>> b/gcc/testsuite/gcc.dg/guality/pr54693-2.c
>> index 229ef0efbea0..9d0080782410 100644
>> --- a/gcc/testsuite/gcc.dg/guality/pr54693-2.c
>> +++ b/gcc/testsuite/gcc.dg/guality/pr54693-2.c
>> @@ -18,7 +18,7 @@ foo (int x, int y, int z)
>> while (x > 3 && y > 3 && z > 3)
>> { /* { dg-final { gdb-test .+2 "i" "v + 1" } } */
>> /* { dg-final { gdb-test .+1 "x" "10 - i" { xfail {
>> aarch64*-*-* && { any-opts "-fno-fat-lto-objects" } } } } } */
>> - bar (i); /* { dg-final { gdb-test . "y" "20 - 2 * i" { xfail {
>> aarch64*-*-* && { any-opts "-fno-fat-lto-objects" "-Os" } } } } } */
>> + bar (i); /* { dg-final { gdb-test . "y" "20 - 2 * i" { xfail {
>> aarch64*-*-* && { { any-opts "-flto" } && { no-opts "-fno-use-linker-plugin"
>> } } } } } } */
>> /* { dg-final { gdb-test .-1 "z" "30 - 3 * i" { xfail {
>> aarch64*-*-* && { any-opts "-fno-fat-lto-objects" "-Os" } } } } } */
>> i++, x--, y -= 2, z -= 3;
>> }
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c
>> b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c
>> new file mode 100644
>> index 000000000000..074abdf4936b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c
>> @@ -0,0 +1,50 @@
>> +/* { dg-do compile } */
>> +/* { dg-skip-if "requires ccmp support" { ! { aarch64*-*-* || apxf } } } */
>> +/* { dg-options "-O3 -fdump-tree-pre" } */
>> +
>> +typedef unsigned long uint64_t;
>> +
>> +int foo(void);
>> +
>> +int ccmp(uint64_t* s1, uint64_t* s2)
>> +{
>> + uint64_t d1, d2, bar;
>> + d1 = *s1++;
>> + d2 = *s2++;
>> + bar = (d1 ^ d2) & 0xabcd;
>> + if (bar == 0 || d1 != d2)
>> + return foo();
>> + return 0;
>> +}
>> +
>> +int noccmp0(uint64_t* s1, uint64_t* s2)
>> +{
>> + uint64_t d1, d2, bar;
>> +
>> + d1 = *s1++;
>> + d2 = *s2++;
>> + bar = (d1 ^ d2) & 0xabcd;
>> + if (bar == 0)
>> + return foo();
>> + if (d1 != d2)
>> + return foo();
>> + return 0;
>> +}
>> +
>> +int noccmp1(uint64_t* s1, uint64_t* s2)
>> +{
>> + uint64_t d1, d2, d3, d4, bar;
>> + d1 = *s1++;
>> + d2 = *s2++;
>> + d3 = *s1++;
>> + d4 = *s2++;
>> + bar = (d1 ^ d2) & 0xabcd;
>> + if (bar == 0)
>> + return foo();
>> + if (d3 != d4)
>> + return foo();
>> + return 0;
>> +}
>> +
>> +/* Check for condition assignments for noccmp0 and noccmp1. */
>> +/* { dg-final { scan-tree-dump-times {_\d+ = d\d+_\d+ != d\d+_\d+;\n _\d+
>> = bar_\d+ == 0;} 2 "pre" } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c
>> b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c
>> new file mode 100644
>> index 000000000000..99eb52d35f54
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c
>> @@ -0,0 +1,51 @@
>> +/* { dg-do compile } */
>> +/* { dg-skip-if "requires ccmp support" { ! { aarch64*-*-* || apxf } } } */
>> +/* { dg-options "-O3 -fdump-tree-pre" } */
>> +
>> +typedef unsigned long uint64_t;
>> +
>> +int foo(void);
>> +
>> +uint64_t noccmp0(uint64_t* s1, uint64_t* s2)
>> +{
>> + uint64_t d1, d2, d3, d4, bar;
>> + d1 = *s1++;
>> + d2 = *s2++;
>> + d3 = *s1++;
>> + d4 = *s2++;
>> + bar = (d1 ^ d2) & 0xabcd;
>> + if (bar == 0)
>> + return foo();
>> + if (d3 != d4)
>> + d3++;
>> + else
>> + return foo();
>> + return d3;
>> +}
>> +
>> +uint64_t noccmp1(uint64_t* s1, uint64_t* s2)
>> +{
>> + uint64_t d1, d2, d3, d4, bar;
>> + d1 = *s1++;
>> + d2 = *s2++;
>> + d3 = *s1++;
>> + d4 = *s2++;
>> + bar = (d1 ^ d2) & 0xabcd;
>> + if (bar == 0)
>> + d3++;
>> + else
>> + return foo();
>> + if (d3 > d4)
>> + d3++;
>> + else if (d1 != d2)
>> + return foo ();
>> + d3 = d3 + d4 + 1;
>> + return d3;
>> +}
>> +
>> +/* Check for condition assignments in the case that the transformation
>> + is applied.
>> + The transformation should not be applied on noccmp1, where the foo call
>> is
>> + on the false branch of the first condition. */
>> +/* { dg-final { scan-tree-dump-times {_\d+ = d\d+_\d+ != d\d+_\d+;\n _\d+
>> = bar_\d+ != 0;} 1 "pre" } } */
>> +/* { dg-final { scan-tree-dump-times {if \(bar_\d+ == 0\)} 1 "pre" } } */
>> diff --git a/gcc/tree-ssa-ifcombine.h b/gcc/tree-ssa-ifcombine.h
>> index fc1e3a100cd1..95daac2d942a 100644
>> --- a/gcc/tree-ssa-ifcombine.h
>> +++ b/gcc/tree-ssa-ifcombine.h
>> @@ -26,4 +26,3 @@ bool tree_ssa_ifcombine_bb (basic_block);
>>
>> #endif
>>
>> -
>> diff --git a/gcc/tree-ssa-tail-merge.cc b/gcc/tree-ssa-tail-merge.cc
>> index 4b9dbd886b6a..f72b30134c54 100644
>> --- a/gcc/tree-ssa-tail-merge.cc
>> +++ b/gcc/tree-ssa-tail-merge.cc
>> @@ -189,6 +189,7 @@ along with GCC; see the file COPYING3. If not see
>> #include "system.h"
>> #include "coretypes.h"
>> #include "backend.h"
>> +#include "target.h"
>> #include "tree.h"
>> #include "gimple.h"
>> #include "cfghooks.h"
>> @@ -202,9 +203,11 @@ along with GCC; see the file COPYING3. If not see
>> #include "tree-cfg.h"
>> #include "tree-into-ssa.h"
>> #include "tree-ssa-sccvn.h"
>> +#include "tree-ssa-ifcombine.h"
>> #include "cfgloop.h"
>> #include "tree-eh.h"
>> #include "tree-cfgcleanup.h"
>> +#include "tree-ssa.h"
>>
>> const int ignore_edge_flags = EDGE_DFS_BACK | EDGE_EXECUTABLE;
>>
>> @@ -1707,6 +1710,8 @@ replace_block_by (basic_block bb1, basic_block bb2)
>>
>> static bitmap update_bbs;
>>
>> +static bitmap ifcombine_candidate_bbs;
>> +
>> /* For each cluster in all_clusters, merge all cluster->bbs. Returns
>> number of bbs removed. */
>>
>> @@ -1735,6 +1740,27 @@ apply_clusters (void)
>> bitmap_clear_bit (update_bbs, bb1->index);
>>
>> replace_block_by (bb1, bb2);
>> +
>> + basic_block imm_dominator = get_immediate_dominator (
>> + CDI_DOMINATORS, bb2);
>> +
>> + /* Find conditions in if-statements that lead to bb. */
>> + edge e;
>> + edge_iterator ei;
>> + FOR_EACH_EDGE (e, ei, bb2->preds)
>> + {
>> + basic_block then_tmp = NULL;
>> + basic_block else_tmp = NULL;
>> + if (recognize_if_then_else (e->src, &bb2, &else_tmp)
>> + || recognize_if_then_else (e->src, &then_tmp, &bb2))
>> + {
>> + gcond *cond = safe_dyn_cast <gcond *> (*gsi_last_bb
>> (e->src));
>> + if (cond)
>> + if (e->src != imm_dominator)
this looks like a condition that can be checked even before calling
recognize_if_then_else. I'd also say you can assert there's a gcond
if recognize_if_then_else was successful.
>> + bitmap_set_bit (ifcombine_candidate_bbs, e->src->index);
>> + }
>> + }
>> +
>> nr_bbs_removed++;
>> }
>> }
>> @@ -1797,6 +1823,55 @@ update_debug_stmts (void)
>> }
>> }
>>
>> +/* Return true if the dominance subtree rooted at BB has any outgoing edge
>> + to a block outside the subtree where the PHI at the target has a
>> + maybe-undef argument on that edge. */
>> +
>> +static bool
>> +maybe_undef_at_dom_frontier_p (basic_block bb)
>> +{
>> + auto_vec<basic_block, 32> dom_bbs;
>> + auto_bitmap dominated;
>> +
>> + /* Collect all blocks dominated by BB. */
>> + dom_bbs.safe_push (bb);
>> + bitmap_set_bit (dominated, bb->index);
>> + for (unsigned int wi = 0; wi < dom_bbs.length (); wi++)
>> + {
>> + basic_block b = dom_bbs[wi];
>> + for (basic_block son = first_dom_son (CDI_DOMINATORS, b);
>> + son; son = next_dom_son (CDI_DOMINATORS, son))
>> + {
>> + bitmap_set_bit (dominated, son->index);
>> + dom_bbs.safe_push (son);
>> + }
>> + }
get_all_dominated_blocks?
I'll note this is now O (number-of-ifcombine-candidates * function-size), aka
quadratic.
IMO we shouldn't work around uninit diagnostic deficiencies here but try to
deal with those in uninit.
Otherwise looks OK to me. I do wonder whether we should really
conditionalize this on ccmp availability though. ifcombine itself
doesn't, and if it were to run after tail-merging we'd merge those
compares.
Richard.
>> +
>> + /* Check edges from dominated blocks to non-dominated blocks
>> + for maybe-undef PHI arguments. */
>> + for (auto b : dom_bbs)
>> + {
>> + edge e;
>> + edge_iterator ei;
>> + FOR_EACH_EDGE (e, ei, b->succs)
>> + {
>> + if (bitmap_bit_p (dominated, e->dest->index))
>> + continue;
>> + for (gphi_iterator gsi = gsi_start_phis (e->dest);
>> + !gsi_end_p (gsi); gsi_next (&gsi))
>> + {
>> + gphi *phi = gsi.phi ();
>> + tree arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
>> + if (TREE_CODE (arg) == SSA_NAME
>> + && ssa_name_maybe_undef_p (arg))
>> + return true;
>> + }
>> + }
>> + }
>> +
>> + return false;
>> +}
>> +
>> /* Runs tail merge optimization. */
>>
>> unsigned int
>> @@ -1807,6 +1882,7 @@ tail_merge_optimize (bool need_crit_edge_split)
>> bool loop_entered = false;
>> int iteration_nr = 0;
>> int max_iterations = param_max_tail_merge_iterations;
>> + unsigned int todo = 0;
>>
>> if (!flag_tree_tail_merge
>> || max_iterations == 0)
>> @@ -1833,6 +1909,7 @@ tail_merge_optimize (bool need_crit_edge_split)
>> loop_entered = true;
>> alloc_cluster_vectors ();
>> update_bbs = BITMAP_ALLOC (NULL);
>> + ifcombine_candidate_bbs = BITMAP_ALLOC (NULL);
>> }
>> else
>> reset_cluster_vectors ();
>> @@ -1866,10 +1943,50 @@ tail_merge_optimize (bool need_crit_edge_split)
>>
>> if (nr_bbs_removed_total > 0)
>> {
>> + bool need_dominance
>> + = MAY_HAVE_DEBUG_BIND_STMTS
>> + || (targetm.have_ccmp ()
>> + && !bitmap_empty_p (ifcombine_candidate_bbs));
>> +
>> + if (need_dominance)
>> + calculate_dominance_info (CDI_DOMINATORS);
>> +
>> if (MAY_HAVE_DEBUG_BIND_STMTS)
>> + update_debug_stmts ();
>> +
>> + unsigned int i;
>> + bitmap_iterator bi;
>> + bool cfg_changed = false;
>> + /* On targets that support conditional compares (ccmp), try to combine
>> + conditions of blocks that were made to branch to the same successor
>> + by tail merging. This is gated on ccmp support because it will
>> + produce beneficial code when ccmp instructions are available. */
>> + if (targetm.have_ccmp ()
>> + && !bitmap_empty_p (ifcombine_candidate_bbs))
>> {
>> - calculate_dominance_info (CDI_DOMINATORS);
>> - update_debug_stmts ();
>> + mark_ssa_maybe_undefs ();
>> + EXECUTE_IF_SET_IN_BITMAP (ifcombine_candidate_bbs, 0, i, bi)
>> + {
>> + basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
>> + if (!bb)
>> + continue;
>> +
>> + /* Do not combine conditions when the common successor's
>> + dominance subtree has PHI arguments that are maybe-undef.
>> */
>> + edge e;
>> + edge_iterator ei;
>> + bool dominated_undef = false;
>> + FOR_EACH_EDGE (e, ei, bb->succs)
>> + if (maybe_undef_at_dom_frontier_p (e->dest))
>> + {
>> + dominated_undef = true;
>> + break;
>> + }
>> + if (dominated_undef)
>> + continue;
>> +
>> + cfg_changed |= tree_ssa_ifcombine_bb (bb);
>> + }
>> }
>>
>> if (dump_file && (dump_flags & TDF_DETAILS))
>> @@ -1879,6 +1996,8 @@ tail_merge_optimize (bool need_crit_edge_split)
>> }
>>
>> mark_virtual_operands_for_renaming (cfun);
>> +
>> + todo |= cfg_changed ? TODO_cleanup_cfg : 0;
>> }
>>
>> delete_worklist ();
>> @@ -1886,9 +2005,10 @@ tail_merge_optimize (bool need_crit_edge_split)
>> {
>> delete_cluster_vectors ();
>> BITMAP_FREE (update_bbs);
>> + BITMAP_FREE (ifcombine_candidate_bbs);
>> }
>>
>> timevar_pop (TV_TREE_TAIL_MERGE);
>>
>> - return 0;
>> + return todo;
>> }
>> --
>> 2.52.0
>>