Please ignore the change to "gcc.dg/guality/pr54693-2.c". It turns out
that the tweak is not needed with the updated solution, and we will
revert it if this gets accepted.

Thanks,
Konstantinos

On Tue, Mar 17, 2026 at 2:23 PM Konstantinos Eleftheriou <
[email protected]> wrote:

> After tail merging combines duplicate blocks, their predecessors branch
> to the same successor.  On targets that support conditional compares
> (ccmp), combine the sequential conditions leading to the merged block
> using the ifcombine infrastructure.  This enables the generation of ccmp
> instructions.
>
> The candidate selection identifies predecessor blocks of the merged block
> that have conditional branches, excluding the immediate dominator.
> After the tail-merge loop completes, dominance info is (re)computed, SSA
> names that may be undefined are marked, and tree_ssa_ifcombine_bb is
> called for each candidate.
>
> To avoid bogus -Wmaybe-uninitialized warnings, candidates are skipped
> when the dominance subtree of a successor has PHI arguments that are
> maybe-undef at the dominance frontier, as combining conditions in that
> case would change the predicate structure the uninit analysis relies on.
>
> The xfail condition for gcc.dg/guality/pr54693-2.c is adjusted because
> the newly combined conditions change the debug info on AArch64.
>
> gcc/ChangeLog:
>
>         PR tree-optimization/102793
>
>         * tree-ssa-ifcombine.h: Remove extra blank line at end.
>         * tree-ssa-tail-merge.cc: Include target.h, tree-ssa-ifcombine.h,
>         and tree-ssa.h.
>         (ifcombine_candidate_bbs): New static bitmap.
>         (apply_clusters): After replacing a block, identify predecessors
>         of the merged block as ifcombine candidates.
>         (maybe_undef_at_dom_frontier_p): New function.
>         (tail_merge_optimize): Compute dominance info when needed for
>         ifcombine.  Call mark_ssa_maybe_undefs and tree_ssa_ifcombine_bb
>         for each candidate on ccmp targets, skipping candidates with
>         maybe-undef PHI args at the dominance frontier.  Return
>         TODO_cleanup_cfg when ifcombine changed the CFG.
>
> gcc/testsuite/ChangeLog:
>
>         PR tree-optimization/102793
>
>         * gcc.dg/guality/pr54693-2.c: Adjust xfail condition for
>         AArch64 to account for the new ccmp-related transformations.
>         * gcc.dg/tree-ssa/pr102793-1.c: New test.
>         * gcc.dg/tree-ssa/pr102793-2.c: New test.
>
> Signed-off-by: Konstantinos Eleftheriou <[email protected]>
> ---
>
> Changes in v2:
> - Use tree_ssa_ifcombine_bb for combining conditions.
>
>  gcc/testsuite/gcc.dg/guality/pr54693-2.c   |   2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c |  50 ++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c |  51 +++++++++
>  gcc/tree-ssa-ifcombine.h                   |   1 -
>  gcc/tree-ssa-tail-merge.cc                 | 126 ++++++++++++++++++++-
>  5 files changed, 225 insertions(+), 5 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c
>
> diff --git a/gcc/testsuite/gcc.dg/guality/pr54693-2.c b/gcc/testsuite/gcc.dg/guality/pr54693-2.c
> index 229ef0efbea0..9d0080782410 100644
> --- a/gcc/testsuite/gcc.dg/guality/pr54693-2.c
> +++ b/gcc/testsuite/gcc.dg/guality/pr54693-2.c
> @@ -18,7 +18,7 @@ foo (int x, int y, int z)
>    while (x > 3 && y > 3 && z > 3)
>      {          /* { dg-final { gdb-test .+2 "i" "v + 1" } } */
>                 /* { dg-final { gdb-test .+1 "x" "10 - i" { xfail {
> aarch64*-*-* && { any-opts "-fno-fat-lto-objects" } } } } } */
> -      bar (i); /* { dg-final { gdb-test . "y" "20 - 2 * i" { xfail {
> aarch64*-*-* && { any-opts "-fno-fat-lto-objects" "-Os" } } } } } */
> +      bar (i); /* { dg-final { gdb-test . "y" "20 - 2 * i" { xfail {
> aarch64*-*-* && { { any-opts "-flto" } && { no-opts
> "-fno-use-linker-plugin" } } } } } } */
>                 /* { dg-final { gdb-test .-1 "z" "30 - 3 * i" { xfail {
> aarch64*-*-* && { any-opts "-fno-fat-lto-objects" "-Os" } } } } } */
>        i++, x--, y -= 2, z -= 3;
>      }
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c
> new file mode 100644
> index 000000000000..074abdf4936b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c
> @@ -0,0 +1,50 @@
> +/* { dg-do compile } */
> +/* { dg-skip-if "requires ccmp support" { ! { aarch64*-*-* || apxf } } }
> */
> +/* { dg-options "-O3 -fdump-tree-pre" } */
> +
> +typedef unsigned long uint64_t;
> +
> +int foo(void);
> +
> +int ccmp(uint64_t* s1, uint64_t* s2)
> +{
> +    uint64_t d1, d2, bar;
> +    d1 = *s1++;
> +    d2 = *s2++;
> +    bar = (d1 ^ d2) & 0xabcd;
> +    if (bar == 0 || d1 != d2)
> +      return foo();
> +    return 0;
> +}
> +
> +int noccmp0(uint64_t* s1, uint64_t* s2)
> +{
> +    uint64_t d1, d2, bar;
> +
> +    d1 = *s1++;
> +    d2 = *s2++;
> +    bar = (d1 ^ d2) & 0xabcd;
> +    if (bar == 0)
> +      return foo();
> +    if (d1 != d2)
> +      return foo();
> +    return 0;
> +}
> +
> +int noccmp1(uint64_t* s1, uint64_t* s2)
> +{
> +    uint64_t d1, d2, d3, d4, bar;
> +    d1 = *s1++;
> +    d2 = *s2++;
> +    d3 = *s1++;
> +    d4 = *s2++;
> +    bar = (d1 ^ d2) & 0xabcd;
> +    if (bar == 0)
> +      return foo();
> +    if (d3 != d4)
> +      return foo();
> +    return 0;
> +}
> +
> +/* Check for condition assignments for noccmp0 and noccmp1.  */
> +/* { dg-final { scan-tree-dump-times {_\d+ = d\d+_\d+ != d\d+_\d+;\n
> _\d+ = bar_\d+ == 0;} 2 "pre" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c
> new file mode 100644
> index 000000000000..99eb52d35f54
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c
> @@ -0,0 +1,51 @@
> +/* { dg-do compile } */
> +/* { dg-skip-if "requires ccmp support" { ! { aarch64*-*-* || apxf } } }
> */
> +/* { dg-options "-O3 -fdump-tree-pre" } */
> +
> +typedef unsigned long uint64_t;
> +
> +int foo(void);
> +
> +uint64_t noccmp0(uint64_t* s1, uint64_t* s2)
> +{
> +    uint64_t d1, d2, d3, d4, bar;
> +    d1 = *s1++;
> +    d2 = *s2++;
> +    d3 = *s1++;
> +    d4 = *s2++;
> +    bar = (d1 ^ d2) & 0xabcd;
> +    if (bar == 0)
> +      return foo();
> +    if (d3 != d4)
> +      d3++;
> +    else
> +      return foo();
> +    return d3;
> +}
> +
> +uint64_t noccmp1(uint64_t* s1, uint64_t* s2)
> +{
> +    uint64_t d1, d2, d3, d4, bar;
> +    d1 = *s1++;
> +    d2 = *s2++;
> +    d3 = *s1++;
> +    d4 = *s2++;
> +    bar = (d1 ^ d2) & 0xabcd;
> +    if (bar == 0)
> +      d3++;
> +    else
> +      return foo();
> +    if (d3 > d4)
> +      d3++;
> +    else if (d1 != d2)
> +      return foo ();
> +    d3 = d3 + d4 + 1;
> +    return d3;
> +}
> +
> +/* Check for condition assignments in the case that the transformation
> +   is applied.
> +   The transformation should not be applied on noccmp1, where the foo
> call is
> +   on the false branch of the first condition.  */
> +/* { dg-final { scan-tree-dump-times {_\d+ = d\d+_\d+ != d\d+_\d+;\n
> _\d+ = bar_\d+ != 0;} 1 "pre" } } */
> +/* { dg-final { scan-tree-dump-times {if \(bar_\d+ == 0\)} 1 "pre" } } */
> diff --git a/gcc/tree-ssa-ifcombine.h b/gcc/tree-ssa-ifcombine.h
> index fc1e3a100cd1..95daac2d942a 100644
> --- a/gcc/tree-ssa-ifcombine.h
> +++ b/gcc/tree-ssa-ifcombine.h
> @@ -26,4 +26,3 @@ bool tree_ssa_ifcombine_bb (basic_block);
>
>  #endif
>
> -
> diff --git a/gcc/tree-ssa-tail-merge.cc b/gcc/tree-ssa-tail-merge.cc
> index 4b9dbd886b6a..f72b30134c54 100644
> --- a/gcc/tree-ssa-tail-merge.cc
> +++ b/gcc/tree-ssa-tail-merge.cc
> @@ -189,6 +189,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "system.h"
>  #include "coretypes.h"
>  #include "backend.h"
> +#include "target.h"
>  #include "tree.h"
>  #include "gimple.h"
>  #include "cfghooks.h"
> @@ -202,9 +203,11 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-cfg.h"
>  #include "tree-into-ssa.h"
>  #include "tree-ssa-sccvn.h"
> +#include "tree-ssa-ifcombine.h"
>  #include "cfgloop.h"
>  #include "tree-eh.h"
>  #include "tree-cfgcleanup.h"
> +#include "tree-ssa.h"
>
>  const int ignore_edge_flags = EDGE_DFS_BACK | EDGE_EXECUTABLE;
>
> @@ -1707,6 +1710,8 @@ replace_block_by (basic_block bb1, basic_block bb2)
>
>  static bitmap update_bbs;
>
> +static bitmap ifcombine_candidate_bbs;
> +
>  /* For each cluster in all_clusters, merge all cluster->bbs.  Returns
>     number of bbs removed.  */
>
> @@ -1735,6 +1740,27 @@ apply_clusters (void)
>           bitmap_clear_bit (update_bbs, bb1->index);
>
>           replace_block_by (bb1, bb2);
> +
> +         basic_block imm_dominator = get_immediate_dominator (
> +                                       CDI_DOMINATORS, bb2);
> +
> +         /* Find conditions in if-statements that lead to bb.  */
> +         edge e;
> +         edge_iterator ei;
> +         FOR_EACH_EDGE (e, ei, bb2->preds)
> +           {
> +             basic_block then_tmp = NULL;
> +             basic_block else_tmp = NULL;
> +             if (recognize_if_then_else (e->src, &bb2, &else_tmp)
> +                 || recognize_if_then_else (e->src, &then_tmp, &bb2))
> +             {
> +               gcond *cond = safe_dyn_cast <gcond *> (*gsi_last_bb
> (e->src));
> +               if (cond)
> +                 if (e->src != imm_dominator)
> +                   bitmap_set_bit (ifcombine_candidate_bbs,
> e->src->index);
> +             }
> +           }
> +
>           nr_bbs_removed++;
>         }
>      }
> @@ -1797,6 +1823,55 @@ update_debug_stmts (void)
>      }
>  }
>
> +/* Return true if the dominance subtree rooted at BB has any outgoing edge
> +   to a block outside the subtree where the PHI at the target has a
> +   maybe-undef argument on that edge.  */
> +
> +static bool
> +maybe_undef_at_dom_frontier_p (basic_block bb)
> +{
> +  auto_vec<basic_block, 32> dom_bbs;
> +  auto_bitmap dominated;
> +
> +  /* Collect all blocks dominated by BB.  */
> +  dom_bbs.safe_push (bb);
> +  bitmap_set_bit (dominated, bb->index);
> +  for (unsigned int wi = 0; wi < dom_bbs.length (); wi++)
> +    {
> +      basic_block b = dom_bbs[wi];
> +      for (basic_block son = first_dom_son (CDI_DOMINATORS, b);
> +          son; son = next_dom_son (CDI_DOMINATORS, son))
> +       {
> +         bitmap_set_bit (dominated, son->index);
> +         dom_bbs.safe_push (son);
> +       }
> +    }
> +
> +  /* Check edges from dominated blocks to non-dominated blocks
> +     for maybe-undef PHI arguments.  */
> +  for (auto b : dom_bbs)
> +    {
> +      edge e;
> +      edge_iterator ei;
> +      FOR_EACH_EDGE (e, ei, b->succs)
> +       {
> +         if (bitmap_bit_p (dominated, e->dest->index))
> +           continue;
> +         for (gphi_iterator gsi = gsi_start_phis (e->dest);
> +              !gsi_end_p (gsi); gsi_next (&gsi))
> +           {
> +             gphi *phi = gsi.phi ();
> +             tree arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
> +             if (TREE_CODE (arg) == SSA_NAME
> +                 && ssa_name_maybe_undef_p (arg))
> +               return true;
> +           }
> +       }
> +    }
> +
> +  return false;
> +}
> +
>  /* Runs tail merge optimization.  */
>
>  unsigned int
> @@ -1807,6 +1882,7 @@ tail_merge_optimize (bool need_crit_edge_split)
>    bool loop_entered = false;
>    int iteration_nr = 0;
>    int max_iterations = param_max_tail_merge_iterations;
> +  unsigned int todo = 0;
>
>    if (!flag_tree_tail_merge
>        || max_iterations == 0)
> @@ -1833,6 +1909,7 @@ tail_merge_optimize (bool need_crit_edge_split)
>           loop_entered = true;
>           alloc_cluster_vectors ();
>           update_bbs = BITMAP_ALLOC (NULL);
> +         ifcombine_candidate_bbs = BITMAP_ALLOC (NULL);
>         }
>        else
>         reset_cluster_vectors ();
> @@ -1866,10 +1943,50 @@ tail_merge_optimize (bool need_crit_edge_split)
>
>    if (nr_bbs_removed_total > 0)
>      {
> +      bool need_dominance
> +       = MAY_HAVE_DEBUG_BIND_STMTS
> +         || (targetm.have_ccmp ()
> +             && !bitmap_empty_p (ifcombine_candidate_bbs));
> +
> +      if (need_dominance)
> +       calculate_dominance_info (CDI_DOMINATORS);
> +
>        if (MAY_HAVE_DEBUG_BIND_STMTS)
> +       update_debug_stmts ();
> +
> +      unsigned int i;
> +      bitmap_iterator bi;
> +      bool cfg_changed = false;
> +      /* On targets that support conditional compares (ccmp), try to
> combine
> +        conditions of blocks that were made to branch to the same
> successor
> +        by tail merging.  This is gated on ccmp support because it will
> +        produce beneficial code when ccmp instructions are available.  */
> +      if (targetm.have_ccmp ()
> +         && !bitmap_empty_p (ifcombine_candidate_bbs))
>         {
> -         calculate_dominance_info (CDI_DOMINATORS);
> -         update_debug_stmts ();
> +         mark_ssa_maybe_undefs ();
> +         EXECUTE_IF_SET_IN_BITMAP (ifcombine_candidate_bbs, 0, i, bi)
> +           {
> +             basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
> +             if (!bb)
> +               continue;
> +
> +             /* Do not combine conditions when the common successor's
> +                dominance subtree has PHI arguments that are
> maybe-undef.  */
> +             edge e;
> +             edge_iterator ei;
> +             bool dominated_undef = false;
> +             FOR_EACH_EDGE (e, ei, bb->succs)
> +               if (maybe_undef_at_dom_frontier_p (e->dest))
> +                 {
> +                   dominated_undef = true;
> +                   break;
> +                 }
> +             if (dominated_undef)
> +               continue;
> +
> +             cfg_changed |= tree_ssa_ifcombine_bb (bb);
> +           }
>         }
>
>        if (dump_file && (dump_flags & TDF_DETAILS))
> @@ -1879,6 +1996,8 @@ tail_merge_optimize (bool need_crit_edge_split)
>         }
>
>        mark_virtual_operands_for_renaming (cfun);
> +
> +      todo |= cfg_changed ? TODO_cleanup_cfg : 0;
>      }
>
>    delete_worklist ();
> @@ -1886,9 +2005,10 @@ tail_merge_optimize (bool need_crit_edge_split)
>      {
>        delete_cluster_vectors ();
>        BITMAP_FREE (update_bbs);
> +      BITMAP_FREE (ifcombine_candidate_bbs);
>      }
>
>    timevar_pop (TV_TREE_TAIL_MERGE);
>
> -  return 0;
> +  return todo;
>  }
> --
> 2.52.0
>
>

Reply via email to