After tail merging combines duplicate blocks, their predecessors branch
to the same successor.  On targets that support conditional compares
(ccmp), combine the sequential conditions leading to the merged block
using the ifcombine infrastructure.  This enables the generation of ccmp
instructions.

The candidate selection identifies predecessor blocks of the merged block
that have conditional branches, excluding the immediate dominator.
After the tail-merge loop completes, dominance info is (re)computed, SSA
names that may be undefined are marked, and tree_ssa_ifcombine_bb is
called for each candidate.

To avoid bogus -Wmaybe-uninitialized warnings, candidates are skipped
when the dominance subtree of a successor has PHI arguments that are
maybe-undef at the dominance frontier, as combining conditions in that
case would change the predicate structure the uninit analysis relies on.

The xfail condition for gcc.dg/guality/pr54693-2.c is adjusted because
the newly combined conditions change the debug info on AArch64.

gcc/ChangeLog:

        PR tree-optimization/102793

        * tree-ssa-ifcombine.h: Remove extra blank line at end.
        * tree-ssa-tail-merge.cc: Include target.h, tree-ssa-ifcombine.h,
        and tree-ssa.h.
        (ifcombine_candidate_bbs): New static bitmap.
        (apply_clusters): After replacing a block, identify predecessors
        of the merged block as ifcombine candidates.
        (maybe_undef_at_dom_frontier_p): New function.
        (tail_merge_optimize): Compute dominance info when needed for
        ifcombine.  Call mark_ssa_maybe_undefs and tree_ssa_ifcombine_bb
        for each candidate on ccmp targets, skipping candidates with
        maybe-undef PHI args at the dominance frontier.  Return
        TODO_cleanup_cfg when ifcombine changed the CFG.

gcc/testsuite/ChangeLog:

        PR tree-optimization/102793

        * gcc.dg/guality/pr54693-2.c: Adjust xfail condition for
        AArch64 to account for the new ccmp-related transformations.
        * gcc.dg/tree-ssa/pr102793-1.c: New test.
        * gcc.dg/tree-ssa/pr102793-2.c: New test.

Signed-off-by: Konstantinos Eleftheriou <[email protected]>
---

Changes in v2:
- Use tree_ssa_ifcombine_bb for combining conditions.

 gcc/testsuite/gcc.dg/guality/pr54693-2.c   |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c |  50 ++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c |  51 +++++++++
 gcc/tree-ssa-ifcombine.h                   |   1 -
 gcc/tree-ssa-tail-merge.cc                 | 126 ++++++++++++++++++++-
 5 files changed, 225 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c

diff --git a/gcc/testsuite/gcc.dg/guality/pr54693-2.c 
b/gcc/testsuite/gcc.dg/guality/pr54693-2.c
index 229ef0efbea0..9d0080782410 100644
--- a/gcc/testsuite/gcc.dg/guality/pr54693-2.c
+++ b/gcc/testsuite/gcc.dg/guality/pr54693-2.c
@@ -18,7 +18,7 @@ foo (int x, int y, int z)
   while (x > 3 && y > 3 && z > 3)
     {          /* { dg-final { gdb-test .+2 "i" "v + 1" } } */
                /* { dg-final { gdb-test .+1 "x" "10 - i" { xfail { 
aarch64*-*-* && { any-opts "-fno-fat-lto-objects" } } } } } */
-      bar (i); /* { dg-final { gdb-test . "y" "20 - 2 * i" { xfail { 
aarch64*-*-* && { any-opts "-fno-fat-lto-objects" "-Os" } } } } } */
+      bar (i); /* { dg-final { gdb-test . "y" "20 - 2 * i" { xfail { 
aarch64*-*-* && { { any-opts "-flto" } && { no-opts "-fno-use-linker-plugin" } 
} } } } } */
                /* { dg-final { gdb-test .-1 "z" "30 - 3 * i" { xfail { 
aarch64*-*-* && { any-opts "-fno-fat-lto-objects" "-Os" } } } } } */
       i++, x--, y -= 2, z -= 3;
     }
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c
new file mode 100644
index 000000000000..074abdf4936b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-1.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-skip-if "requires ccmp support" { ! { aarch64*-*-* || apxf } } } */
+/* { dg-options "-O3 -fdump-tree-pre" } */
+
+typedef unsigned long uint64_t;
+
+int foo(void);
+
+int ccmp(uint64_t* s1, uint64_t* s2)
+{
+    uint64_t d1, d2, bar;
+    d1 = *s1++;
+    d2 = *s2++;
+    bar = (d1 ^ d2) & 0xabcd;
+    if (bar == 0 || d1 != d2)
+      return foo();
+    return 0;
+}
+
+int noccmp0(uint64_t* s1, uint64_t* s2)
+{
+    uint64_t d1, d2, bar;
+
+    d1 = *s1++;
+    d2 = *s2++;
+    bar = (d1 ^ d2) & 0xabcd;
+    if (bar == 0)
+      return foo();
+    if (d1 != d2)
+      return foo();
+    return 0;
+}
+
+int noccmp1(uint64_t* s1, uint64_t* s2)
+{
+    uint64_t d1, d2, d3, d4, bar;
+    d1 = *s1++;
+    d2 = *s2++;
+    d3 = *s1++;
+    d4 = *s2++;
+    bar = (d1 ^ d2) & 0xabcd;
+    if (bar == 0)
+      return foo();
+    if (d3 != d4)
+      return foo();
+    return 0;
+}
+
+/* Check for condition assignments for noccmp0 and noccmp1.  */
+/* { dg-final { scan-tree-dump-times {_\d+ = d\d+_\d+ != d\d+_\d+;\n  _\d+ = 
bar_\d+ == 0;} 2 "pre" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c
new file mode 100644
index 000000000000..99eb52d35f54
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr102793-2.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-skip-if "requires ccmp support" { ! { aarch64*-*-* || apxf } } } */
+/* { dg-options "-O3 -fdump-tree-pre" } */
+
+typedef unsigned long uint64_t;
+
+int foo(void);
+
+uint64_t noccmp0(uint64_t* s1, uint64_t* s2)
+{
+    uint64_t d1, d2, d3, d4, bar;
+    d1 = *s1++;
+    d2 = *s2++;
+    d3 = *s1++;
+    d4 = *s2++;
+    bar = (d1 ^ d2) & 0xabcd;
+    if (bar == 0)
+      return foo();
+    if (d3 != d4)
+      d3++;
+    else
+      return foo();
+    return d3;
+}
+
+uint64_t noccmp1(uint64_t* s1, uint64_t* s2)
+{
+    uint64_t d1, d2, d3, d4, bar;
+    d1 = *s1++;
+    d2 = *s2++;
+    d3 = *s1++;
+    d4 = *s2++;
+    bar = (d1 ^ d2) & 0xabcd;
+    if (bar == 0)
+      d3++;
+    else
+      return foo();
+    if (d3 > d4)
+      d3++;
+    else if (d1 != d2)
+      return foo ();
+    d3 = d3 + d4 + 1;
+    return d3;
+}
+
+/* Check for condition assignments in the case that the transformation
+   is applied.
+   The transformation should not be applied on noccmp1, where the foo call is
+   on the false branch of the first condition.  */
+/* { dg-final { scan-tree-dump-times {_\d+ = d\d+_\d+ != d\d+_\d+;\n  _\d+ = 
bar_\d+ != 0;} 1 "pre" } } */
+/* { dg-final { scan-tree-dump-times {if \(bar_\d+ == 0\)} 1 "pre" } } */
diff --git a/gcc/tree-ssa-ifcombine.h b/gcc/tree-ssa-ifcombine.h
index fc1e3a100cd1..95daac2d942a 100644
--- a/gcc/tree-ssa-ifcombine.h
+++ b/gcc/tree-ssa-ifcombine.h
@@ -26,4 +26,3 @@ bool tree_ssa_ifcombine_bb (basic_block);
 
 #endif
 
-
diff --git a/gcc/tree-ssa-tail-merge.cc b/gcc/tree-ssa-tail-merge.cc
index 4b9dbd886b6a..f72b30134c54 100644
--- a/gcc/tree-ssa-tail-merge.cc
+++ b/gcc/tree-ssa-tail-merge.cc
@@ -189,6 +189,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "system.h"
 #include "coretypes.h"
 #include "backend.h"
+#include "target.h"
 #include "tree.h"
 #include "gimple.h"
 #include "cfghooks.h"
@@ -202,9 +203,11 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-cfg.h"
 #include "tree-into-ssa.h"
 #include "tree-ssa-sccvn.h"
+#include "tree-ssa-ifcombine.h"
 #include "cfgloop.h"
 #include "tree-eh.h"
 #include "tree-cfgcleanup.h"
+#include "tree-ssa.h"
 
 const int ignore_edge_flags = EDGE_DFS_BACK | EDGE_EXECUTABLE;
 
@@ -1707,6 +1710,8 @@ replace_block_by (basic_block bb1, basic_block bb2)
 
 static bitmap update_bbs;
 
+static bitmap ifcombine_candidate_bbs;
+
 /* For each cluster in all_clusters, merge all cluster->bbs.  Returns
    number of bbs removed.  */
 
@@ -1735,6 +1740,27 @@ apply_clusters (void)
          bitmap_clear_bit (update_bbs, bb1->index);
 
          replace_block_by (bb1, bb2);
+
+         basic_block imm_dominator = get_immediate_dominator (
+                                       CDI_DOMINATORS, bb2);
+
+         /* Find conditions in if-statements that lead to bb.  */
+         edge e;
+         edge_iterator ei;
+         FOR_EACH_EDGE (e, ei, bb2->preds)
+           {
+             basic_block then_tmp = NULL;
+             basic_block else_tmp = NULL;
+             if (recognize_if_then_else (e->src, &bb2, &else_tmp)
+                 || recognize_if_then_else (e->src, &then_tmp, &bb2))
+             {
+               gcond *cond = safe_dyn_cast <gcond *> (*gsi_last_bb (e->src));
+               if (cond)
+                 if (e->src != imm_dominator)
+                   bitmap_set_bit (ifcombine_candidate_bbs, e->src->index);
+             }
+           }
+
          nr_bbs_removed++;
        }
     }
@@ -1797,6 +1823,55 @@ update_debug_stmts (void)
     }
 }
 
+/* Return true if the dominance subtree rooted at BB has any outgoing edge
+   to a block outside the subtree where the PHI at the target has a
+   maybe-undef argument on that edge.  */
+
+static bool
+maybe_undef_at_dom_frontier_p (basic_block bb)
+{
+  auto_vec<basic_block, 32> dom_bbs;
+  auto_bitmap dominated;
+
+  /* Collect all blocks dominated by BB.  */
+  dom_bbs.safe_push (bb);
+  bitmap_set_bit (dominated, bb->index);
+  for (unsigned int wi = 0; wi < dom_bbs.length (); wi++)
+    {
+      basic_block b = dom_bbs[wi];
+      for (basic_block son = first_dom_son (CDI_DOMINATORS, b);
+          son; son = next_dom_son (CDI_DOMINATORS, son))
+       {
+         bitmap_set_bit (dominated, son->index);
+         dom_bbs.safe_push (son);
+       }
+    }
+
+  /* Check edges from dominated blocks to non-dominated blocks
+     for maybe-undef PHI arguments.  */
+  for (auto b : dom_bbs)
+    {
+      edge e;
+      edge_iterator ei;
+      FOR_EACH_EDGE (e, ei, b->succs)
+       {
+         if (bitmap_bit_p (dominated, e->dest->index))
+           continue;
+         for (gphi_iterator gsi = gsi_start_phis (e->dest);
+              !gsi_end_p (gsi); gsi_next (&gsi))
+           {
+             gphi *phi = gsi.phi ();
+             tree arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
+             if (TREE_CODE (arg) == SSA_NAME
+                 && ssa_name_maybe_undef_p (arg))
+               return true;
+           }
+       }
+    }
+
+  return false;
+}
+
 /* Runs tail merge optimization.  */
 
 unsigned int
@@ -1807,6 +1882,7 @@ tail_merge_optimize (bool need_crit_edge_split)
   bool loop_entered = false;
   int iteration_nr = 0;
   int max_iterations = param_max_tail_merge_iterations;
+  unsigned int todo = 0;
 
   if (!flag_tree_tail_merge
       || max_iterations == 0)
@@ -1833,6 +1909,7 @@ tail_merge_optimize (bool need_crit_edge_split)
          loop_entered = true;
          alloc_cluster_vectors ();
          update_bbs = BITMAP_ALLOC (NULL);
+         ifcombine_candidate_bbs = BITMAP_ALLOC (NULL);
        }
       else
        reset_cluster_vectors ();
@@ -1866,10 +1943,50 @@ tail_merge_optimize (bool need_crit_edge_split)
 
   if (nr_bbs_removed_total > 0)
     {
+      bool need_dominance
+       = MAY_HAVE_DEBUG_BIND_STMTS
+         || (targetm.have_ccmp ()
+             && !bitmap_empty_p (ifcombine_candidate_bbs));
+
+      if (need_dominance)
+       calculate_dominance_info (CDI_DOMINATORS);
+
       if (MAY_HAVE_DEBUG_BIND_STMTS)
+       update_debug_stmts ();
+
+      unsigned int i;
+      bitmap_iterator bi;
+      bool cfg_changed = false;
+      /* On targets that support conditional compares (ccmp), try to combine
+        conditions of blocks that were made to branch to the same successor
+        by tail merging.  This is gated on ccmp support because it will
+        produce beneficial code when ccmp instructions are available.  */
+      if (targetm.have_ccmp ()
+         && !bitmap_empty_p (ifcombine_candidate_bbs))
        {
-         calculate_dominance_info (CDI_DOMINATORS);
-         update_debug_stmts ();
+         mark_ssa_maybe_undefs ();
+         EXECUTE_IF_SET_IN_BITMAP (ifcombine_candidate_bbs, 0, i, bi)
+           {
+             basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
+             if (!bb)
+               continue;
+
+             /* Do not combine conditions when the common successor's
+                dominance subtree has PHI arguments that are maybe-undef.  */
+             edge e;
+             edge_iterator ei;
+             bool dominated_undef = false;
+             FOR_EACH_EDGE (e, ei, bb->succs)
+               if (maybe_undef_at_dom_frontier_p (e->dest))
+                 {
+                   dominated_undef = true;
+                   break;
+                 }
+             if (dominated_undef)
+               continue;
+
+             cfg_changed |= tree_ssa_ifcombine_bb (bb);
+           }
        }
 
       if (dump_file && (dump_flags & TDF_DETAILS))
@@ -1879,6 +1996,8 @@ tail_merge_optimize (bool need_crit_edge_split)
        }
 
       mark_virtual_operands_for_renaming (cfun);
+
+      todo |= cfg_changed ? TODO_cleanup_cfg : 0;
     }
 
   delete_worklist ();
@@ -1886,9 +2005,10 @@ tail_merge_optimize (bool need_crit_edge_split)
     {
       delete_cluster_vectors ();
       BITMAP_FREE (update_bbs);
+      BITMAP_FREE (ifcombine_candidate_bbs);
     }
 
   timevar_pop (TV_TREE_TAIL_MERGE);
 
-  return 0;
+  return todo;
 }
-- 
2.52.0

Reply via email to