From: Pan Li <pan2...@intel.com>

After we support one gassign form of the unsigned .SAT_ADD,  we
would like to support more forms including both the branch and
branchless.  There are 5 other forms of .SAT_ADD,  list as below:

Form 1:
  #define SAT_ADD_U_1(T) \
  T sat_add_u_1_##T(T x, T y) \
  { \
    return (T)(x + y) >= x ? (x + y) : -1; \
  }

Form 2:
  #define SAT_ADD_U_2(T) \
  T sat_add_u_2_##T(T x, T y) \
  { \
    T ret; \
    T overflow = __builtin_add_overflow (x, y, &ret); \
    return (T)(-overflow) | ret; \
  }

Form 3:
  #define SAT_ADD_U_3(T) \
  T sat_add_u_3_##T (T x, T y) \
  { \
    T ret; \
    return __builtin_add_overflow (x, y, &ret) ? -1 : ret; \
  }

Form 4:
  #define SAT_ADD_U_4(T) \
  T sat_add_u_4_##T (T x, T y) \
  { \
    T ret; \
    return __builtin_add_overflow (x, y, &ret) == 0 ? ret : -1; \
  }

Form 5:
  #define SAT_ADD_U_5(T) \
  T sat_add_u_5_##T(T x, T y) \
  { \
    return (T)(x + y) < x ? -1 : (x + y); \
  }

Take the forms 3 of above as example:

uint64_t
sat_add (uint64_t x, uint64_t y)
{
  uint64_t ret;
  return __builtin_add_overflow (x, y, &ret) ? -1 : ret;
}

Before this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  long unsigned int _2;
  uint64_t _3;
  __complex__ long unsigned int _6;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _6 = .ADD_OVERFLOW (x_4(D), y_5(D));
  _2 = IMAGPART_EXPR <_6>;
  if (_2 != 0)
    goto <bb 4>; [35.00%]
  else
    goto <bb 3>; [65.00%]
;;    succ:       4
;;                3

;;   basic block 3, loop depth 0
;;    pred:       2
  _1 = REALPART_EXPR <_6>;
;;    succ:       4

;;   basic block 4, loop depth 0
;;    pred:       3
;;                2
  # _3 = PHI <_1(3), 18446744073709551615(2)>
  return _3;
;;    succ:       EXIT
}

After this patch:
uint64_t sat_add (uint64_t x, uint64_t y)
{
  long unsigned int _12;

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _12 = .SAT_ADD (x_4(D), y_5(D)); [tail call]
  return _12;
;;    succ:       EXIT
}

The below test suites are still running, will update it later.
* The x86 bootstrap test.
* The x86 fully regression test.
* The riscv fully regression test.

gcc/ChangeLog:

        * genmatch.cc (dt_node::gen_kids): Add new arg of predicate id.
        (allow_phi_predicate_p): New func impl to check the phi
        predicate is allowed or not.
        (dt_node::gen_kids_1): Add COND_EXPR gen for phi node if allowed.
        (dt_operand::gen_phi_on_cond):
        (write_predicate): Init the predicate id before gen_kids.
        * match.pd: Add more forms of unsigned_integer_sat_add and
        comments.
        * tree-ssa-math-opts.cc (match_saturation_arith): Rename from.
        (match_assign_saturation_arith): Rename to.
        (match_phi_saturation_arith): New func impl to match phi.
        (math_opts_dom_walker::after_dom_children): Add phi match for
        echo bb.

Signed-off-by: Pan Li <pan2...@intel.com>
---
 gcc/genmatch.cc           | 123 ++++++++++++++++++++++++++++++++++++--
 gcc/match.pd              |  43 ++++++++++++-
 gcc/tree-ssa-math-opts.cc |  51 +++++++++++++++-
 3 files changed, 210 insertions(+), 7 deletions(-)

diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index f1e0e7abe0c..816d2dafd23 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -1767,6 +1767,7 @@ public:
   unsigned level;
   dt_node *parent;
   vec<dt_node *> kids;
+  const char *id;
 
   /* Statistics.  */
   unsigned num_leafs;
@@ -1786,7 +1787,7 @@ public:
   virtual void gen (FILE *, int, bool, int) {}
 
   void gen_kids (FILE *, int, bool, int);
-  void gen_kids_1 (FILE *, int, bool, int,
+  void gen_kids_1 (FILE *, const char *, int, bool, int,
                   const vec<dt_operand *> &, const vec<dt_operand *> &,
                   const vec<dt_operand *> &, const vec<dt_operand *> &,
                   const vec<dt_operand *> &, const vec<dt_node *> &);
@@ -1819,6 +1820,7 @@ public:
 
   char *get_name (char *);
   void gen_opname (char *, unsigned);
+  void gen_phi_on_cond (FILE *, int, bool, int);
 };
 
 /* Leaf node of the decision tree, used for DT_SIMPLIFY.  */
@@ -3173,7 +3175,7 @@ dt_node::gen_kids (FILE *f, int indent, bool gimple, int 
depth)
             for what we have collected sofar.  */
          fns.qsort (fns_cmp);
          generic_fns.qsort (fns_cmp);
-         gen_kids_1 (f, indent, gimple, depth, gimple_exprs, generic_exprs,
+         gen_kids_1 (f, id, indent, gimple, depth, gimple_exprs, generic_exprs,
                      fns, generic_fns, preds, others);
          /* And output the true operand itself.  */
          kids[i]->gen (f, indent, gimple, depth);
@@ -3191,14 +3193,21 @@ dt_node::gen_kids (FILE *f, int indent, bool gimple, 
int depth)
   /* Generate code for the remains.  */
   fns.qsort (fns_cmp);
   generic_fns.qsort (fns_cmp);
-  gen_kids_1 (f, indent, gimple, depth, gimple_exprs, generic_exprs,
+  gen_kids_1 (f, id, indent, gimple, depth, gimple_exprs, generic_exprs,
              fns, generic_fns, preds, others);
 }
 
+static bool
+allow_phi_predicate_p (const char *id)
+{
+  return id && strcmp (id, "unsigned_integer_sat_add") == 0;
+}
+
 /* Generate matching code for the children of the decision tree node.  */
 
 void
-dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
+dt_node::gen_kids_1 (FILE *f, const char *id, int indent, bool gimple,
+                    int depth,
                     const vec<dt_operand *> &gimple_exprs,
                     const vec<dt_operand *> &generic_exprs,
                     const vec<dt_operand *> &fns,
@@ -3251,6 +3260,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, 
int depth,
                          depth);
          indent += 4;
          fprintf_indent (f, indent, "{\n");
+         bool cond_expr_p = false;
          for (unsigned i = 0; i < exprs_len; ++i)
            {
              expr *e = as_a <expr *> (gimple_exprs[i]->op);
@@ -3262,6 +3272,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, 
int depth,
              else
                {
                  id_base *op = e->operation;
+                 cond_expr_p |= *op == COND_EXPR;
                  if (*op == CONVERT_EXPR || *op == NOP_EXPR)
                    fprintf_indent (f, indent, "CASE_CONVERT:\n");
                  else
@@ -3275,6 +3286,27 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, 
int depth,
          fprintf_indent (f, indent, "default:;\n");
          fprintf_indent (f, indent, "}\n");
          indent -= 4;
+
+         if (cond_expr_p && allow_phi_predicate_p (id))
+           {
+             fprintf_indent (f, indent,
+               "else if (gphi *_a%d = dyn_cast <gphi *> (_d%d))\n",
+               depth, depth);
+             indent += 2;
+             fprintf_indent (f, indent, "{\n");
+             indent += 2;
+
+             for (unsigned i = 0; i < exprs_len; i++)
+               {
+                 expr *e = as_a <expr *> (gimple_exprs[i]->op);
+                 if (*e->operation == COND_EXPR)
+                   gimple_exprs[i]->gen_phi_on_cond (f, indent, true, depth);
+               }
+
+             indent -= 2;
+             fprintf_indent (f, indent, "}\n");
+             indent -= 2;
+           }
        }
 
       if (fns_len)
@@ -3483,6 +3515,87 @@ dt_operand::gen (FILE *f, int indent, bool gimple, int 
depth)
     }
 }
 
+/* Generate matching code for the phi when meet COND_EXPR.  */
+
+void
+dt_operand::gen_phi_on_cond (FILE *f, int indent, bool gimple, int depth)
+{
+  fprintf_indent (f, indent,
+    "basic_block _b%d = gimple_bb (_a%d);\n", depth, depth);
+
+  fprintf_indent (f, indent,
+    "if (EDGE_COUNT (_b%d->preds) == 2 && gimple_phi_num_args (_a%d) == 2)\n",
+    depth, depth);
+
+  indent += 2;
+  fprintf_indent (f, indent, "{\n");
+  indent += 2;
+
+  fprintf_indent (f, indent,
+    "basic_block _pb_0_%d = EDGE_PRED (_b%d, 0)->src;\n", depth, depth);
+  fprintf_indent (f, indent,
+    "basic_block _pb_1_%d = EDGE_PRED (_b%d, 1)->src;\n", depth, depth);
+  fprintf_indent (f, indent,
+    "basic_block _db_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_pb_0_%d)) ? "
+    "_pb_0_%d : _pb_1_%d;\n", depth, depth, depth, depth);
+  fprintf_indent (f, indent,
+    "basic_block _other_db_%d = safe_dyn_cast <gcond *> "
+    "(*gsi_last_bb (_pb_0_%d)) ? _pb_1_%d : _pb_0_%d;\n",
+    depth, depth, depth, depth);
+
+  fprintf_indent (f, indent,
+    "gcond *_ct_%d = safe_dyn_cast <gcond *> (*gsi_last_bb (_db_%d));\n ",
+    depth, depth);
+  fprintf_indent (f, indent, "if (_ct_%d "
+    "&& EDGE_COUNT (_other_db_%d->preds) == 1 "
+    "&& EDGE_COUNT (_other_db_%d->succs) == 1 "
+    "&& EDGE_SUCC (_other_db_%d, 0)->dest == _b%d)\n",
+    depth, depth, depth, depth, depth);
+
+  indent += 2;
+  fprintf_indent (f, indent, "{\n");
+  indent += 2;
+
+  fprintf_indent (f, indent,
+    "tree _cond_lhs_%d = gimple_cond_lhs (_ct_%d);\n", depth, depth);
+  fprintf_indent (f, indent,
+    "tree _cond_rhs_%d = gimple_cond_rhs (_ct_%d);\n", depth, depth);
+
+  char opname_0[20];
+  char opname_1[20];
+  char opname_2[20];
+  gen_opname (opname_0, 0);
+
+  fprintf_indent (f, indent,
+    "tree %s = build2 (gimple_cond_code (_ct_%d), "
+    "boolean_type_node, _cond_lhs_%d, _cond_rhs_%d);\n",
+    opname_0, depth, depth, depth);
+
+  fprintf_indent (f, indent,
+    "bool _arg_0_is_true_%d = gimple_phi_arg_edge (_a%d, 0)->flags "
+    " & EDGE_TRUE_VALUE;\n", depth, depth);
+
+  gen_opname (opname_1, 1);
+  fprintf_indent (f, indent,
+    "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 0 : 1);\n",
+    opname_1, depth, depth);
+
+  gen_opname (opname_2, 2);
+  fprintf_indent (f, indent,
+    "tree %s = gimple_phi_arg_def (_a%d, _arg_0_is_true_%d ? 1 : 0);\n",
+    opname_2, depth, depth);
+
+  gen_kids (f, indent, gimple, depth);
+
+  indent -= 2;
+  fprintf_indent (f, indent, "}\n");
+  indent -= 2;
+
+  indent -= 2;
+  fprintf_indent (f, indent, "}\n");
+  indent -= 2;
+}
+
 /* Emit a logging call to the debug file to the file F, with the INDENT from
    either the RESULT location or the S's match location if RESULT is null. */
 static void
@@ -4263,6 +4376,8 @@ write_predicate (FILE *f, predicate_id *p, decision_tree 
&dt, bool gimple)
 
   if (!gimple)
     fprintf_indent (f, 2, "if (TREE_SIDE_EFFECTS (t)) return false;\n");
+
+  dt.root->id = p->id;
   dt.root->gen_kids (f, 2, gimple, 0);
 
   fprintf_indent (f, 2, "return false;\n"
diff --git a/gcc/match.pd b/gcc/match.pd
index 024e3350465..ddbdb7f7638 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3046,31 +3046,48 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
 
 /* Unsigned Saturation Add */
+/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
+   SAT_ADD = (X + Y) | -((X + Y) < X)  */
 (match (usadd_left_part_1 @0 @1)
  (plus:c @0 @1)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
+/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
+   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 
0) */
 (match (usadd_left_part_2 @0 @1)
  (realpart (IFN_ADD_OVERFLOW:c @0 @1))
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
+/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
+   SAT_ADD = (X + Y) | -((type)(X + Y) < X)  */
 (match (usadd_right_part_1 @0 @1)
  (negate (convert (lt (plus:c @0 @1) @0)))
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
+/* SAT_ADD = usadd_left_part_1 | usadd_right_part_1, aka:
+   SAT_ADD = (X + Y) | -(X > (X + Y))  */
 (match (usadd_right_part_1 @0 @1)
  (negate (convert (gt @0 (plus:c @0 @1))))
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
+/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
+   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 
0) */
 (match (usadd_right_part_2 @0 @1)
  (negate (convert (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)))
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
       && types_match (type, @0, @1))))
 
+/* SAT_ADD = usadd_left_part_2 | usadd_right_part_2, aka:
+   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> */
+(match (usadd_right_part_2 @0 @1)
+ (negate (imagpart (IFN_ADD_OVERFLOW:c @0 @1)))
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+      && types_match (type, @0, @1))))
+
 /* We cannot merge or overload usadd_left_part_1 and usadd_left_part_2
    because the sub part of left_part_2 cannot work with right_part_1.
    For example, left_part_2 pattern focus one .ADD_OVERFLOW but the
@@ -3082,10 +3099,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match (unsigned_integer_sat_add @0 @1)
  (bit_ior:c (usadd_left_part_1 @0 @1) (usadd_right_part_1 @0 @1)))
 
-/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW).  */
+/* Unsigned saturation add, case 2 (branchless with .ADD_OVERFLOW):
+   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | -IMAGPART_EXPR <.ADD_OVERFLOW> or
+   SAT_ADD = REALPART_EXPR <.ADD_OVERFLOW> | (IMAGPART_EXPR <.ADD_OVERFLOW> != 
0) */
 (match (unsigned_integer_sat_add @0 @1)
  (bit_ior:c (usadd_left_part_2 @0 @1) (usadd_right_part_2 @0 @1)))
 
+/* Unsigned saturation add, case 3 (branch with ge):
+   SAT_U_ADD = (X + Y) >= x ? (X + Y) : -1.  */
+(match (unsigned_integer_sat_add @0 @1)
+ (cond (ge (usadd_left_part_1@2 @0 @1) @0) @2 integer_minus_onep))
+
+/* Unsigned saturation add, case 4 (branch with lt):
+   SAT_U_ADD = (X + Y) < x ? -1 : (X + Y).  */
+(match (unsigned_integer_sat_add @0 @1)
+ (cond (lt (usadd_left_part_1@2 @0 @1) @0) integer_minus_onep @2))
+
+/* Unsigned saturation add, case 5 (branch with eq .ADD_OVERFLOW):
+   SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> == 0 ? .ADD_OVERFLOW : -1.  */
+(match (unsigned_integer_sat_add @0 @1)
+ (cond (eq (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
+  (usadd_left_part_2 @0 @1) integer_minus_onep))
+
+/* Unsigned saturation add, case 6 (branch with ne .ADD_OVERFLOW):
+   SAT_U_ADD = REALPART_EXPR <.ADD_OVERFLOW> != 0 ? -1 : .ADD_OVERFLOW.  */
+(match (unsigned_integer_sat_add @0 @1)
+ (cond (ne (imagpart (IFN_ADD_OVERFLOW:c @0 @1)) integer_zerop)
+  integer_minus_onep (usadd_left_part_2 @0 @1)))
+
 /* x >  y  &&  x != XXX_MIN  -->  x > y
    x >  y  &&  x == XXX_MIN  -->  false . */
 (for eqne (eq ne)
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 62da1c5ee08..8624d414347 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4099,7 +4099,7 @@ extern bool gimple_unsigned_integer_sat_add (tree, tree*, 
tree (*)(tree));
  *      =>
  *      _12 = .SAT_ADD (_4, _6);  */
 static void
-match_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
+match_assign_saturation_arith (gimple_stmt_iterator *gsi, gassign *stmt)
 {
   gcall *call = NULL;
 
@@ -4116,6 +4116,47 @@ match_saturation_arith (gimple_stmt_iterator *gsi, 
gassign *stmt)
     }
 }
 
+/*
+ * Try to match saturation arith pattern(s).
+ * From:
+ *   <bb 2> :
+ *   _1 = x_3(D) + y_4(D);
+ *   if (_1 >= x_3(D))
+ *     goto <bb 3>; [INV]
+ *   else
+ *     goto <bb 4>; [INV]
+ *
+ *   <bb 3> :
+ *
+ *   <bb 4> :
+ *   # _2 = PHI <255(2), _1(3)>
+ *
+ * To:
+ *   <bb 4> [local count: 1073741824]:
+ *   _2 = .SAT_ADD (x_4(D), y_5(D));  */
+
+static void
+match_phi_saturation_arith (gimple_stmt_iterator *gsi, gphi *phi)
+{
+  if (gimple_phi_num_args (phi) != 2)
+    return;
+
+  tree ops[2];
+  tree phi_result = gimple_phi_result (phi);
+
+  if (gimple_unsigned_integer_sat_add (phi_result, ops, NULL)
+      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (phi_result),
+                                        OPTIMIZE_FOR_BOTH))
+    {
+      gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], 
ops[1]);
+      gimple_call_set_lhs (call, phi_result);
+      gsi_insert_before (gsi, call, GSI_SAME_STMT);
+
+      gimple_stmt_iterator psi = gsi_for_stmt (phi);
+      remove_phi_node (&psi, false);
+    }
+}
+
 /* Recognize for unsigned x
    x = y - z;
    if (x > y)
@@ -6029,6 +6070,12 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
 
   fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
 
+  for (gphi_iterator psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next 
(&psi))
+    {
+      gimple_stmt_iterator gsi = gsi_last_bb (bb);
+      match_phi_saturation_arith (&gsi, psi.phi ());
+    }
+
   for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
     {
       gimple *stmt = gsi_stmt (gsi);
@@ -6078,7 +6125,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
              break;
 
            case BIT_IOR_EXPR:
-             match_saturation_arith (&gsi, as_a<gassign *> (stmt));
+             match_assign_saturation_arith (&gsi, as_a<gassign *> (stmt));
              /* fall-through  */
            case BIT_XOR_EXPR:
              match_uaddc_usubc (&gsi, stmt, code);
-- 
2.34.1

Reply via email to