Add constant folding for scalar integer SAT_ADD when both operands are
integer constants, and simplify x SAT_ADD 0 (with the zero in either operand
position) to x.  This lets such SAT_ADD expressions fold away early, which
improves optimization opportunities and avoids emitting unnecessary SAT_ADD
operations.
Bootstrapped and tested on aarch64-linux-gnu.

PR middle-end/123286

gcc/ChangeLog:
        * fold-const-call.cc (fold_internal_fn_sat_add): New function.
        (fold_const_call): Handle CFN_SAT_ADD.
        * match.pd: Add simplifications for x SAT_ADD 0 == x.
        * genmatch.cc (commutative_op): Add CFN_SAT_ADD.

gcc/testsuite/ChangeLog:
        * gcc.dg/pr123286.c: New test.

Signed-off-by: Naveen <[email protected]>
---
 gcc/fold-const-call.cc          | 41 +++++++++++++++++++++++++++++++++
 gcc/genmatch.cc                 |  1 +
 gcc/match.pd                    |  5 ++++
 gcc/testsuite/gcc.dg/pr123286.c | 33 ++++++++++++++++++++++++++
 4 files changed, 80 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr123286.c

diff --git a/gcc/fold-const-call.cc b/gcc/fold-const-call.cc
index 7dd1b21c34f..031e2d32f0c 100644
--- a/gcc/fold-const-call.cc
+++ b/gcc/fold-const-call.cc
@@ -1477,6 +1477,44 @@ fold_const_vec_extract (tree, tree arg0, tree)
   return NULL_TREE;
 }
 
+/* Fold a scalar integer IFN_SAT_ADD of OP0 and OP1 with result type TYPE.
+   Return the saturated sum as a constant of TYPE when both operands are
+   INTEGER_CSTs, or NULL_TREE when no folding is done.  */
+
+static tree
+fold_internal_fn_sat_add (tree type, tree op0, tree op1)
+{
+  /* Only scalar integral, non-vector result types are handled.  */
+  if (VECTOR_TYPE_P (type) || !INTEGRAL_NB_TYPE_P (type))
+    return NULL_TREE;
+
+  /* Both operands must be integer constants.  */
+  if (TREE_CODE (op0) != INTEGER_CST || TREE_CODE (op1) != INTEGER_CST)
+    return NULL_TREE;
+
+  const signop sgn = TYPE_UNSIGNED (type) ? UNSIGNED : SIGNED;
+  const unsigned int prec = TYPE_PRECISION (type);
+
+  /* Add in TYPE's signedness, recording how (if at all) it overflowed.  */
+  wi::overflow_type ovf;
+  wide_int sum = wi::add (wi::to_wide (op0), wi::to_wide (op1), sgn, &ovf);
+
+  if (ovf != wi::OVF_NONE)
+    {
+      if (sgn == UNSIGNED || ovf == wi::OVF_OVERFLOW)
+        /* Unsigned overflow of any kind, or signed overflow: clamp to max.  */
+        sum = wi::max_value (prec, sgn);
+      else if (ovf == wi::OVF_UNDERFLOW)
+        /* Signed underflow: clamp to the minimum value.  */
+        sum = wi::min_value (prec, SIGNED);
+      else
+        /* Some other signed overflow indication: leave the call unfolded.  */
+        return NULL_TREE;
+    }
+
+  return wide_int_to_tree (type, sum);
+}
+
 /* Try to evaluate:
 
       *RESULT = FN (*ARG0, *ARG1)
@@ -1886,6 +1924,9 @@ fold_const_call (combined_fn fn, tree type, tree arg0, tree arg1)
     case CFN_VEC_EXTRACT:
       return fold_const_vec_extract (type, arg0, arg1);
 
+    case CFN_SAT_ADD:
+      return fold_internal_fn_sat_add (type, arg0, arg1);
+
     case CFN_UBSAN_CHECK_ADD:
     case CFN_ADD_OVERFLOW:
       subcode = PLUS_EXPR;
diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index 633ff9e1db2..6c267ca8dc2 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -1268,6 +1268,7 @@ commutative_op (id_base *id, bool compares_are_commutative = false)
       case CFN_FNMS:
       case CFN_ADD_OVERFLOW:
       case CFN_MUL_OVERFLOW:
+      case CFN_SAT_ADD:
        return 0;
 
       case CFN_COND_ADD:
diff --git a/gcc/match.pd b/gcc/match.pd
index 8be7f1c60db..ecc1a362443 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8928,6 +8928,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
        && TYPE_UNSIGNED (TREE_TYPE (@0)))
    (cmp @1 @0))))
 
+/* x SAT_ADD 0 -> x; the :c flag also matches 0 SAT_ADD x.  */
+(simplify
+ (IFN_SAT_ADD:c @0 integer_zerop)
+  @0)
+
 /* Optimize A - B + -1 >= A into B >= A for unsigned comparisons.  */
 (for cmp (ge lt)
  (simplify
diff --git a/gcc/testsuite/gcc.dg/pr123286.c b/gcc/testsuite/gcc.dg/pr123286.c
new file mode 100644
index 00000000000..d6689aa91d4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr123286.c
@@ -0,0 +1,33 @@
+/* PR middle-end/123286 */
+/* { dg-do compile { target aarch64*-*-* } } */
+/* { dg-additional-options "-O2 -fdump-tree-optimized" } */
+
+#include <arm_neon.h>
+#include <stdint.h>
+
+/* Saturating add with zero as the second operand; the SAT_ADD is
+   expected to fold away.  */
+
+uint64_t
+f1 (uint64_t a)
+{
+  uint64x1_t va = vdup_n_u64 (a);
+  uint64x1_t vz = vdup_n_u64 (0);
+
+  return vqadd_u64 (va, vz)[0];
+}
+
+/* Saturating add with zero as the first operand; the SAT_ADD is
+   expected to fold away.  */
+
+uint64_t
+f2 (uint64_t a)
+{
+  uint64x1_t vz = vdup_n_u64 (0);
+  uint64x1_t va = vdup_n_u64 (a);
+
+  return vqadd_u64 (vz, va)[0];
+}
+
+/* Both SAT_ADD calls should fold away.  */
+/* { dg-final { scan-tree-dump-not "\\.SAT_ADD" "optimized" } } */
-- 
2.34.1

Reply via email to