On Fri, May 8, 2026 at 4:47 AM Naveen
<[email protected]> wrote:
>
> Extend scalar SAT_ADD constant folding to recognize cases where one operand is
> zero. It allows SAT_ADD expressions with constant operands to fold away early.
> The change improves optimization opportunities and avoids emitting unnecessary
> SAT_ADD operations.
> Bootstrapped and tested on aarch64-linux-gnu.
>
> PR middle-end/123286
>
> gcc/ChangeLog:
>         * fold-const-call.cc (fold_internal_fn_sat_add): New function.
>         (fold_const_call): Handle CFN_SAT_ADD.
>         * match.pd: Add simplifications for x SAT_ADD 0 == x.
>         * genmatch.cc (commutative_op): Add CFN_SAT_ADD.
>
> gcc/testsuite/ChangeLog:
>         * gcc.dg/pr123286.c: New test.
>
> Signed-off-by: Naveen <[email protected]>
> ---
>  gcc/fold-const-call.cc          | 41 +++++++++++++++++++++++++++++++++
>  gcc/genmatch.cc                 |  1 +
>  gcc/match.pd                    |  5 ++++
>  gcc/testsuite/gcc.dg/pr123286.c | 33 ++++++++++++++++++++++++++
>  4 files changed, 80 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/pr123286.c
>
> diff --git a/gcc/fold-const-call.cc b/gcc/fold-const-call.cc
> index 7dd1b21c34f..031e2d32f0c 100644
> --- a/gcc/fold-const-call.cc
> +++ b/gcc/fold-const-call.cc
> @@ -1477,6 +1477,44 @@ fold_const_vec_extract (tree, tree arg0, tree)
>    return NULL_TREE;
>  }
>
> +/* Try to fold scalar integer IFN_SAT_ADD with operands OP0 and OP1.  */
> +
> +static tree
> +fold_internal_fn_sat_add (tree type, tree op0, tree op1)
> +{
> +  if (!INTEGRAL_NB_TYPE_P (type)
> +      || VECTOR_TYPE_P (type))
> +    return NULL_TREE;

The !INTEGRAL_NB_TYPE_P (type) test already rejects vector types, so
you can safely remove the VECTOR_TYPE_P (type) check.

> +
> +  if (TREE_CODE (op0) != INTEGER_CST
> +      || TREE_CODE (op1) != INTEGER_CST)
> +    return NULL_TREE;
> +
> +  wi::overflow_type overflow;
> +  unsigned int prec = TYPE_PRECISION (type);
> +
> +  if (TYPE_UNSIGNED (type))
> +    {
> +      wide_int result = wi::add (wi::to_wide (op0), wi::to_wide (op1),
> +                                UNSIGNED, &overflow);
> +      if (overflow != wi::OVF_NONE)
> +       result = wi::max_value (prec, UNSIGNED);
> +      return wide_int_to_tree (type, result);
> +    }
> +  else
> +    {
> +      wide_int result = wi::add (wi::to_wide (op0), wi::to_wide (op1),
> +                                SIGNED, &overflow);
> +      if (overflow == wi::OVF_OVERFLOW)
> +       result = wi::max_value (prec, SIGNED);
> +      else if (overflow == wi::OVF_UNDERFLOW)
> +       result = wi::min_value (prec, SIGNED);
> +      else if (overflow != wi::OVF_NONE)
> +       return NULL_TREE;
> +      return wide_int_to_tree (type, result);
> +    }

I think we can do slightly better, with less duplication:

  wide_int result = wi::add (wi::to_wide (op0), wi::to_wide (op1),
                             TYPE_SIGN (type), &overflow);
  if (overflow == wi::OVF_NONE)
    ;
  else if (TYPE_UNSIGNED (type))
    result = wi::max_value (prec, UNSIGNED);
  else
    {
      if (overflow == wi::OVF_OVERFLOW)
        result = wi::max_value (prec, SIGNED);
      else if (overflow == wi::OVF_UNDERFLOW)
        result = wi::min_value (prec, SIGNED);
      else
        return NULL_TREE;
    }
  return wide_int_to_tree (type, result);

Thanks,
Andrea

> +}
> +
>  /* Try to evaluate:
>
>        *RESULT = FN (*ARG0, *ARG1)
> @@ -1886,6 +1924,9 @@ fold_const_call (combined_fn fn, tree type, tree arg0, tree arg1)
>      case CFN_VEC_EXTRACT:
>        return fold_const_vec_extract (type, arg0, arg1);
>
> +    case CFN_SAT_ADD:
> +      return fold_internal_fn_sat_add (type, arg0, arg1);
> +
>      case CFN_UBSAN_CHECK_ADD:
>      case CFN_ADD_OVERFLOW:
>        subcode = PLUS_EXPR;
> diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
> index 633ff9e1db2..6c267ca8dc2 100644
> --- a/gcc/genmatch.cc
> +++ b/gcc/genmatch.cc
> @@ -1268,6 +1268,7 @@ commutative_op (id_base *id, bool compares_are_commutative = false)
>        case CFN_FNMS:
>        case CFN_ADD_OVERFLOW:
>        case CFN_MUL_OVERFLOW:
> +      case CFN_SAT_ADD:
>         return 0;
>
>        case CFN_COND_ADD:
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 8be7f1c60db..ecc1a362443 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -8928,6 +8928,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>         && TYPE_UNSIGNED (TREE_TYPE (@0)))
>     (cmp @1 @0))))
>
> +/* x SAT_ADD 0 == x.  */
> +(simplify
> + (IFN_SAT_ADD:c @0 integer_zerop)
> +  @0)
> +
>  /* Optimize A - B + -1 >= A into B >= A for unsigned comparisons.  */
>  (for cmp (ge lt)
>   (simplify
> diff --git a/gcc/testsuite/gcc.dg/pr123286.c b/gcc/testsuite/gcc.dg/pr123286.c
> new file mode 100644
> index 00000000000..d6689aa91d4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr123286.c
> @@ -0,0 +1,33 @@
> +/* PR middle-end/123286 */
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O2 -fdump-tree-optimized" } */
> +
> +#include <arm_neon.h>
> +#include <stdint.h>
> +
> +uint64_t
> +f1 (uint64_t a)
> +{
> +  uint64x1_t va;
> +  uint64x1_t vz;
> +
> +  va = vdup_n_u64 (a);
> +  vz = vdup_n_u64 (0);
> +
> +  return vqadd_u64 (va, vz)[0];
> +}
> +
> +uint64_t
> +f2 (uint64_t a)
> +{
> +  uint64x1_t va;
> +  uint64x1_t vz;
> +
> +  va = vdup_n_u64 (0);
> +  vz = vdup_n_u64 (a);
> +
> +  return vqadd_u64 (va, vz)[0];
> +}
> +
> +/* Both SAT_ADD calls should fold away.  */
> +/* { dg-final { scan-tree-dump-not "\\.SAT_ADD" "optimized" } } */
> --
> 2.34.1
>

Reply via email to