On Fri, May 8, 2026 at 4:53 AM Andrew Pinski
<[email protected]> wrote:
>
> On Thu, Apr 16, 2026 at 11:55 PM Naveen
> <[email protected]> wrote:
> >
> > Extend scalar SAT_ADD constant folding to recognize cases where one operand is
> > zero. It allows SAT_ADD expressions with constant operands to fold away early.
> > The change improves optimization opportunities and avoids emitting unnecessary
> > SAT_ADD operations.
> > Bootstrapped and tested on aarch64-linux-gnu.
> >
> > PR middle-end/123826
> >
> > gcc/ChangeLog:
> >         * fold-const-call.cc (fold_internal_fn_sat_add): New function.
> >         (fold_const_call): Handle CFN_SAT_ADD.
> >         * match.pd: Add simplifications for x SAT_ADD 0 == x.
> >
> > gcc/testsuite/ChangeLog:
> >         * gcc.dg/pr123826.c: New test.
> >
> > Signed-off-by: Naveen <[email protected]>
> > ---
> >  gcc/fold-const-call.cc          | 30 ++++++++++++++++++++++++++++++
> >  gcc/match.pd                    | 11 +++++++++++
> >  gcc/testsuite/gcc.dg/pr123826.c | 33 +++++++++++++++++++++++++++++++++
> >  3 files changed, 74 insertions(+)
> >  create mode 100644 gcc/testsuite/gcc.dg/pr123826.c
> >
> > diff --git a/gcc/fold-const-call.cc b/gcc/fold-const-call.cc
> > index 7dd1b21c34f..37b44e19db0 100644
> > --- a/gcc/fold-const-call.cc
> > +++ b/gcc/fold-const-call.cc
> > @@ -1477,6 +1477,33 @@ fold_const_vec_extract (tree, tree arg0, tree)
> >    return NULL_TREE;
> >  }
> >
> > +/* Try to fold scalar integer IFN_SAT_ADD with operands OP0 and OP1.  */
> > +
> > +static tree
> > +fold_internal_fn_sat_add (tree type, tree op0, tree op1)
> > +{
> > +  if (!INTEGRAL_TYPE_P (type)
> > +      || VECTOR_TYPE_P (type)
> > +      || TREE_CODE (type) == BOOLEAN_TYPE)
> > +    return NULL_TREE;
>
> You can use the new predicate INTEGRAL_NB_TYPE_P instead of checking
> for BOOLEAN_TYPE directly.
>
> > +
> > +  if (TREE_CODE (op0) != INTEGER_CST
> > +      || TREE_CODE (op1) != INTEGER_CST)
> > +    return NULL_TREE;
>
>
> > +
> > +  if (!arith_overflowed_p (PLUS_EXPR, type, op0, op1))
> > +    {
> > +      tree res = fold_binary (PLUS_EXPR, type, op0, op1);
> > +      if (res && TREE_CODE (res) == INTEGER_CST && !TREE_OVERFLOW (res))
> > +       return res;
> > +      return NULL_TREE;
> > +    }
>
> I think the better way of doing this is what is done in
> simplify-rtx.cc for SS_PLUS and US_PLUS instead of creating trees
> here.
> SS_PLUS:
>           result = wi::add (pop0, pop1, SIGNED, &overflow);
>           if (overflow == wi::OVF_OVERFLOW)
>             result = wi::max_value (GET_MODE_PRECISION (int_mode), SIGNED);
>           else if (overflow == wi::OVF_UNDERFLOW)
>             result = wi::min_value (GET_MODE_PRECISION (int_mode), SIGNED);
>           else if (overflow != wi::OVF_NONE)
>             return NULL_RTX;
> US_PLUS:
>           result = wi::add (pop0, pop1, UNSIGNED, &overflow);
>           if (overflow != wi::OVF_NONE)
>             result = wi::max_value (GET_MODE_PRECISION (int_mode), UNSIGNED);
>           break;
>
> And then convert the result (wide_int) to a tree using wide_int_to_tree.
>
>
>
>
> > +
> > +  if (TYPE_UNSIGNED (type) || tree_int_cst_sgn (op0) >= 0)
> > +    return TYPE_MAX_VALUE (type);
> > +  return TYPE_MIN_VALUE (type);
> > +}
> > +
> >  /* Try to evaluate:
> >
> >        *RESULT = FN (*ARG0, *ARG1)
> > @@ -1886,6 +1913,9 @@ fold_const_call (combined_fn fn, tree type, tree arg0, tree arg1)
> >      case CFN_VEC_EXTRACT:
> >        return fold_const_vec_extract (type, arg0, arg1);
> >
> > +    case CFN_SAT_ADD:
> > +      return fold_internal_fn_sat_add (type, arg0, arg1);
> > +
> >      case CFN_UBSAN_CHECK_ADD:
> >      case CFN_ADD_OVERFLOW:
> >        subcode = PLUS_EXPR;
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index 7b652afb43d..9adc3f38989 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -8738,6 +8738,17 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >         && TYPE_UNSIGNED (TREE_TYPE (@0)))
> >     (cmp @1 @0))))
> >
> > +/* x SAT_ADD 0 == x */
> > +(simplify
> > + (CFN_SAT_ADD @0 INTEGER_CST@1)
> > + (if (integer_zerop (@1))
> > +     @0))
> > +
> > +(simplify
> > + (CFN_SAT_ADD INTEGER_CST@0 @1)
> > + (if (integer_zerop (@0))
> > +     @1))
>
> Use IFN_SAT_ADD instead of CFN_SAT_ADD.
> Also I think you could just do:
> (simplify
>   (IFN_SAT_ADD:c @0 integer_zerop)
>   @0)
>
> If `:c` does not work, then I would approve the use of `:C` here with
> a comment saying SAT_ADD is still commutative.

You can also add IFN_SAT_ADD to genmatch.cc:commutative_op so that `:c` is accepted.

>
> Thanks,
> Andrew
>
> > +
> >  /* Optimize A - B + -1 >= A into B >= A for unsigned comparisons.  */
> >  (for cmp (ge lt)
> >   (simplify
> > diff --git a/gcc/testsuite/gcc.dg/pr123826.c b/gcc/testsuite/gcc.dg/pr123826.c
> > new file mode 100644
> > index 00000000000..d6689aa91d4
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/pr123826.c
> > @@ -0,0 +1,33 @@
> > +/* PR middle-end/123826 */
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-O2 -fdump-tree-optimized" } */
> > +
> > +#include <arm_neon.h>
> > +#include <stdint.h>
> > +
> > +uint64_t
> > +f1 (uint64_t a)
> > +{
> > +  uint64x1_t va;
> > +  uint64x1_t vz;
> > +
> > +  va = vdup_n_u64 (a);
> > +  vz = vdup_n_u64 (0);
> > +
> > +  return vqadd_u64 (va, vz)[0];
> > +}
> > +
> > +uint64_t
> > +f2 (uint64_t a)
> > +{
> > +  uint64x1_t va;
> > +  uint64x1_t vz;
> > +
> > +  va = vdup_n_u64 (0);
> > +  vz = vdup_n_u64 (a);
> > +
> > +  return vqadd_u64 (va, vz)[0];
> > +}
> > +
> > +/* Both SAT_ADD calls should fold away.  */
> > +/* { dg-final { scan-tree-dump-not "\\.SAT_ADD" "optimized" } } */
> > --
> > 2.34.1
> >

Reply via email to