On Fri, May 8, 2026 at 4:47 AM Naveen
<[email protected]> wrote:
>
> Extend scalar SAT_ADD constant folding to recognize cases where one operand is
> zero. It allows SAT_ADD expressions with constant operands to fold away early.
> The change improves optimization opportunities and avoids emitting unnecessary
> SAT_ADD operations.
> Bootstrapped and tested on aarch64-linux-gnu.
>
> PR middle-end/123286
>
> gcc/ChangeLog:
> * fold-const-call.cc (fold_internal_fn_sat_add): New function.
> (fold_const_call): Handle CFN_SAT_ADD.
> * match.pd: Add simplifications for x SAT_ADD 0 == x.
> * genmatch.cc (commutative_op): Add CFN_SAT_ADD.
>
> gcc/testsuite/ChangeLog:
> * gcc.dg/pr123286.c: New test.
>
> Signed-off-by: Naveen <[email protected]>
> ---
> gcc/fold-const-call.cc | 41 +++++++++++++++++++++++++++++++++
> gcc/genmatch.cc | 1 +
> gcc/match.pd | 5 ++++
> gcc/testsuite/gcc.dg/pr123286.c | 33 ++++++++++++++++++++++++++
> 4 files changed, 80 insertions(+)
> create mode 100644 gcc/testsuite/gcc.dg/pr123286.c
>
> diff --git a/gcc/fold-const-call.cc b/gcc/fold-const-call.cc
> index 7dd1b21c34f..031e2d32f0c 100644
> --- a/gcc/fold-const-call.cc
> +++ b/gcc/fold-const-call.cc
> @@ -1477,6 +1477,44 @@ fold_const_vec_extract (tree, tree arg0, tree)
> return NULL_TREE;
> }
>
> +/* Try to fold scalar integer IFN_SAT_ADD with operands OP0 and OP1. */
> +
> +static tree
> +fold_internal_fn_sat_add (tree type, tree op0, tree op1)
> +{
> + if (!INTEGRAL_NB_TYPE_P (type)
> + || VECTOR_TYPE_P (type))
> + return NULL_TREE;
!INTEGRAL_NB_TYPE_P already rejects vector types, so the separate
VECTOR_TYPE_P check is redundant and can safely be removed.
> +
> + if (TREE_CODE (op0) != INTEGER_CST
> + || TREE_CODE (op1) != INTEGER_CST)
> + return NULL_TREE;
> +
> + wi::overflow_type overflow;
> + unsigned int prec = TYPE_PRECISION (type);
> +
> + if (TYPE_UNSIGNED (type))
> + {
> + wide_int result = wi::add (wi::to_wide (op0), wi::to_wide (op1),
> + UNSIGNED, &overflow);
> + if (overflow != wi::OVF_NONE)
> + result = wi::max_value (prec, UNSIGNED);
> + return wide_int_to_tree (type, result);
> + }
> + else
> + {
> + wide_int result = wi::add (wi::to_wide (op0), wi::to_wide (op1),
> + SIGNED, &overflow);
> + if (overflow == wi::OVF_OVERFLOW)
> + result = wi::max_value (prec, SIGNED);
> + else if (overflow == wi::OVF_UNDERFLOW)
> + result = wi::min_value (prec, SIGNED);
> + else if (overflow != wi::OVF_NONE)
> + return NULL_TREE;
> + return wide_int_to_tree (type, result);
> + }
I think we can do slightly better here, with less duplication:
wide_int result = wi::add (wi::to_wide (op0), wi::to_wide (op1),
                           TYPE_SIGN (type), &overflow);
if (overflow == wi::OVF_NONE)
  ;
else if (TYPE_UNSIGNED (type))
  result = wi::max_value (prec, UNSIGNED);
else
  {
    if (overflow == wi::OVF_OVERFLOW)
      result = wi::max_value (prec, SIGNED);
    else if (overflow == wi::OVF_UNDERFLOW)
      result = wi::min_value (prec, SIGNED);
    else
      return NULL_TREE;
  }
return wide_int_to_tree (type, result);
Thanks,
Andrea
> +}
> +
> /* Try to evaluate:
>
> *RESULT = FN (*ARG0, *ARG1)
> @@ -1886,6 +1924,9 @@ fold_const_call (combined_fn fn, tree type, tree arg0,
> tree arg1)
> case CFN_VEC_EXTRACT:
> return fold_const_vec_extract (type, arg0, arg1);
>
> + case CFN_SAT_ADD:
> + return fold_internal_fn_sat_add (type, arg0, arg1);
> +
> case CFN_UBSAN_CHECK_ADD:
> case CFN_ADD_OVERFLOW:
> subcode = PLUS_EXPR;
> diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
> index 633ff9e1db2..6c267ca8dc2 100644
> --- a/gcc/genmatch.cc
> +++ b/gcc/genmatch.cc
> @@ -1268,6 +1268,7 @@ commutative_op (id_base *id, bool
> compares_are_commutative = false)
> case CFN_FNMS:
> case CFN_ADD_OVERFLOW:
> case CFN_MUL_OVERFLOW:
> + case CFN_SAT_ADD:
> return 0;
>
> case CFN_COND_ADD:
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 8be7f1c60db..ecc1a362443 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -8928,6 +8928,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> && TYPE_UNSIGNED (TREE_TYPE (@0)))
> (cmp @1 @0))))
>
> +/* x SAT_ADD 0 == x. */
> +(simplify
> + (IFN_SAT_ADD:c @0 integer_zerop)
> + @0)
> +
> /* Optimize A - B + -1 >= A into B >= A for unsigned comparisons. */
> (for cmp (ge lt)
> (simplify
> diff --git a/gcc/testsuite/gcc.dg/pr123286.c b/gcc/testsuite/gcc.dg/pr123286.c
> new file mode 100644
> index 00000000000..d6689aa91d4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr123286.c
> @@ -0,0 +1,33 @@
> +/* PR middle-end/123286 */
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O2 -fdump-tree-optimized" } */
> +
> +#include <arm_neon.h>
> +#include <stdint.h>
> +
> +uint64_t
> +f1 (uint64_t a)
> +{
> + uint64x1_t va;
> + uint64x1_t vz;
> +
> + va = vdup_n_u64 (a);
> + vz = vdup_n_u64 (0);
> +
> + return vqadd_u64 (va, vz)[0];
> +}
> +
> +uint64_t
> +f2 (uint64_t a)
> +{
> + uint64x1_t va;
> + uint64x1_t vz;
> +
> + va = vdup_n_u64 (0);
> + vz = vdup_n_u64 (a);
> +
> + return vqadd_u64 (va, vz)[0];
> +}
> +
> +/* Both SAT_ADD calls should fold away. */
> +/* { dg-final { scan-tree-dump-not "\\.SAT_ADD" "optimized" } } */
> --
> 2.34.1
>