sub(a, b)) [PR116815]

Dhruv Chawla Thu, 24 Jul 2025 00:30:37 -0700

On 24/07/25 11:21, Andrew Pinski wrote:

External email: Use caution opening links or attachments



On Wed, Jul 23, 2025 at 10:16 PM <dhr...@nvidia.com> wrote:


From: Dhruv Chawla <dhr...@nvidia.com>

This patch folds the following patterns:
- max (a, add (a, b)) -> [sum, ovf] = adds (a, b); !ovf ? sum : a
- min (a, add (a, b)) -> [sum, ovf] = adds (a, b); !ovf ? a : sum
- max (a, sub (a, b)) -> [sum, ovf] = subs (a, b); !ovf ? a : sum
- min (a, sub (a, b)) -> [sum, ovf] = subs (a, b); !ovf ? sum : a

Where ovf is the overflow flag. adds and subs are generated by
generating a parallel compare+plus/minus which maps to the pattern
add<mode>3_compareC. sub<mode>3_compareC is also created to have an
equivalent pattern for the subs instruction.

This patch is a respin of the patch posted at
https://gcc.gnu.org/pipermail/gcc-patches/2025-May/685021.html as per
the suggestion to turn it into a target-specific transform by Richard
Biener.

Bootstrapped and regtested on aarch64-unknown-linux-gnu.

Signed-off-by: Dhruv Chawla <dhr...@nvidia.com>

         PR middle-end/116815

gcc/ChangeLog:

         * config/aarch64/aarch64.md (sub<mode>3_compareC): New pattern.
         (*aarch64_plus_within_<optab><mode>3_<ovf_commutate>): Likewise.
         (*aarch64_minus_within_<optab><mode>3): Likewise.
         * config/aarch64/iterators.md (ovf_add_cmp): New code attribute.
         (ovf_sub_cmp): Likewise.
         (ovf_commutate): New iterator.
         (ovf_comm_opp): New int attribute.

gcc/testsuite/ChangeLog:

         * gcc.target/aarch64/pr116815-1.c: New test.
         * gcc.target/aarch64/pr116815-2.c: Likewise.
         * gcc.target/aarch64/pr116815-3.c: Likewise.
         * gcc.target/aarch64/pr116815-4.c: Likewise.
         * gcc.target/aarch64/pr116815-5.c: Likewise.
---
  gcc/config/aarch64/aarch64.md                 |  73 +++++++++++
  gcc/config/aarch64/iterators.md               |   9 ++
  gcc/testsuite/gcc.target/aarch64/pr116815-1.c | 119 ++++++++++++++++++
  gcc/testsuite/gcc.target/aarch64/pr116815-2.c |  94 ++++++++++++++
  gcc/testsuite/gcc.target/aarch64/pr116815-3.c |  63 ++++++++++
  gcc/testsuite/gcc.target/aarch64/pr116815-4.c |  49 ++++++++
  gcc/testsuite/gcc.target/aarch64/pr116815-5.c |  45 +++++++
  7 files changed, 452 insertions(+)
  create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-1.c
  create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-2.c
  create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-3.c
  create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-4.c
  create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-5.c

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index a4ae6859da0..c9f88c40473 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3741,6 +3741,20 @@
    [(set_attr "type" "alus_sreg")]
  )

+;; An equivalent to add<mode>3_compareC
+(define_insn "sub<mode>3_compareC"
+  [(set (reg:CC_C CC_REGNUM)
+       (compare:CC_C
+         (minus:GPI
+           (match_operand:GPI 1 "register_operand" "r")
+           (match_operand:GPI 2 "register_operand" "r"))
+         (match_dup 1)))
+   (set (match_operand:GPI 0 "register_operand" "=r")
+       (minus:GPI (match_dup 1) (match_dup 2)))]
+  ""
+  "subs\t%<w>0, %<w>1, %<w>2"
+)
+
  (define_peephole2
    [(set (match_operand:GPI 0 "aarch64_general_reg")
         (minus:GPI (match_operand:GPI 1 "aarch64_reg_or_zero")
@@ -4481,6 +4495,65 @@
    [(set_attr "type" "<su>div")]
  )

+;; umax (a, add (a, b)) => [sum, ovf] = adds (a, b); !ovf ? sum : a
+;; umin (a, add (a, b)) => [sum, ovf] = adds (a, b); !ovf ? a : sum
+;; ... along with the commutative version of add (a, b) i.e. add (b, a)
+(define_insn_and_split "*aarch64_plus_within_<optab><mode>3_<ovf_commutate>"
+  [(set (match_operand:GPI 0 "register_operand")
+       (UMAXMIN:GPI
+         (plus:GPI (match_operand:GPI 1 "register_operand")
+                   (match_operand:GPI 2 "register_operand"))
+         (match_dup <ovf_commutate>)))
+   (clobber (match_scratch:GPI 3))]
+  "!TARGET_CSSC"
+  "#"
+  "&& !reload_completed"


Since this is a define_insn_and_split, I think there should be
constraints and not just predicates on it. I don't think you can
depend on it being split before RA (though I could be wrong) or
matching post RA.

+  [(parallel
+      [(set (reg:CC_C CC_REGNUM)
+           (compare:CC_C (plus:GPI (match_dup ovf_commutate)


I am thinking you are missing "<>" around ovf_commutate here and other
places below. Or did I misunderstand how this works?


Hi,

Yeah, this is a bit weird. It looks like the split part of a pattern does
not require the <>. In fact, I was getting undefined iterator errors when
I added them. Without them, the md gets correctly generated (I verified using
"make mddump" in build/gcc/).

+                                   (match_dup <ovf_comm_opp>))
+                         (match_dup ovf_commutate)))
+       (set (match_dup 3) (plus:GPI (match_dup ovf_commutate)
+                                   (match_dup <ovf_comm_opp>)))])
+   (set (match_dup 0)
+       (if_then_else:GPI (<ovf_add_cmp> (reg:CC CC_REGNUM)
+                                           (const_int 0))
+                         (match_dup 3)
+                         (match_dup ovf_commutate)))]
+  {
+    if (GET_CODE (operands[3]) == SCRATCH)
+      operands[3] = gen_reg_rtx (<MODE>mode);
+  }
+)
+
+;; umax (a, sub (a, b)) => [sum, ovf] = subs (a, b); !ovf ? a : sum
+;; umin (a, sub (a, b)) => [sum, ovf] = subs (a, b); !ovf ? sum : a
+(define_insn_and_split "*aarch64_minus_within_<optab><mode>3"
+  [(set (match_operand:GPI 0 "register_operand")
+       (UMAXMIN:GPI
+         (minus:GPI (match_operand:GPI 1 "register_operand")
+                    (match_operand:GPI 2 "register_operand"))
+         (match_dup 1)))
+   (clobber (match_scratch:GPI 3))]
+  "!TARGET_CSSC"
+  "#"
+  "&& !reload_completed"
+  [(parallel
+      [(set (reg:CC_C CC_REGNUM)
+           (compare:CC_C (minus:GPI (match_dup 1) (match_dup 2))
+                         (match_dup 1)))
+       (set (match_dup 3) (minus:GPI (match_dup 1) (match_dup 2)))])
+   (set (match_dup 0)
+           (if_then_else:GPI (<ovf_sub_cmp> (reg:CC CC_REGNUM)
+                                               (const_int 0))
+                             (match_dup 3)
+                             (match_dup 1)))]
+  {
+    if (GET_CODE (operands[3]) == SCRATCH)
+      operands[3] = gen_reg_rtx (<MODE>mode);
+  }
+)

Same comments as above dealing with pre/post RA.

Thanks,
Andrew

+
  ;; -------------------------------------------------------------------
  ;; Comparison insns
  ;; -------------------------------------------------------------------
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 795c4ac7a57..430c49cc6c9 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -2795,6 +2795,8 @@

  (define_code_iterator FMAXMIN [smax smin])

+(define_code_iterator UMAXMIN [umax umin])
+
  ;; Signed and unsigned max operations.
  (define_code_iterator USMAX [smax umax])

@@ -3087,6 +3089,13 @@

  (define_code_attr maxminand [(smax "bic") (smin "and")])

+(define_code_attr ovf_add_cmp [(umax "geu") (umin "ltu")])
+(define_code_attr ovf_sub_cmp [(umax "ltu") (umin "geu")])
+
+;; Operand numbers for commutative operations
+(define_int_iterator ovf_commutate [1 2])
+(define_int_attr ovf_comm_opp [(1 "2") (2 "1")])
+
  ;; MLA/MLS attributes.
  (define_code_attr as [(ss_plus "a") (ss_minus "s")])

diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-1.c b/gcc/testsuite/gcc.target/aarch64/pr116815-1.c
new file mode 100644
index 00000000000..375c5f7c8b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-1.c
@@ -0,0 +1,119 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+/* PR middle-end/116815 */
+
+/* Single-use tests.  */
+
+static inline unsigned
+max (unsigned a, unsigned b)
+{
+  return a > b ? a : b;
+}
+
+static inline unsigned
+min (unsigned a, unsigned b)
+{
+  return a < b ? a : b;
+}


Just a small comment here about the testcase, I would mark these as
always_inline; yes the inliner heuristics should inline these always
but this testcase is not testing that.


Here's an updated version of the patch with the changes applied. Thanks
for the review!

-- >8 --

From da39143fe191e7d7596e040d1b45bdf6714f96dd Mon Sep 17 00:00:00 2001
From: Dhruv Chawla <dhr...@nvidia.com>
Date: Wed, 23 Jul 2025 01:41:51 -0700
Subject: [PATCH] [aarch64] Make better use of overflowing operations in 
max/min(a, add/sub(a, b)) [PR116815]

This patch folds the following patterns:
- max (a, add (a, b)) -> [sum, ovf] = adds (a, b); !ovf ? sum : a
- min (a, add (a, b)) -> [sum, ovf] = adds (a, b); !ovf ? a : sum
- max (a, sub (a, b)) -> [sum, ovf] = subs (a, b); !ovf ? a : sum
- min (a, sub (a, b)) -> [sum, ovf] = subs (a, b); !ovf ? sum : a

Where ovf is the overflow flag. adds and subs are generated by
generating a parallel compare+plus/minus which maps to the pattern
add<mode>3_compareC. sub<mode>3_compareC is also created to have an
equivalent pattern for the subs instruction.

This patch is a respin of the patch posted at
https://gcc.gnu.org/pipermail/gcc-patches/2025-May/685021.html as per
the suggestion to turn it into a target-specific transform by Richard
Biener.

Bootstrapped and regtested on aarch64-unknown-linux-gnu.

Signed-off-by: Dhruv Chawla <dhr...@nvidia.com>

        PR middle-end/116815

gcc/ChangeLog:

        * config/aarch64/aarch64.md (sub<mode>3_compareC): New pattern.
        (*aarch64_plus_within_<optab><mode>3_<ovf_commutate>): Likewise.
        (*aarch64_minus_within_<optab><mode>3): Likewise.
        * config/aarch64/iterators.md (ovf_add_cmp): New code attribute.
        (ovf_sub_cmp): Likewise.
        (ovf_commutate): New iterator.
        (ovf_comm_opp): New int attribute.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/pr116815-1.c: New test.
        * gcc.target/aarch64/pr116815-2.c: Likewise.
        * gcc.target/aarch64/pr116815-3.c: Likewise.
        * gcc.target/aarch64/pr116815-4.c: Likewise.
        * gcc.target/aarch64/pr116815-5.c: Likewise.
---
 gcc/config/aarch64/aarch64.md                 |  73 +++++++++++
 gcc/config/aarch64/iterators.md               |   9 ++
 gcc/testsuite/gcc.target/aarch64/pr116815-1.c | 119 ++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/pr116815-2.c |  94 ++++++++++++++
 gcc/testsuite/gcc.target/aarch64/pr116815-3.c |  63 ++++++++++
 gcc/testsuite/gcc.target/aarch64/pr116815-4.c |  49 ++++++++
 gcc/testsuite/gcc.target/aarch64/pr116815-5.c |  45 +++++++
 7 files changed, 452 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-5.c

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index a4ae6859da0..cea0964f673 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3741,6 +3741,20 @@
   [(set_attr "type" "alus_sreg")]
 )

+;; An equivalent to add<mode>3_compareC
+(define_insn "sub<mode>3_compareC"
+  [(set (reg:CC_C CC_REGNUM)
+       (compare:CC_C
+         (minus:GPI
+           (match_operand:GPI 1 "register_operand" "r")
+           (match_operand:GPI 2 "register_operand" "r"))
+         (match_dup 1)))
+   (set (match_operand:GPI 0 "register_operand" "=r")
+       (minus:GPI (match_dup 1) (match_dup 2)))]
+  ""
+  "subs\t%<w>0, %<w>1, %<w>2"
+)
+
 (define_peephole2
   [(set (match_operand:GPI 0 "aarch64_general_reg")
        (minus:GPI (match_operand:GPI 1 "aarch64_reg_or_zero")
@@ -4481,6 +4495,65 @@
   [(set_attr "type" "<su>div")]
 )

+;; umax (a, add (a, b)) => [sum, ovf] = adds (a, b); !ovf ? sum : a
+;; umin (a, add (a, b)) => [sum, ovf] = adds (a, b); !ovf ? a : sum
+;; ... along with the commutative version of add (a, b) i.e. add (b, a)
+(define_insn_and_split "*aarch64_plus_within_<optab><mode>3_<ovf_commutate>"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+       (UMAXMIN:GPI
+         (plus:GPI (match_operand:GPI 1 "register_operand" "r")
+                   (match_operand:GPI 2 "register_operand" "r"))
+         (match_dup <ovf_commutate>)))
+   (clobber (match_scratch:GPI 3))]
+  "!TARGET_CSSC"
+  "#"
+  "&& !reload_completed"
+  [(parallel
+      [(set (reg:CC_C CC_REGNUM)
+           (compare:CC_C (plus:GPI (match_dup ovf_commutate)
+                                   (match_dup <ovf_comm_opp>))
+                         (match_dup ovf_commutate)))
+       (set (match_dup 3) (plus:GPI (match_dup ovf_commutate)
+                                   (match_dup <ovf_comm_opp>)))])
+   (set (match_dup 0)
+       (if_then_else:GPI (<ovf_add_cmp> (reg:CC CC_REGNUM)
+                                           (const_int 0))
+                         (match_dup 3)
+                         (match_dup ovf_commutate)))]
+  {
+    if (GET_CODE (operands[3]) == SCRATCH)
+      operands[3] = gen_reg_rtx (<MODE>mode);
+  }
+)
+
+;; umax (a, sub (a, b)) => [sum, ovf] = subs (a, b); !ovf ? a : sum
+;; umin (a, sub (a, b)) => [sum, ovf] = subs (a, b); !ovf ? sum : a
+(define_insn_and_split "*aarch64_minus_within_<optab><mode>3"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+       (UMAXMIN:GPI
+         (minus:GPI (match_operand:GPI 1 "register_operand" "r")
+                    (match_operand:GPI 2 "register_operand" "r"))
+         (match_dup 1)))
+   (clobber (match_scratch:GPI 3))]
+  "!TARGET_CSSC"
+  "#"
+  "&& !reload_completed"
+  [(parallel
+      [(set (reg:CC_C CC_REGNUM)
+           (compare:CC_C (minus:GPI (match_dup 1) (match_dup 2))
+                         (match_dup 1)))
+       (set (match_dup 3) (minus:GPI (match_dup 1) (match_dup 2)))])
+   (set (match_dup 0)
+           (if_then_else:GPI (<ovf_sub_cmp> (reg:CC CC_REGNUM)
+                                               (const_int 0))
+                             (match_dup 3)
+                             (match_dup 1)))]
+  {
+    if (GET_CODE (operands[3]) == SCRATCH)
+      operands[3] = gen_reg_rtx (<MODE>mode);
+  }
+)
+
 ;; -------------------------------------------------------------------
 ;; Comparison insns
 ;; -------------------------------------------------------------------
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 795c4ac7a57..430c49cc6c9 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -2795,6 +2795,8 @@

 (define_code_iterator FMAXMIN [smax smin])

+(define_code_iterator UMAXMIN [umax umin])
+
 ;; Signed and unsigned max operations.
 (define_code_iterator USMAX [smax umax])

@@ -3087,6 +3089,13 @@

 (define_code_attr maxminand [(smax "bic") (smin "and")])

+(define_code_attr ovf_add_cmp [(umax "geu") (umin "ltu")])
+(define_code_attr ovf_sub_cmp [(umax "ltu") (umin "geu")])
+
+;; Operand numbers for commutative operations
+(define_int_iterator ovf_commutate [1 2])
+(define_int_attr ovf_comm_opp [(1 "2") (2 "1")])
+
 ;; MLA/MLS attributes.
 (define_code_attr as [(ss_plus "a") (ss_minus "s")])

diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-1.c b/gcc/testsuite/gcc.target/aarch64/pr116815-1.c
new file mode 100644
index 00000000000..4ba7f41abd5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-1.c
@@ -0,0 +1,119 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+/* PR middle-end/116815 */
+
+/* Single-use tests.  */
+
+static inline unsigned __attribute__ ((always_inline))
+max (unsigned a, unsigned b)
+{
+  return a > b ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+min (unsigned a, unsigned b)
+{
+  return a < b ? a : b;
+}
+
+#define OPERATION(op, type, N, exp1, exp2)                                     \
+  unsigned u##op##type##N (unsigned a, unsigned b) { return op (exp1, exp2); }
+
+/*
+** umaxadd1:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (max, add, 1, a, a + b)
+
+/*
+** umaxadd2:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (max, add, 2, a, b + a)
+
+/*
+** umaxadd3:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (max, add, 3, a + b, a)
+
+/*
+** umaxadd4:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (max, add, 4, b + a, a)
+
+/*
+** uminadd1:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (min, add, 1, a, a + b)
+
+/*
+** uminadd2:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (min, add, 2, a, b + a)
+
+/*
+** uminadd3:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (min, add, 3, a + b, a)
+
+/*
+** uminadd4:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (min, add, 4, b + a, a)
+
+/*
+** umaxsub1:
+**     subs    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (max, sub, 1, a, a - b)
+
+/*
+** umaxsub2:
+**     subs    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (max, sub, 2, a - b, a)
+
+/*
+** uminsub1:
+**     subs    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (min, sub, 1, a, a - b)
+
+/*
+** uminsub2:
+**     subs    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (min, sub, 2, a - b, a)
+
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-2.c b/gcc/testsuite/gcc.target/aarch64/pr116815-2.c
new file mode 100644
index 00000000000..748b2de8e97
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-2.c
@@ -0,0 +1,94 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* PR middle-end/116815 */
+
+/* Negative tests.  */
+
+static inline int __attribute__ ((always_inline))
+smax (int a, int b)
+{
+  return a > b ? a : b;
+}
+
+static inline int __attribute__ ((always_inline))
+smin (int a, int b)
+{
+  return a < b ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+umax (unsigned a, unsigned b)
+{
+  return a > b ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+umin (unsigned a, unsigned b)
+{
+  return a < b ? a : b;
+}
+
+#define ASSUME(cond) if (!(cond)) __builtin_unreachable ();
+
+/* This transformation does not trigger on signed types.  */
+
+int
+smax_add (int a, int b)
+{
+  ASSUME (b >= 0);
+  return smax (a, a + b);
+}
+
+int
+smin_add (int a, int b)
+{
+  ASSUME (b >= 0);
+  return smin (a, a + b);
+}
+
+int
+smax_sub (int a, int b)
+{
+  ASSUME (b >= 0);
+  return smax (a, a - b);
+}
+
+int
+smin_sub (int a, int b)
+{
+  ASSUME (b >= 0);
+  return smin (a, a - b);
+}
+
+/* Invalid patterns.  */
+
+/* This can potentially be matched, but the RHS gets factored to
+   (a + b) * b.  */
+unsigned
+umax_factored (unsigned a, unsigned b)
+{
+  return umax (a * b, a * b + b * b);
+}
+
+unsigned
+umin_mult (unsigned a, unsigned b)
+{
+  return umin (a, a * b);
+}
+
+unsigned
+umax_sub (unsigned a, unsigned b)
+{
+  return umax (a, b - a);
+}
+
+unsigned
+umin_sub (unsigned a, unsigned b)
+{
+  return umin (a, b - a);
+}
+
+/* { dg-final { scan-assembler-not "adds\\t" } } */
+/* { dg-final { scan-assembler-not "subs\\t" } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-3.c b/gcc/testsuite/gcc.target/aarch64/pr116815-3.c
new file mode 100644
index 00000000000..b9a3b4bba3e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-3.c
@@ -0,0 +1,63 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+/* PR middle-end/116815 */
+
+/* Multi-use tests.  */
+
+static inline unsigned __attribute__ ((always_inline))
+max (unsigned a, unsigned b)
+{
+  return a > b ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+min (unsigned a, unsigned b)
+{
+  return a < b ? a : b;
+}
+
+/* FIXME: This should only generate one adds.  */
+
+/*
+** umax_add_umin_add:
+**     adds    (w[0-9]+), w0, w1
+**     csel    \1, \1, w0, cs
+**     adds    (w[0-9]+), w1, w0
+**     csel    w0, \2, w1, cc
+**     add     w0, \1, \2
+**     ret
+*/
+unsigned
+umax_add_umin_add (unsigned a, unsigned b)
+{
+  return max (a, a + b) + min (a + b, b);
+}
+
+/*
+** umin_add_umax_add:
+**     adds    (w[0-9]+), w0, w1
+**     csel    \1, \1, w0, cc
+**     adds    (w[0-9]+), w1, w0
+**     csel    \2, \2, w1, cs
+**     add     w0, \1, \2
+**     ret
+*/
+unsigned
+umin_add_umax_add (unsigned a, unsigned b)
+{
+  return min (a, b + a) + max (b + a, b);
+}
+
+/* FIXME: This pattern does not get optimized.  */
+
+unsigned
+multiple_paths (unsigned a, unsigned b)
+{
+  if (a > 5)
+    return max (a, a + b);
+  else
+    return min (a, a + b);
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-4.c b/gcc/testsuite/gcc.target/aarch64/pr116815-4.c
new file mode 100644
index 00000000000..1aedcc2ebc9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-4.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* PR middle-end/116815 */
+
+/* Single-use tests with a use of the min-max in an if-condition.  */
+
+static inline unsigned __attribute__ ((always_inline))
+max (unsigned a, unsigned b)
+{
+  return a > b ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+min (unsigned a, unsigned b)
+{
+  return a < b ? a : b;
+}
+
+#define OPERATION(op, type, N, exp1, exp2)                                     \
+  unsigned u##op##type##N (unsigned a, unsigned b, unsigned c, unsigned d,     \
+                          unsigned e)                                          \
+  {                                                                            \
+    unsigned result = op (exp1, exp2);                                         \
+    if (result == c || result == c * 2)                                        \
+      return d;                                                                \
+    else                                                                       \
+      return e;                                                                \
+  }
+
+OPERATION (max, add, 1, a, a + b)
+OPERATION (max, add, 2, a, b + a)
+OPERATION (max, add, 3, a + b, a)
+OPERATION (max, add, 4, b + a, a)
+
+OPERATION (min, add, 1, a, a + b)
+OPERATION (min, add, 2, a, b + a)
+OPERATION (min, add, 3, a + b, a)
+OPERATION (min, add, 4, b + a, a)
+
+OPERATION (max, sub, 1, a, a - b)
+OPERATION (max, sub, 2, a - b, a)
+
+OPERATION (min, sub, 1, a, a - b)
+OPERATION (min, sub, 2, a - b, a)
+
+/* { dg-final { scan-assembler-times "adds\\t" 8 } } */
+/* { dg-final { scan-assembler-times "subs\\t" 4 } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-5.c b/gcc/testsuite/gcc.target/aarch64/pr116815-5.c
new file mode 100644
index 00000000000..76ed8f9670a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-5.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#pragma GCC target "+cssc"
+
+/* PR middle-end/116815 */
+
+/* Make sure that umax/umin instructions are generated with CSSC.  */
+
+static inline unsigned __attribute__ ((always_inline))
+max (unsigned a, unsigned b)
+{
+  return a > b ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+min (unsigned a, unsigned b)
+{
+  return a < b ? a : b;
+}
+
+#define OPERATION(op, type, N, exp1, exp2)                                     \
+  unsigned u##op##type##N (unsigned a, unsigned b) { return op (exp1, exp2); }
+
+OPERATION (max, add, 1, a, a + b)
+OPERATION (max, add, 2, a, b + a)
+OPERATION (max, add, 3, a + b, a)
+OPERATION (max, add, 4, b + a, a)
+
+OPERATION (min, add, 1, a, a + b)
+OPERATION (min, add, 2, a, b + a)
+OPERATION (min, add, 3, a + b, a)
+OPERATION (min, add, 4, b + a, a)
+
+OPERATION (max, sub, 1, a, a - b)
+OPERATION (max, sub, 2, a - b, a)
+
+OPERATION (min, sub, 1, a, a - b)
+OPERATION (min, sub, 2, a - b, a)
+
+/* { dg-final { scan-assembler-times "umax\\t" 6 } } */
+/* { dg-final { scan-assembler-times "umin\\t" 6 } } */
+/* { dg-final { scan-assembler-not "adds\\t" } } */
+/* { dg-final { scan-assembler-not "subs\\t" } } */
+
--
2.44.0

+
+#define OPERATION(op, type, N, exp1, exp2)                                     \
+  unsigned u##op##type##N (unsigned a, unsigned b) { return op (exp1, exp2); }
+
+/*
+** umaxadd1:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (max, add, 1, a, a + b)
+
+/*
+** umaxadd2:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (max, add, 2, a, b + a)
+
+/*
+** umaxadd3:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (max, add, 3, a + b, a)
+
+/*
+** umaxadd4:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (max, add, 4, b + a, a)
+
+/*
+** uminadd1:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (min, add, 1, a, a + b)
+
+/*
+** uminadd2:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (min, add, 2, a, b + a)
+
+/*
+** uminadd3:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (min, add, 3, a + b, a)
+
+/*
+** uminadd4:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (min, add, 4, b + a, a)
+
+/*
+** umaxsub1:
+**     subs    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (max, sub, 1, a, a - b)
+
+/*
+** umaxsub2:
+**     subs    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (max, sub, 2, a - b, a)
+
+/*
+** uminsub1:
+**     subs    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (min, sub, 1, a, a - b)
+
+/*
+** uminsub2:
+**     subs    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (min, sub, 2, a - b, a)
+
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-2.c b/gcc/testsuite/gcc.target/aarch64/pr116815-2.c
new file mode 100644
index 00000000000..c5e8df2aa34
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-2.c
@@ -0,0 +1,94 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* PR middle-end/116815 */
+
+/* Negative tests.  */
+
+static inline int
+smax (int a, int b)
+{
+  return a > b ? a : b;
+}
+
+static inline int
+smin (int a, int b)
+{
+  return a < b ? a : b;
+}
+
+static inline unsigned
+umax (unsigned a, unsigned b)
+{
+  return a > b ? a : b;
+}
+
+static inline unsigned
+umin (unsigned a, unsigned b)
+{
+  return a < b ? a : b;
+}
+
+#define ASSUME(cond) if (!(cond)) __builtin_unreachable ();
+
+/* This transformation does not trigger on signed types.  */
+
+int
+smax_add (int a, int b)
+{
+  ASSUME (b >= 0);
+  return smax (a, a + b);
+}
+
+int
+smin_add (int a, int b)
+{
+  ASSUME (b >= 0);
+  return smin (a, a + b);
+}
+
+int
+smax_sub (int a, int b)
+{
+  ASSUME (b >= 0);
+  return smax (a, a - b);
+}
+
+int
+smin_sub (int a, int b)
+{
+  ASSUME (b >= 0);
+  return smin (a, a - b);
+}
+
+/* Invalid patterns.  */
+
+/* This can potentially be matched, but the RHS gets factored to
+   (a + b) * b.  */
+unsigned
+umax_factored (unsigned a, unsigned b)
+{
+  return umax (a * b, a * b + b * b);
+}
+
+unsigned
+umin_mult (unsigned a, unsigned b)
+{
+  return umin (a, a * b);
+}
+
+unsigned
+umax_sub (unsigned a, unsigned b)
+{
+  return umax (a, b - a);
+}
+
+unsigned
+umin_sub (unsigned a, unsigned b)
+{
+  return umin (a, b - a);
+}
+
+/* { dg-final { scan-assembler-not "adds\\t" } } */
+/* { dg-final { scan-assembler-not "subs\\t" } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-3.c b/gcc/testsuite/gcc.target/aarch64/pr116815-3.c
new file mode 100644
index 00000000000..3095a50ac91
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-3.c
@@ -0,0 +1,63 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+/* PR middle-end/116815 */
+
+/* Multi-use tests.  */
+
+static inline unsigned
+max (unsigned a, unsigned b)
+{
+  return a > b ? a : b;
+}
+
+static inline unsigned
+min (unsigned a, unsigned b)
+{
+  return a < b ? a : b;
+}
+
+/* FIXME: This should only generate one adds.  */
+
+/*
+** umax_add_umin_add:
+**     adds    (w[0-9]+), w0, w1
+**     csel    \1, \1, w0, cs
+**     adds    (w[0-9]+), w1, w0
+**     csel    w0, \2, w1, cc
+**     add     w0, \1, \2
+**     ret
+*/
+unsigned
+umax_add_umin_add (unsigned a, unsigned b)
+{
+  return max (a, a + b) + min (a + b, b);
+}
+
+/*
+** umin_add_umax_add:
+**     adds    (w[0-9]+), w0, w1
+**     csel    \1, \1, w0, cc
+**     adds    (w[0-9]+), w1, w0
+**     csel    \2, \2, w1, cs
+**     add     w0, \1, \2
+**     ret
+*/
+unsigned
+umin_add_umax_add (unsigned a, unsigned b)
+{
+  return min (a, b + a) + max (b + a, b);
+}
+
+/* FIXME: This pattern does not get optimized.  */
+
+unsigned
+multiple_paths (unsigned a, unsigned b)
+{
+  if (a > 5)
+    return max (a, a + b);
+  else
+    return min (a, a + b);
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-4.c b/gcc/testsuite/gcc.target/aarch64/pr116815-4.c
new file mode 100644
index 00000000000..1d5ca1c1d92
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-4.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* PR middle-end/116815 */
+
+/* Single-use tests with a use of the min-max in an if-condition.  */
+
+static inline unsigned
+max (unsigned a, unsigned b)
+{
+  return a > b ? a : b;
+}
+
+static inline unsigned
+min (unsigned a, unsigned b)
+{
+  return a < b ? a : b;
+}
+
+#define OPERATION(op, type, N, exp1, exp2)                                     \
+  unsigned u##op##type##N (unsigned a, unsigned b, unsigned c, unsigned d,     \
+                          unsigned e)                                          \
+  {                                                                            \
+    unsigned result = op (exp1, exp2);                                         \
+    if (result == c || result == c * 2)                                        \
+      return d;                                                                \
+    else                                                                       \
+      return e;                                                                \
+  }
+
+OPERATION (max, add, 1, a, a + b)
+OPERATION (max, add, 2, a, b + a)
+OPERATION (max, add, 3, a + b, a)
+OPERATION (max, add, 4, b + a, a)
+
+OPERATION (min, add, 1, a, a + b)
+OPERATION (min, add, 2, a, b + a)
+OPERATION (min, add, 3, a + b, a)
+OPERATION (min, add, 4, b + a, a)
+
+OPERATION (max, sub, 1, a, a - b)
+OPERATION (max, sub, 2, a - b, a)
+
+OPERATION (min, sub, 1, a, a - b)
+OPERATION (min, sub, 2, a - b, a)
+
+/* { dg-final { scan-assembler-times "adds\\t" 8 } } */
+/* { dg-final { scan-assembler-times "subs\\t" 4 } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-5.c b/gcc/testsuite/gcc.target/aarch64/pr116815-5.c
new file mode 100644
index 00000000000..0500056b6b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-5.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#pragma GCC target "+cssc"
+
+/* PR middle-end/116815 */
+
+/* Make sure that umax/umin instructions are generated with CSSC.  */
+
+static inline unsigned
+max (unsigned a, unsigned b)
+{
+  return a > b ? a : b;
+}
+
+static inline unsigned
+min (unsigned a, unsigned b)
+{
+  return a < b ? a : b;
+}
+
+#define OPERATION(op, type, N, exp1, exp2)                                     \
+  unsigned u##op##type##N (unsigned a, unsigned b) { return op (exp1, exp2); }
+
+OPERATION (max, add, 1, a, a + b)
+OPERATION (max, add, 2, a, b + a)
+OPERATION (max, add, 3, a + b, a)
+OPERATION (max, add, 4, b + a, a)
+
+OPERATION (min, add, 1, a, a + b)
+OPERATION (min, add, 2, a, b + a)
+OPERATION (min, add, 3, a + b, a)
+OPERATION (min, add, 4, b + a, a)
+
+OPERATION (max, sub, 1, a, a - b)
+OPERATION (max, sub, 2, a - b, a)
+
+OPERATION (min, sub, 1, a, a - b)
+OPERATION (min, sub, 2, a - b, a)
+
+/* { dg-final { scan-assembler-times "umax\\t" 6 } } */
+/* { dg-final { scan-assembler-times "umin\\t" 6 } } */
+/* { dg-final { scan-assembler-not "adds\\t" } } */
+/* { dg-final { scan-assembler-not "subs\\t" } } */
+
--
2.44.0



--
Regards,
Dhruv

From da39143fe191e7d7596e040d1b45bdf6714f96dd Mon Sep 17 00:00:00 2001
From: Dhruv Chawla <dhr...@nvidia.com>
Date: Wed, 23 Jul 2025 01:41:51 -0700
Subject: [PATCH] [aarch64] Make better use of overflowing operations in
 max/min(a, add/sub(a, b)) [PR116815]

This patch folds the following patterns:
- max (a, add (a, b)) -> [sum, ovf] = adds (a, b); !ovf ? sum : a
- min (a, add (a, b)) -> [sum, ovf] = adds (a, b); !ovf ? a : sum
- max (a, sub (a, b)) -> [sum, ovf] = subs (a, b); !ovf ? a : sum
- min (a, sub (a, b)) -> [sum, ovf] = subs (a, b); !ovf ? sum : a

Here ovf denotes unsigned overflow, as reported through the carry flag.
The adds and subs instructions are produced by emitting a parallel
compare+plus/minus, which matches the pattern add<mode>3_compareC.
sub<mode>3_compareC is also created to provide an equivalent pattern
for the subs instruction.

This patch is a respin of the patch posted at
https://gcc.gnu.org/pipermail/gcc-patches/2025-May/685021.html as per
the suggestion to turn it into a target-specific transform by Richard
Biener.

Bootstrapped and regtested on aarch64-unknown-linux-gnu.

Signed-off-by: Dhruv Chawla <dhr...@nvidia.com>

        PR middle-end/116815

gcc/ChangeLog:

        * config/aarch64/aarch64.md (sub<mode>3_compareC): New pattern.
        (*aarch64_plus_within_<optab><mode>3_<ovf_commutate>): Likewise.
        (*aarch64_minus_within_<optab><mode>3): Likewise.
        * config/aarch64/iterators.md (ovf_add_cmp): New code attribute.
        (ovf_sub_cmp): Likewise.
        (ovf_commutate): New iterator.
        (ovf_comm_opp): New int attribute.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/pr116815-1.c: New test.
        * gcc.target/aarch64/pr116815-2.c: Likewise.
        * gcc.target/aarch64/pr116815-3.c: Likewise.
        * gcc.target/aarch64/pr116815-4.c: Likewise.
        * gcc.target/aarch64/pr116815-5.c: Likewise.
---
 gcc/config/aarch64/aarch64.md                 |  73 +++++++++++
 gcc/config/aarch64/iterators.md               |   9 ++
 gcc/testsuite/gcc.target/aarch64/pr116815-1.c | 119 ++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/pr116815-2.c |  94 ++++++++++++++
 gcc/testsuite/gcc.target/aarch64/pr116815-3.c |  63 ++++++++++
 gcc/testsuite/gcc.target/aarch64/pr116815-4.c |  49 ++++++++
 gcc/testsuite/gcc.target/aarch64/pr116815-5.c |  45 +++++++
 7 files changed, 452 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr116815-5.c

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index a4ae6859da0..cea0964f673 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3741,6 +3741,20 @@
   [(set_attr "type" "alus_sreg")]
 )
 
+;; The subtraction counterpart of add<mode>3_compareC.
+(define_insn "sub<mode>3_compareC"
+  [(set (reg:CC_C CC_REGNUM)
+       (compare:CC_C
+         (minus:GPI
+           (match_operand:GPI 1 "register_operand" "r")
+           (match_operand:GPI 2 "register_operand" "r"))
+         (match_dup 1)))
+   (set (match_operand:GPI 0 "register_operand" "=r")
+       (minus:GPI (match_dup 1) (match_dup 2)))]
+  ""
+  "subs\t%<w>0, %<w>1, %<w>2"
+)
+
 (define_peephole2
   [(set (match_operand:GPI 0 "aarch64_general_reg")
        (minus:GPI (match_operand:GPI 1 "aarch64_reg_or_zero")
@@ -4481,6 +4495,65 @@
   [(set_attr "type" "<su>div")]
 )
 
+;; umax (a, add (a, b)) => [sum, ovf] = adds (a, b); !ovf ? sum : a
+;; umin (a, add (a, b)) => [sum, ovf] = adds (a, b); !ovf ? a : sum
+;; ... along with the commutative version of add (a, b) i.e. add (b, a)
+(define_insn_and_split "*aarch64_plus_within_<optab><mode>3_<ovf_commutate>"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+       (UMAXMIN:GPI
+         (plus:GPI (match_operand:GPI 1 "register_operand" "r")
+                   (match_operand:GPI 2 "register_operand" "r"))
+         (match_dup <ovf_commutate>)))
+   (clobber (match_scratch:GPI 3))]
+  "!TARGET_CSSC"
+  "#"
+  "&& !reload_completed"
+  [(parallel
+      [(set (reg:CC_C CC_REGNUM)
+           (compare:CC_C (plus:GPI (match_dup ovf_commutate)
+                                   (match_dup <ovf_comm_opp>))
+                         (match_dup ovf_commutate)))
+       (set (match_dup 3) (plus:GPI (match_dup ovf_commutate)
+                                   (match_dup <ovf_comm_opp>)))])
+   (set (match_dup 0)
+       (if_then_else:GPI (<ovf_add_cmp> (reg:CC CC_REGNUM)
+                                           (const_int 0))
+                         (match_dup 3)
+                         (match_dup ovf_commutate)))]
+  {
+    if (GET_CODE (operands[3]) == SCRATCH)
+      operands[3] = gen_reg_rtx (<MODE>mode);
+  }
+)
+
+;; umax (a, sub (a, b)) => [sum, ovf] = subs (a, b); !ovf ? a : sum
+;; umin (a, sub (a, b)) => [sum, ovf] = subs (a, b); !ovf ? sum : a
+(define_insn_and_split "*aarch64_minus_within_<optab><mode>3"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+       (UMAXMIN:GPI
+         (minus:GPI (match_operand:GPI 1 "register_operand" "r")
+                    (match_operand:GPI 2 "register_operand" "r"))
+         (match_dup 1)))
+   (clobber (match_scratch:GPI 3))]
+  "!TARGET_CSSC"
+  "#"
+  "&& !reload_completed"
+  [(parallel
+      [(set (reg:CC_C CC_REGNUM)
+           (compare:CC_C (minus:GPI (match_dup 1) (match_dup 2))
+                         (match_dup 1)))
+       (set (match_dup 3) (minus:GPI (match_dup 1) (match_dup 2)))])
+   (set (match_dup 0)
+           (if_then_else:GPI (<ovf_sub_cmp> (reg:CC CC_REGNUM)
+                                               (const_int 0))
+                             (match_dup 3)
+                             (match_dup 1)))]
+  {
+    if (GET_CODE (operands[3]) == SCRATCH)
+      operands[3] = gen_reg_rtx (<MODE>mode);
+  }
+)
+
 ;; -------------------------------------------------------------------
 ;; Comparison insns
 ;; -------------------------------------------------------------------
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 795c4ac7a57..430c49cc6c9 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -2795,6 +2795,8 @@
 
 (define_code_iterator FMAXMIN [smax smin])
 
+(define_code_iterator UMAXMIN [umax umin])
+
 ;; Signed and unsigned max operations.
 (define_code_iterator USMAX [smax umax])
 
@@ -3087,6 +3089,13 @@
 
 (define_code_attr maxminand [(smax "bic") (smin "and")])
 
+(define_code_attr ovf_add_cmp [(umax "geu") (umin "ltu")])
+(define_code_attr ovf_sub_cmp [(umax "ltu") (umin "geu")])
+
+;; Operand numbers for commutative operations
+(define_int_iterator ovf_commutate [1 2])
+(define_int_attr ovf_comm_opp [(1 "2") (2 "1")])
+
 ;; MLA/MLS attributes.
 (define_code_attr as [(ss_plus "a") (ss_minus "s")])
 
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-1.c b/gcc/testsuite/gcc.target/aarch64/pr116815-1.c
new file mode 100644
index 00000000000..4ba7f41abd5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-1.c
@@ -0,0 +1,119 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+/* PR middle-end/116815 */
+
+/* Single-use tests.  */
+
+static inline unsigned __attribute__ ((always_inline))
+max (unsigned a, unsigned b)
+{
+  return a > b ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+min (unsigned a, unsigned b)
+{
+  return a < b ? a : b;
+}
+
+#define OPERATION(op, type, N, exp1, exp2)                                     \
+  unsigned u##op##type##N (unsigned a, unsigned b) { return op (exp1, exp2); }
+
+/*
+** umaxadd1:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (max, add, 1, a, a + b)
+
+/*
+** umaxadd2:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (max, add, 2, a, b + a)
+
+/*
+** umaxadd3:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (max, add, 3, a + b, a)
+
+/*
+** umaxadd4:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (max, add, 4, b + a, a)
+
+/*
+** uminadd1:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (min, add, 1, a, a + b)
+
+/*
+** uminadd2:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (min, add, 2, a, b + a)
+
+/*
+** uminadd3:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (min, add, 3, a + b, a)
+
+/*
+** uminadd4:
+**     adds    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (min, add, 4, b + a, a)
+
+/*
+** umaxsub1:
+**     subs    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (max, sub, 1, a, a - b)
+
+/*
+** umaxsub2:
+**     subs    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cc
+**     ret
+*/
+OPERATION (max, sub, 2, a - b, a)
+
+/*
+** uminsub1:
+**     subs    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (min, sub, 1, a, a - b)
+
+/*
+** uminsub2:
+**     subs    (w[0-9]+), w0, w1
+**     csel    w0, \1, w0, cs
+**     ret
+*/
+OPERATION (min, sub, 2, a - b, a)
+
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-2.c b/gcc/testsuite/gcc.target/aarch64/pr116815-2.c
new file mode 100644
index 00000000000..748b2de8e97
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-2.c
@@ -0,0 +1,94 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* PR middle-end/116815 */
+
+/* Negative tests.  */
+
+static inline int __attribute__ ((always_inline))
+smax (int a, int b)
+{
+  return a > b ? a : b;
+}
+
+static inline int __attribute__ ((always_inline))
+smin (int a, int b)
+{
+  return a < b ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+umax (unsigned a, unsigned b)
+{
+  return a > b ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+umin (unsigned a, unsigned b)
+{
+  return a < b ? a : b;
+}
+
+#define ASSUME(cond) if (!(cond)) __builtin_unreachable ();
+
+/* This transformation does not trigger on signed types.  */
+
+int
+smax_add (int a, int b)
+{
+  ASSUME (b >= 0);
+  return smax (a, a + b);
+}
+
+int
+smin_add (int a, int b)
+{
+  ASSUME (b >= 0);
+  return smin (a, a + b);
+}
+
+int
+smax_sub (int a, int b)
+{
+  ASSUME (b >= 0);
+  return smax (a, a - b);
+}
+
+int
+smin_sub (int a, int b)
+{
+  ASSUME (b >= 0);
+  return smin (a, a - b);
+}
+
+/* Invalid patterns.  */
+
+/* This can potentially be matched, but the RHS gets factored to
+   (a + b) * b.  */
+unsigned
+umax_factored (unsigned a, unsigned b)
+{
+  return umax (a * b, a * b + b * b);
+}
+
+unsigned
+umin_mult (unsigned a, unsigned b)
+{
+  return umin (a, a * b);
+}
+
+unsigned
+umax_sub (unsigned a, unsigned b)
+{
+  return umax (a, b - a);
+}
+
+unsigned
+umin_sub (unsigned a, unsigned b)
+{
+  return umin (a, b - a);
+}
+
+/* { dg-final { scan-assembler-not "adds\\t" } } */
+/* { dg-final { scan-assembler-not "subs\\t" } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-3.c b/gcc/testsuite/gcc.target/aarch64/pr116815-3.c
new file mode 100644
index 00000000000..b9a3b4bba3e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-3.c
@@ -0,0 +1,63 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+/* PR middle-end/116815 */
+
+/* Multi-use tests.  */
+
+static inline unsigned __attribute__ ((always_inline))
+max (unsigned a, unsigned b)
+{
+  return a > b ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+min (unsigned a, unsigned b)
+{
+  return a < b ? a : b;
+}
+
+/* FIXME: This should only generate one adds.  */
+
+/*
+** umax_add_umin_add:
+**     adds    (w[0-9]+), w0, w1
+**     csel    \1, \1, w0, cs
+**     adds    (w[0-9]+), w1, w0
+**     csel    w0, \2, w1, cc
+**     add     w0, \1, \2
+**     ret
+*/
+unsigned
+umax_add_umin_add (unsigned a, unsigned b)
+{
+  return max (a, a + b) + min (a + b, b);
+}
+
+/*
+** umin_add_umax_add:
+**     adds    (w[0-9]+), w0, w1
+**     csel    \1, \1, w0, cc
+**     adds    (w[0-9]+), w1, w0
+**     csel    \2, \2, w1, cs
+**     add     w0, \1, \2
+**     ret
+*/
+unsigned
+umin_add_umax_add (unsigned a, unsigned b)
+{
+  return min (a, b + a) + max (b + a, b);
+}
+
+/* FIXME: This pattern does not get optimized.  */
+
+unsigned
+multiple_paths (unsigned a, unsigned b)
+{
+  if (a > 5)
+    return max (a, a + b);
+  else
+    return min (a, a + b);
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-4.c b/gcc/testsuite/gcc.target/aarch64/pr116815-4.c
new file mode 100644
index 00000000000..1aedcc2ebc9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-4.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* PR middle-end/116815 */
+
+/* Single-use tests with a use of the min-max in an if-condition.  */
+
+static inline unsigned __attribute__ ((always_inline))
+max (unsigned a, unsigned b)
+{
+  return a > b ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+min (unsigned a, unsigned b)
+{
+  return a < b ? a : b;
+}
+
+#define OPERATION(op, type, N, exp1, exp2)                                     \
+  unsigned u##op##type##N (unsigned a, unsigned b, unsigned c, unsigned d,     \
+                          unsigned e)                                         \
+  {                                                                            \
+    unsigned result = op (exp1, exp2);                                         \
+    if (result == c || result == c * 2)                                        \
+      return d;                                                                \
+    else                                                                       \
+      return e;                                                                \
+  }
+
+OPERATION (max, add, 1, a, a + b)
+OPERATION (max, add, 2, a, b + a)
+OPERATION (max, add, 3, a + b, a)
+OPERATION (max, add, 4, b + a, a)
+
+OPERATION (min, add, 1, a, a + b)
+OPERATION (min, add, 2, a, b + a)
+OPERATION (min, add, 3, a + b, a)
+OPERATION (min, add, 4, b + a, a)
+
+OPERATION (max, sub, 1, a, a - b)
+OPERATION (max, sub, 2, a - b, a)
+
+OPERATION (min, sub, 1, a, a - b)
+OPERATION (min, sub, 2, a - b, a)
+
+/* { dg-final { scan-assembler-times "adds\\t" 8 } } */
+/* { dg-final { scan-assembler-times "subs\\t" 4 } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/pr116815-5.c b/gcc/testsuite/gcc.target/aarch64/pr116815-5.c
new file mode 100644
index 00000000000..76ed8f9670a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr116815-5.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#pragma GCC target "+cssc"
+
+/* PR middle-end/116815 */
+
+/* Make sure that umax/umin instructions are generated with CSSC.  */
+
+static inline unsigned __attribute__ ((always_inline))
+max (unsigned a, unsigned b)
+{
+  return a > b ? a : b;
+}
+
+static inline unsigned __attribute__ ((always_inline))
+min (unsigned a, unsigned b)
+{
+  return a < b ? a : b;
+}
+
+#define OPERATION(op, type, N, exp1, exp2)                                     \
+  unsigned u##op##type##N (unsigned a, unsigned b) { return op (exp1, exp2); }
+
+OPERATION (max, add, 1, a, a + b)
+OPERATION (max, add, 2, a, b + a)
+OPERATION (max, add, 3, a + b, a)
+OPERATION (max, add, 4, b + a, a)
+
+OPERATION (min, add, 1, a, a + b)
+OPERATION (min, add, 2, a, b + a)
+OPERATION (min, add, 3, a + b, a)
+OPERATION (min, add, 4, b + a, a)
+
+OPERATION (max, sub, 1, a, a - b)
+OPERATION (max, sub, 2, a - b, a)
+
+OPERATION (min, sub, 1, a, a - b)
+OPERATION (min, sub, 2, a - b, a)
+
+/* { dg-final { scan-assembler-times "umax\\t" 6 } } */
+/* { dg-final { scan-assembler-times "umin\\t" 6 } } */
+/* { dg-final { scan-assembler-not "adds\\t" } } */
+/* { dg-final { scan-assembler-not "subs\\t" } } */
+
-- 
2.44.0

Re: [PATCH] [aarch64] Make better use of overflowing operations in max/min(a, add/sub(a, b)) [PR116815]

Reply via email to