[PATCH 6/8] aarch64: Add backend support for atomic fetch min/max operations

soumyaa Tue, 02 Sep 2025 02:16:21 -0700

From: Soumya AR <soum...@nvidia.com>

This patch adds support for atomic min/max instructions offered by aarch64 under
LSE.


The implementation provides three execution paths:

1. When LSE is available at compile time (-march=armv8.1-a or later):
   Emits LSE atomic min/max instructions (ldsmin, ldsmax, ldumin, ldumax).

2. When LSE availability is unknown at compile time:
   Uses outline atomics - calls to libgcc functions that perform runtime
   detection for LSE and dispatch to either LSE instructions or LL/SC sequences.

3. When outline atomics are explicitly disabled (-mno-outline-atomics) on
   non-LSE targets: Emits inline LL/SC (LDXR, STXR, etc.) sequences
   using conditional select instructions for min/max.

---

For op_fetch variants, we first generate the appropriate fetch_op variant, then
use aarch64_split_atomic_op to generate the same operation (non-atomically) to
return the updated value. This function is extended to handle the min/max
operations. We have to be careful about QI/HI modes, as ldxr and its variants
perform a zero-extended load, so for signed min/max it's important to
explicitly sign-extend the values before comparing them.

---

lse.S is responsible for emitting the appropriate LSE or non-LSE sequence. For
min/max on non-LSE systems, this is done using a conditional select.

There is, however, a unique case: systems with the CSSC extension have
native min/max instructions as well. In that case, it would be preferable to
emit the LL/SC sequence using the native min/max instructions. However, this
would only occur on targets with CSSC but without LSE, which is quite
improbable, so I haven't added special handling for the CSSC feature.

---

We now overwrite our previous tests since CAS loops will no longer be emitted.
This is done to test for correctness in a consistent manner.

Bootstrapped and regression tested on aarch64-linux-gnu and x86_64-linux-gnu.
Cross-compiled and regression tested for arm-linux-gnueabihf-armv7-a and
aarch64-linux-gnu without LSE.

Signed-off-by: Soumya AR <soum...@nvidia.com>

gcc/ChangeLog:

        * config/aarch64/aarch64-protos.h: Add declarations for new
        outline atomic min/max name structures.
        * config/aarch64/aarch64.cc (DEF4): Define names for outline
        atomic min/max functions.
        (aarch64_ool_ldsmin_names, aarch64_ool_ldsmax_names,
        aarch64_ool_ldumin_names, aarch64_ool_ldumax_names): New.
        (aarch64_split_atomic_op): Add support for SMIN, SMAX, UMIN,
        UMAX operations with sign extension for QI/HI modes.
        * config/aarch64/atomics.md: Add LSE and outline atomics
        support for atomic fetch min/max operations.
        * config/aarch64/iterators.md: Add min/max iterators.

libgcc/ChangeLog:

        * config/aarch64/lse.S: Implement outline atomic min/max
        functions.
        * config/aarch64/t-lse: Add min/max function entries.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/atomic-minmax-lse.c: Modified for LSE min/max.
        * gcc.target/aarch64/atomic-minmax-nolse.c: Modified to check for
        non-LSE atomic min/max.
        * gcc.target/aarch64/atomic-minmax.c: Modified to check for min/max
        libcalls.
---
 gcc/config/aarch64/aarch64-protos.h           |   4 +
 gcc/config/aarch64/aarch64.cc                 |  51 +++++++++
 gcc/config/aarch64/atomics.md                 |  54 ++++++++-
 gcc/config/aarch64/iterators.md               |  30 +++--
 .../gcc.target/aarch64/atomic-minmax-lse.c    |  41 ++++---
 .../gcc.target/aarch64/atomic-minmax-nolse.c  | 103 ++++++++++--------
 .../gcc.target/aarch64/atomic-minmax.c        |  41 ++++---
 libgcc/config/aarch64/lse.S                   |  62 ++++++++++-
 libgcc/config/aarch64/t-lse                   |   3 +-
 9 files changed, 292 insertions(+), 97 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 36bd88593ff..a152d76da0e 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1256,6 +1256,10 @@ extern const atomic_ool_names aarch64_ool_ldadd_names;
 extern const atomic_ool_names aarch64_ool_ldset_names;
 extern const atomic_ool_names aarch64_ool_ldclr_names;
 extern const atomic_ool_names aarch64_ool_ldeor_names;
+extern const atomic_ool_names aarch64_ool_ldsmin_names;
+extern const atomic_ool_names aarch64_ool_ldsmax_names;
+extern const atomic_ool_names aarch64_ool_ldumin_names;
+extern const atomic_ool_names aarch64_ool_ldumax_names;
 
 tree aarch64_resolve_overloaded_builtin_general (location_t, tree, void *);
 
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 2dbaf4a8e59..99872bf45fe 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -25745,6 +25745,10 @@ const atomic_ool_names aarch64_ool_ldadd_names = { { 
DEF4(ldadd) } };
 const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } };
 const atomic_ool_names aarch64_ool_ldclr_names = { { DEF4(ldclr) } };
 const atomic_ool_names aarch64_ool_ldeor_names = { { DEF4(ldeor) } };
+const atomic_ool_names aarch64_ool_ldsmin_names = { { DEF4(ldsmin) } };
+const atomic_ool_names aarch64_ool_ldsmax_names = { { DEF4(ldsmax) } };
+const atomic_ool_names aarch64_ool_ldumin_names = { { DEF4(ldumin) } };
+const atomic_ool_names aarch64_ool_ldumax_names = { { DEF4(ldumax) } };
 
 #undef DEF0
 #undef DEF4
@@ -25997,6 +26001,53 @@ aarch64_split_atomic_op (enum rtx_code code, rtx 
old_out, rtx new_out, rtx mem,
       emit_insn (gen_rtx_SET (new_out, x));
       break;
 
+    case SMIN:
+    case SMAX:
+    case UMIN:
+    case UMAX:
+    {
+      rtx_code cmp_code;
+      switch (code)
+       {
+       case SMIN:
+         cmp_code = LT;
+         break;
+       case SMAX:
+         cmp_code = GT;
+         break;
+       case UMIN:
+         cmp_code = LTU;
+         break;
+       case UMAX:
+         cmp_code = GTU;
+         break;
+       default:
+         gcc_unreachable ();
+       }
+
+      if ((code == SMIN || code == SMAX) && (mode == QImode || mode == HImode))
+       {
+         rtx old_extended = gen_rtx_REG (wmode, REGNO (old_out));
+         emit_insn (
+           gen_rtx_SET (old_extended,
+                        gen_rtx_SIGN_EXTEND (wmode,
+                                             gen_lowpart (mode, old_out))));
+         old_out = old_extended;
+
+         rtx value_extended = gen_rtx_REG (wmode, REGNO (value));
+         emit_insn (
+           gen_rtx_SET (value_extended,
+                        gen_rtx_SIGN_EXTEND (wmode,
+                                             gen_lowpart (mode, value))));
+         value = value_extended;
+       }
+      rtx cc_reg = aarch64_gen_compare_reg (cmp_code, old_out, value);
+      rtx cond = gen_rtx_fmt_ee (cmp_code, VOIDmode, cc_reg, const0_rtx);
+      x = gen_rtx_IF_THEN_ELSE (wmode, cond, old_out, value);
+      emit_insn (gen_rtx_SET (new_out, x));
+      break;
+    }
+
     case MINUS:
       if (CONST_INT_P (value))
        {
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index ea4a9367fc8..64b76f3df12 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -284,6 +284,18 @@
          case XOR:
            gen = gen_aarch64_atomic_xor<mode>_lse;
            break;
+         case SMAX:
+           gen = gen_aarch64_atomic_smax<mode>_lse;
+           break;
+         case SMIN:
+           gen = gen_aarch64_atomic_smin<mode>_lse;
+           break;
+         case UMAX:
+           gen = gen_aarch64_atomic_umax<mode>_lse;
+           break;
+         case UMIN:
+           gen = gen_aarch64_atomic_umin<mode>_lse;
+           break;
          case AND:
            operands[1] = expand_simple_unop (<MODE>mode, NOT, operands[1],
                                              NULL, 1);
@@ -317,6 +329,18 @@
                                              NULL, 1);
            names = &aarch64_ool_ldclr_names;
            break;
+         case SMIN:
+           names = &aarch64_ool_ldsmin_names;
+           break;
+         case SMAX:
+           names = &aarch64_ool_ldsmax_names;
+           break;
+         case UMIN:
+           names = &aarch64_ool_ldumin_names;
+           break;
+         case UMAX:
+           names = &aarch64_ool_ldumax_names;
+           break;
          default:
            gcc_unreachable ();
          }
@@ -442,6 +466,18 @@
        case XOR:
          gen = gen_aarch64_atomic_fetch_xor<mode>_lse;
          break;
+       case SMAX:
+         gen = gen_aarch64_atomic_fetch_smax<mode>_lse;
+         break;
+       case SMIN:
+         gen = gen_aarch64_atomic_fetch_smin<mode>_lse;
+         break;
+       case UMAX:
+         gen = gen_aarch64_atomic_fetch_umax<mode>_lse;
+         break;
+       case UMIN:
+         gen = gen_aarch64_atomic_fetch_umin<mode>_lse;
+         break;
        case AND:
          operands[2] = expand_simple_unop (<MODE>mode, NOT, operands[2],
                                            NULL, 1);
@@ -475,6 +511,18 @@
                                            NULL, 1);
          names = &aarch64_ool_ldclr_names;
          break;
+       case SMIN:
+         names = &aarch64_ool_ldsmin_names;
+         break;
+       case SMAX:
+         names = &aarch64_ool_ldsmax_names;
+         break;
+       case UMIN:
+         names = &aarch64_ool_ldumin_names;
+         break;
+       case UMAX:
+         names = &aarch64_ool_ldumax_names;
+         break;
        default:
          gcc_unreachable ();
        }
@@ -581,7 +629,11 @@
       operands[2] = force_reg (<MODE>mode, operands[2]);
       emit_insn (gen_atomic_fetch_<atomic_optab><mode>
                  (tmp, operands[1], operands[2], operands[3]));
-      tmp = expand_simple_binop (<MODE>mode, <CODE>, tmp, operands[2],
+      if (<CODE> == SMIN || <CODE> == SMAX)
+       tmp = expand_simple_binop (<MODE>mode, <CODE>, tmp, operands[2],
+                                operands[0], 0, OPTAB_WIDEN);
+      else
+       tmp = expand_simple_binop (<MODE>mode, <CODE>, tmp, operands[2],
                                 operands[0], 1, OPTAB_WIDEN);
       emit_move_insn (operands[0], tmp);
     }
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 8f8237edf6c..fdb15fb212a 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1302,6 +1302,10 @@
     UNSPECV_ATOMIC_LDOP_BIC    ; Represent an atomic load-bic
     UNSPECV_ATOMIC_LDOP_XOR    ; Represent an atomic load-xor
     UNSPECV_ATOMIC_LDOP_PLUS   ; Represent an atomic load-add
+    UNSPECV_ATOMIC_LDOP_SMAX   ; Represent an atomic load-smax
+    UNSPECV_ATOMIC_LDOP_SMIN   ; Represent an atomic load-smin
+    UNSPECV_ATOMIC_LDOP_UMAX   ; Represent an atomic load-umax
+    UNSPECV_ATOMIC_LDOP_UMIN   ; Represent an atomic load-umin
 ])
 
 ;; -------------------------------------------------------------------
@@ -2782,7 +2786,7 @@
 ;; Iterator for __sync_<op> operations that where the operation can be
 ;; represented directly RTL.  This is all of the sync operations bar
 ;; nand.
-(define_code_iterator atomic_op [plus minus ior xor and])
+(define_code_iterator atomic_op [plus minus ior xor and smin smax umin umax])
 
 ;; Iterator for integer conversions
 (define_code_iterator FIXUORS [fix unsigned_fix])
@@ -3095,21 +3099,27 @@
 
 ;; Atomic operations
 (define_code_attr atomic_optab
-  [(ior "or") (xor "xor") (and "and") (plus "add") (minus "sub")])
+  [(ior "or") (xor "xor") (and "and") (plus "add") (minus "sub")
+  (smin "smin") (smax "smax") (umin "umin") (umax "umax")])
 
 (define_code_attr atomic_op_operand
   [(ior "aarch64_logical_operand")
    (xor "aarch64_logical_operand")
    (and "aarch64_logical_operand")
    (plus "aarch64_plus_operand")
-   (minus "aarch64_plus_operand")])
+   (minus "aarch64_plus_operand")
+   (smin "aarch64_sminmax_operand")
+   (smax "aarch64_sminmax_operand")
+   (umin "aarch64_uminmax_operand")
+   (umax "aarch64_uminmax_operand")])
 
 ;; Constants acceptable for atomic operations.
 ;; This definition must appear in this file before the iterators it refers to.
 (define_code_attr const_atomic
  [(plus "IJ") (minus "IJ")
   (xor "<lconst_atomic>") (ior "<lconst_atomic>")
-  (and "<lconst_atomic>")])
+  (and "<lconst_atomic>")
+  (smin "") (smax "") (umin "") (umax "")])
 
 ;; Attribute to describe constants acceptable in atomic logical operations
 (define_mode_attr lconst_atomic [(QI "K") (HI "K") (SI "K") (DI "L")])
@@ -3948,7 +3958,9 @@
 
 (define_int_iterator ATOMIC_LDOP
  [UNSPECV_ATOMIC_LDOP_OR UNSPECV_ATOMIC_LDOP_BIC
-  UNSPECV_ATOMIC_LDOP_XOR UNSPECV_ATOMIC_LDOP_PLUS])
+  UNSPECV_ATOMIC_LDOP_XOR UNSPECV_ATOMIC_LDOP_PLUS
+  UNSPECV_ATOMIC_LDOP_SMAX UNSPECV_ATOMIC_LDOP_SMIN
+  UNSPECV_ATOMIC_LDOP_UMAX UNSPECV_ATOMIC_LDOP_UMIN])
 
 (define_int_iterator SUBDI_BITS [8 16 32])
 
@@ -4994,11 +5006,15 @@
 
 (define_int_attr atomic_ldop
  [(UNSPECV_ATOMIC_LDOP_OR "set") (UNSPECV_ATOMIC_LDOP_BIC "clr")
-  (UNSPECV_ATOMIC_LDOP_XOR "eor") (UNSPECV_ATOMIC_LDOP_PLUS "add")])
+  (UNSPECV_ATOMIC_LDOP_XOR "eor") (UNSPECV_ATOMIC_LDOP_PLUS "add")
+  (UNSPECV_ATOMIC_LDOP_SMAX "smax") (UNSPECV_ATOMIC_LDOP_SMIN "smin")
+  (UNSPECV_ATOMIC_LDOP_UMAX "umax") (UNSPECV_ATOMIC_LDOP_UMIN "umin")])
 
 (define_int_attr atomic_ldoptab
  [(UNSPECV_ATOMIC_LDOP_OR "ior") (UNSPECV_ATOMIC_LDOP_BIC "bic")
-  (UNSPECV_ATOMIC_LDOP_XOR "xor") (UNSPECV_ATOMIC_LDOP_PLUS "add")])
+  (UNSPECV_ATOMIC_LDOP_XOR "xor") (UNSPECV_ATOMIC_LDOP_PLUS "add")
+  (UNSPECV_ATOMIC_LDOP_SMAX "smax") (UNSPECV_ATOMIC_LDOP_SMIN "smin")
+  (UNSPECV_ATOMIC_LDOP_UMAX "umax") (UNSPECV_ATOMIC_LDOP_UMIN "umin")])
 
 (define_int_attr fp8_cvt_uns_op
   [(UNSPEC_F1CVT "f1cvt")
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-minmax-lse.c 
b/gcc/testsuite/gcc.target/aarch64/atomic-minmax-lse.c
index 19e078d0ee9..6d579f8360a 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-minmax-lse.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-minmax-lse.c
@@ -1,123 +1,122 @@
 /* { dg-do compile } */
-/* { dg-require-effective-target aarch64_asm_lse_ok } */
 /* { dg-options "-march=armv8-a+lse" } */
 /* { dg-final { check-function-bodies "**" "" } } */
 
 #include "atomic-minmax.x"
 
-/* { dg-final { scan-assembler-not "\tldxr*" } } */
-/* { dg-final { scan-assembler-not "\tldaxr*" } } */
-/* { dg-final { scan-assembler-not "\tstxr*" } } */
-/* { dg-final { scan-assembler-not "\tstlxr*" } } */
+/* { dg-final { scan-assembler-not "\tldxr" } } */
+/* { dg-final { scan-assembler-not "\tldaxr" } } */
+/* { dg-final { scan-assembler-not "\tstxr" } } */
+/* { dg-final { scan-assembler-not "\tstlxr" } } */
 
 /*
 ** test_smin_s8:
 **     ...
-**     casalb  w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ldsminb w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     ...
 */
 
 /*
 ** test_smax_s8:
 **     ...
-**     casalb  w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ldsmaxlb        w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     ...
 */
 
 /*
 ** test_smin_s16:
 **     ...
-**     casalh  w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ldsminah        w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     ...
 */
 
 /*
 ** test_smax_s16:
 **     ...
-**     casalh  w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ldsmaxalh       w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     ...
 */
 
 /*
 ** test_smin_s32:
 **     ...
-**     casal   w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ldsmin  w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     ...
 */
 
 /*
 ** test_smax_s32:
 **     ...
-**     casal   w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ldsmaxal        w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     ...
 */
 
 /*
 ** test_smin_s64:
 **     ...
-**     casal   x[0-9]+, x[0-9]+, \[x[0-9]+\]
+**     ldsmina x[0-9]+, x[0-9]+, \[x[0-9]+\]
 **     ...
 */
 
 /*
 ** test_smax_s64:
 **     ...
-**     casal   x[0-9]+, x[0-9]+, \[x[0-9]+\]
+**     ldsmax  x[0-9]+, x[0-9]+, \[x[0-9]+\]
 **     ...
 */
 
 /*
 ** test_umin_u8:
 **     ...
-**     casalb  w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     lduminb w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     ...
 */
 
 /*
 ** test_umax_u8:
 **     ...
-**     casalb  w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ldumaxab        w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     ...
 */
 
 /*
 ** test_umin_u16:
 **     ...
-**     casalh  w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     lduminah        w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     ...
 */
 
 /*
 ** test_umax_u16:
 **     ...
-**     casalh  w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ldumaxlh        w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     ...
 */
 
 /*
 ** test_umin_u32:
 **     ...
-**     casal   w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     lduminal        w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     ...
 */
 
 /*
 ** test_umax_u32:
 **     ...
-**     casal   w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ldumax  w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     ...
 */
 
 /*
 ** test_umin_u64:
 **     ...
-**     casal   x[0-9]+, x[0-9]+, \[x[0-9]+\]
+**     ldumin  x[0-9]+, x[0-9]+, \[x[0-9]+\]
 **     ...
 */
 
 /*
 ** test_umax_u64:
 **     ...
-**     casal   x[0-9]+, x[0-9]+, \[x[0-9]+\]
+**     ldumaxal        x[0-9]+, x[0-9]+, \[x[0-9]+\]
 **     ...
 */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-minmax-nolse.c 
b/gcc/testsuite/gcc.target/aarch64/atomic-minmax-nolse.c
index 74ec877adce..e4962974ea3 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-minmax-nolse.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-minmax-nolse.c
@@ -4,16 +4,22 @@
 
 #include "atomic-minmax.x"
 
-/* { dg-final { scan-assembler-not "\tcas*" } } */
-/* { dg-final { scan-assembler-not "__aarch64_*" } } */
+/* { dg-final { scan-assembler-not "\tldsmin" } } */
+/* { dg-final { scan-assembler-not "\tldsmax" } } */
+/* { dg-final { scan-assembler-not "\tldumin" } } */
+/* { dg-final { scan-assembler-not "\tldumax" } } */
+
+/* { dg-final { scan-assembler-not "__aarch64_" } } */
 
 /*
 ** test_smin_s8:
 **     ...
 **     ldxrb   w[0-9]+, \[x[0-9]+\]
-**     cmp     w[0-9]+, w[0-9]+, uxtb
-**     bne     .*
-**     stlxrb  w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     sxtb    w[0-9]+, w[0-9]+
+**     sxtb    w[0-9]+, w[0-9]+
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, lt
+**     stxrb   w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     cbnz    w[0-9]+, .*
 **     ...
 */
@@ -22,8 +28,10 @@
 ** test_smax_s8:
 **     ...
 **     ldxrb   w[0-9]+, \[x[0-9]+\]
-**     cmp     w[0-9]+, w[0-9]+, uxtb
-**     bne     .*
+**     sxtb    w[0-9]+, w[0-9]+
+**     sxtb    w[0-9]+, w[0-9]+
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, gt
 **     stlxrb  w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     cbnz    w[0-9]+, .*
 **     ...
@@ -32,10 +40,12 @@
 /*
 ** test_smin_s16:
 **     ...
-**     ldxrh   w[0-9]+, \[x[0-9]+\]
-**     cmp     w[0-9]+, w[0-9]+, uxth
-**     bne     .*
-**     stlxrh  w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ldaxrh  w[0-9]+, \[x[0-9]+\]
+**     sxth    w[0-9]+, w[0-9]+
+**     sxth    w[0-9]+, w[0-9]+
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, lt
+**     stxrh   w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     cbnz    w[0-9]+, .*
 **     ...
 */
@@ -43,20 +53,23 @@
 /*
 ** test_smax_s16:
 **     ...
-**     ldxrh   w[0-9]+, \[x[0-9]+\]
-**     cmp     w[0-9]+, w[0-9]+, uxth
-**     bne     .*
+**     ldaxrh  w[0-9]+, \[x[0-9]+\]
+**     sxth    w[0-9]+, w[0-9]+
+**     sxth    w[0-9]+, w[0-9]+
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, gt
 **     stlxrh  w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     cbnz    w[0-9]+, .*
 **     ...
 */
+
 /*
 ** test_smin_s32:
 **     ...
 **     ldxr    w[0-9]+, \[x[0-9]+\]
 **     cmp     w[0-9]+, w[0-9]+
-**     bne     .*
-**     stlxr   w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, lt
+**     stxr    w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     cbnz    w[0-9]+, .*
 **     ...
 */
@@ -64,9 +77,9 @@
 /*
 ** test_smax_s32:
 **     ...
-**     ldxr    w[0-9]+, \[x[0-9]+\]
+**     ldaxr   w[0-9]+, \[x[0-9]+\]
 **     cmp     w[0-9]+, w[0-9]+
-**     bne     .*
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, gt
 **     stlxr   w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     cbnz    w[0-9]+, .*
 **     ...
@@ -75,21 +88,21 @@
 /*
 ** test_smin_s64:
 **     ...
-**     ldxr    x[0-9]+, \[x[0-9]+\]
+**     ldaxr   x[0-9]+, \[x[0-9]+\]
 **     cmp     x[0-9]+, x[0-9]+
-**     bne     .*
-**     stlxr   w[0-9]+, x[0-9]+, \[x[0-9]+\]
+**     csel    x[0-9]+, x[0-9]+, x[0-9]+, lt
+**     stxr    w[0-9]+, x[0-9]+, \[x[0-9]+\]
 **     cbnz    w[0-9]+, .*
 **     ...
 */
 
 /*
-** test_smin_s64:
+** test_smax_s64:
 **     ...
 **     ldxr    x[0-9]+, \[x[0-9]+\]
 **     cmp     x[0-9]+, x[0-9]+
-**     bne     .*
-**     stlxr   w[0-9]+, x[0-9]+, \[x[0-9]+\]
+**     csel    x[0-9]+, x[0-9]+, x[0-9]+, gt
+**     stxr    w[0-9]+, x[0-9]+, \[x[0-9]+\]
 **     cbnz    w[0-9]+, .*
 **     ...
 */
@@ -98,9 +111,9 @@
 ** test_umin_u8:
 **     ...
 **     ldxrb   w[0-9]+, \[x[0-9]+\]
-**     cmp     w[0-9]+, w[0-9]+, uxtb
-**     bne     .*
-**     stlxrb  w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, cc
+**     stxrb   w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     cbnz    w[0-9]+, .*
 **     ...
 */
@@ -108,10 +121,10 @@
 /*
 ** test_umax_u8:
 **     ...
-**     ldxrb   w[0-9]+, \[x[0-9]+\]
-**     cmp     w[0-9]+, w[0-9]+, uxtb
-**     bne     .*
-**     stlxrb  w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ldaxrb  w[0-9]+, \[x[0-9]+\]
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, hi
+**     stxrb   w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     cbnz    w[0-9]+, .*
 **     ...
 */
@@ -119,10 +132,10 @@
 /*
 ** test_umin_u16:
 **     ...
-**     ldxrh   w[0-9]+, \[x[0-9]+\]
-**     cmp     w[0-9]+, w[0-9]+, uxth
-**     bne     .*
-**     stlxrh  w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     ldaxrh  w[0-9]+, \[x[0-9]+\]
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, cc
+**     stxrh   w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     cbnz    w[0-9]+, .*
 **     ...
 */
@@ -131,8 +144,8 @@
 ** test_umax_u16:
 **     ...
 **     ldxrh   w[0-9]+, \[x[0-9]+\]
-**     cmp     w[0-9]+, w[0-9]+, uxth
-**     bne     .*
+**     cmp     w[0-9]+, w[0-9]+
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, hi
 **     stlxrh  w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     cbnz    w[0-9]+, .*
 **     ...
@@ -141,9 +154,9 @@
 /*
 ** test_umin_u32:
 **     ...
-**     ldxr    w[0-9]+, \[x[0-9]+\]
+**     ldaxr   w[0-9]+, \[x[0-9]+\]
 **     cmp     w[0-9]+, w[0-9]+
-**     bne     .*
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, cc
 **     stlxr   w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     cbnz    w[0-9]+, .*
 **     ...
@@ -154,8 +167,8 @@
 **     ...
 **     ldxr    w[0-9]+, \[x[0-9]+\]
 **     cmp     w[0-9]+, w[0-9]+
-**     bne     .*
-**     stlxr   w[0-9]+, w[0-9]+, \[x[0-9]+\]
+**     csel    w[0-9]+, w[0-9]+, w[0-9]+, hi
+**     stxr    w[0-9]+, w[0-9]+, \[x[0-9]+\]
 **     cbnz    w[0-9]+, .*
 **     ...
 */
@@ -165,8 +178,8 @@
 **     ...
 **     ldxr    x[0-9]+, \[x[0-9]+\]
 **     cmp     x[0-9]+, x[0-9]+
-**     bne     .*
-**     stlxr   w[0-9]+, x[0-9]+, \[x[0-9]+\]
+**     csel    x[0-9]+, x[0-9]+, x[0-9]+, cc
+**     stxr    w[0-9]+, x[0-9]+, \[x[0-9]+\]
 **     cbnz    w[0-9]+, .*
 **     ...
 */
@@ -174,9 +187,9 @@
 /*
 ** test_umax_u64:
 **     ...
-**     ldxr    x[0-9]+, \[x[0-9]+\]
+**     ldaxr   x[0-9]+, \[x[0-9]+\]
 **     cmp     x[0-9]+, x[0-9]+
-**     bne     .*
+**     csel    x[0-9]+, x[0-9]+, x[0-9]+, hi
 **     stlxr   w[0-9]+, x[0-9]+, \[x[0-9]+\]
 **     cbnz    w[0-9]+, .*
 **     ...
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-minmax.c 
b/gcc/testsuite/gcc.target/aarch64/atomic-minmax.c
index f61082e288b..225816b63bd 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-minmax.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-minmax.c
@@ -10,120 +10,119 @@ int main ()
   return 0;
 }
 
-/* { dg-final { scan-assembler-not "\tcas*" } } */
-/* { dg-final { scan-assembler-not "\tldxr*" } } */
-/* { dg-final { scan-assembler-not "\tldaxr*" } } */
-/* { dg-final { scan-assembler-not "\tstxr*" } } */
-/* { dg-final { scan-assembler-not "\tstlxr*" } } */
+/* { dg-final { scan-assembler-not "\tldsmin" } } */
+/* { dg-final { scan-assembler-not "\tldsmax" } } */
+/* { dg-final { scan-assembler-not "\tldumin" } } */
+/* { dg-final { scan-assembler-not "\tldumax" } } */
 
 /*
 ** test_smin_s8:
 **     ...
-**     bl      __aarch64_cas1_sync
+**     bl      __aarch64_ldsmin1_relax
 **     ...
 */
 
 /*
 ** test_smax_s8:
 **     ...
-**     bl      __aarch64_cas1_sync
+**     bl      __aarch64_ldsmax1_rel
 **     ...
 */
 
 /*
 ** test_smin_s16:
 **     ...
-**     bl      __aarch64_cas2_sync
+**     bl      __aarch64_ldsmin2_acq
 **     ...
 */
 
 /*
 ** test_smax_s16:
 **     ...
-**     bl      __aarch64_cas2_sync
+**     bl      __aarch64_ldsmax2_acq_rel
 **     ...
 */
 
 /*
 ** test_smin_s32:
 **     ...
-**     bl      __aarch64_cas4_sync
+**     bl      __aarch64_ldsmin4_relax
 **     ...
 */
 
 /*
 ** test_smax_s32:
 **     ...
-**     bl      __aarch64_cas4_sync
+**     bl      __aarch64_ldsmax4_acq_rel
 **     ...
 */
 
 /*
 ** test_smin_s64:
 **     ...
-**     bl      __aarch64_cas8_sync
+**     bl      __aarch64_ldsmin8_acq
 **     ...
 */
 
 /*
 ** test_smax_s64:
 **     ...
-**     bl      __aarch64_cas8_sync
+**     bl      __aarch64_ldsmax8_relax
 **     ...     
 */
 
 /*
 ** test_umin_u8:
 **     ...
-**     bl      __aarch64_cas1_sync
+**     bl      __aarch64_ldumin1_relax
 **     ...
 */
 
 /*
 ** test_umax_u8:
 **     ...
-**     bl      __aarch64_cas1_sync
+**     bl      __aarch64_ldumax1_acq
 **     ...
 */
 
 /*
 ** test_umin_u16:
 **     ...
-**     bl      __aarch64_cas2_sync
+**     bl      __aarch64_ldumin2_acq
 **     ...
 */
 
 /*
 ** test_umax_u16:
 **     ...
-**     bl      __aarch64_cas2_sync
+**     bl      __aarch64_ldumax2_rel
 **     ...
 */
 
 /*
 ** test_umin_u32:
 **     ...
-**     bl      __aarch64_cas4_sync
+**     bl      __aarch64_ldumin4_acq_rel
 **     ...
 */
 
 /*
 ** test_umax_u32:
 **     ...
-**     bl      __aarch64_cas4_sync
+**     bl      __aarch64_ldumax4_relax
 **     ...
 */
 
 /*
 ** test_umin_u64:
 **     ...
-**     bl      __aarch64_cas8_sync
+**     bl      __aarch64_ldumin8_relax
 **     ...
 */
 
 /*
 ** test_umax_u64:
 **     ...
-**     bl      __aarch64_cas8_sync
+**     bl      __aarch64_ldumax8_acq_rel
 **     ...     
 */
diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S
index cb24011d883..c251d5d0188 100644
--- a/libgcc/config/aarch64/lse.S
+++ b/libgcc/config/aarch64/lse.S
@@ -276,7 +276,9 @@ ENDFN       NAME(swp)
 #endif
 
 #if defined(L_ldadd) || defined(L_ldclr) \
-    || defined(L_ldeor) || defined(L_ldset)
+    || defined(L_ldeor) || defined(L_ldset) \
+    || defined(L_ldsmin) || defined(L_ldsmax) \
+    || defined(L_ldumin) || defined(L_ldumax)
 
 #ifdef L_ldadd
 #define LDNM   ldadd
@@ -294,6 +296,26 @@ ENDFN      NAME(swp)
 #define LDNM   ldset
 #define OP     orr
 #define OPN    0x3000
+#elif defined(L_ldsmin)
+#define LDNM   ldsmin
+#define OP     smin
+#define OPN    0x5000
+#define IS_MINMAX 1
+#elif defined(L_ldsmax)
+#define LDNM   ldsmax
+#define OP     smax
+#define OPN    0x4000
+#define IS_MINMAX 1
+#elif defined(L_ldumin)
+#define LDNM   ldumin
+#define OP     umin
+#define OPN    0x7000
+#define IS_MINMAX 1
+#elif defined(L_ldumax)
+#define LDNM   ldumax
+#define OP     umax
+#define OPN    0x6000
+#define IS_MINMAX 1
 #else
 #error
 #endif
@@ -311,7 +333,45 @@ STARTFN    NAME(LDNM)
 
 8:     mov             s(tmp0), s(0)
 0:     LDXR            s(0), [x1]
+#ifdef IS_MINMAX
+       /* For min/max, extend if needed, compare, and select.  */
+#if SIZE < 4
+  #if defined(L_ldsmin) || defined(L_ldsmax)
+    /* Sign extend for signed comparisons.  */
+    #if SIZE == 1
+       sxtb            w(tmp1), w(0)
+       sxtb            w(tmp3), w(tmp0)
+    #else /* SIZE == 2 */
+       sxth            w(tmp1), w(0)
+       sxth            w(tmp3), w(tmp0)
+    #endif
+  #else /* L_ldumin || L_ldumax */
+    /* Zero extend for unsigned comparisons.  */
+    #if SIZE == 1
+       uxtb            w(tmp1), w(0)
+       uxtb            w(tmp3), w(tmp0)
+    #else /* SIZE == 2 */
+       uxth            w(tmp1), w(0)
+       uxth            w(tmp3), w(tmp0)
+    #endif
+  #endif
+       cmp             w(tmp3), w(tmp1)
+#else /* SIZE >= 4 */
+       cmp             s(tmp0), s(0)
+#endif
+  /* Select based on condition.  */
+  #if defined(L_ldsmin)
+       csel            s(tmp1), s(tmp0), s(0), lt
+  #elif defined(L_ldsmax)
+       csel            s(tmp1), s(tmp0), s(0), gt
+  #elif defined(L_ldumin)
+       csel            s(tmp1), s(tmp0), s(0), lo
+  #elif defined(L_ldumax)
+       csel            s(tmp1), s(tmp0), s(0), hi
+  #endif
+#else /* Not IS_MINMAX */
        OP              s(tmp1), s(0), s(tmp0)
+#endif /* IS_MINMAX */
        STXR            w(tmp2), s(tmp1), [x1]
        cbnz            w(tmp2), 0b
        BARRIER
diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse
index 58908dcabfb..cc0544cf0c8 100644
--- a/libgcc/config/aarch64/t-lse
+++ b/libgcc/config/aarch64/t-lse
@@ -23,7 +23,8 @@ S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas))
 O0 := $(foreach m, 1 2 3 4 5, $(addsuffix _$(m)$(objext), $(S0)))
 
 # Swap, Load-and-operate have 4 sizes and 5 memory models
-S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset))
+S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset \
+                             ldsmin ldsmax ldumin ldumax))
 O1 := $(foreach m, 1 2 3 4 5, $(addsuffix _$(m)$(objext), $(S1)))
 
 LSE_OBJS := $(O0) $(O1)
-- 
2.44.0

[PATCH 6/8] aarch64: Add backend support for atomic fetch min/max operations

Reply via email to