From: Soumya AR <soum...@nvidia.com> This patch adds support for atomic min/max instructions offered by aarch64 under LSE.
The implementation provides three execution paths: 1. When LSE is available at compile time (-march=armv8.1-a or later): Emits LSE atomic min/max instructions (ldsmin, ldsmax, ldumin, ldumax). 2. When LSE availability is unknown at compile time: Uses outline atomics - calls to libgcc functions that perform runtime detection for LSE and dispatch to either LSE instructions or LL/SC sequences. 3. When outline atomics are explicitly disabled (-mno-outline-atomics) on non-LSE targets: Emits inline LL/SC (LDXR, STXR, etc.) sequences using conditional select instructions for min/max. --- For op_fetch variants, we first generate the appropriate fetch_op variant, then use aarch64_split_atomic_op to generate the same operation (non-atomically) to return the updated value. This function is extended to handle the min/max operations. We have to be careful about QI/HI modes, as ldxr and its variants do a zero-extended load, so it's important to explicitly sign-extend the values before comparing them. --- lse.S is responsible for emitting the appropriate LSE or non-LSE sequence. For min/max on non-LSE systems, this is done using a conditional select. There is, however, a unique case where systems with the CSSC extension have native min/max instructions as well. In that case, it would be preferable to emit the LL/SC sequence using the native min/max instructions. However, this would only occur on targets with CSSC but without LSE, which is quite improbable, so I haven't added special handling for the CSSC feature. --- We now overwrite our previous tests since CAS loops will no longer be emitted. This is done to test for correctness in a consistent manner. Bootstrapped and regression tested on aarch64-linux-gnu and x86_64-linux-gnu. Cross-compiled and regression tested for arm-linux-gnueabihf-armv7-a and aarch64-linux-gnu without LSE. 
Signed-off-by: Soumya AR <soum...@nvidia.com> gcc/ChangeLog: * config/aarch64/aarch64-protos.h: Add declarations for new outline atomic min/max name structures. * config/aarch64/aarch64.cc (DEF4): Define names for outline atomic min/max functions. (aarch64_ool_ldsmin_names, aarch64_ool_ldsmax_names, aarch64_ool_ldumin_names, aarch64_ool_ldumax_names): New. (aarch64_split_atomic_op): Add support for SMIN, SMAX, UMIN, UMAX operations with sign extension for QI/HI modes. * config/aarch64/atomics.md: Add LSE and outline atomics support for atomic fetch min/max operations. * config/aarch64/iterators.md: Add min/max iterators. libgcc/ChangeLog: * config/aarch64/lse.S: Implement outline atomic min/max functions. * config/aarch64/t-lse: Add min/max function entries. gcc/testsuite/ChangeLog: * gcc.target/aarch64/atomic-minmax-lse.c: Modified for LSE min/max. * gcc.target/aarch64/atomic-minmax-nolse.c: Modified to check for non-LSE atomic min/max. * gcc.target/aarch64/atomic-minmax.c: Modified to check for min/max libcalls. 
--- gcc/config/aarch64/aarch64-protos.h | 4 + gcc/config/aarch64/aarch64.cc | 51 +++++++++ gcc/config/aarch64/atomics.md | 54 ++++++++- gcc/config/aarch64/iterators.md | 30 +++-- .../gcc.target/aarch64/atomic-minmax-lse.c | 41 ++++--- .../gcc.target/aarch64/atomic-minmax-nolse.c | 103 ++++++++++-------- .../gcc.target/aarch64/atomic-minmax.c | 41 ++++--- libgcc/config/aarch64/lse.S | 62 ++++++++++- libgcc/config/aarch64/t-lse | 3 +- 9 files changed, 292 insertions(+), 97 deletions(-) diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 36bd88593ff..a152d76da0e 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1256,6 +1256,10 @@ extern const atomic_ool_names aarch64_ool_ldadd_names; extern const atomic_ool_names aarch64_ool_ldset_names; extern const atomic_ool_names aarch64_ool_ldclr_names; extern const atomic_ool_names aarch64_ool_ldeor_names; +extern const atomic_ool_names aarch64_ool_ldsmin_names; +extern const atomic_ool_names aarch64_ool_ldsmax_names; +extern const atomic_ool_names aarch64_ool_ldumin_names; +extern const atomic_ool_names aarch64_ool_ldumax_names; tree aarch64_resolve_overloaded_builtin_general (location_t, tree, void *); diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 2dbaf4a8e59..99872bf45fe 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -25745,6 +25745,10 @@ const atomic_ool_names aarch64_ool_ldadd_names = { { DEF4(ldadd) } }; const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } }; const atomic_ool_names aarch64_ool_ldclr_names = { { DEF4(ldclr) } }; const atomic_ool_names aarch64_ool_ldeor_names = { { DEF4(ldeor) } }; +const atomic_ool_names aarch64_ool_ldsmin_names = { { DEF4(ldsmin) } }; +const atomic_ool_names aarch64_ool_ldsmax_names = { { DEF4(ldsmax) } }; +const atomic_ool_names aarch64_ool_ldumin_names = { { DEF4(ldumin) } }; +const atomic_ool_names 
aarch64_ool_ldumax_names = { { DEF4(ldumax) } }; #undef DEF0 #undef DEF4 @@ -25997,6 +26001,53 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, emit_insn (gen_rtx_SET (new_out, x)); break; + case SMIN: + case SMAX: + case UMIN: + case UMAX: + { + rtx_code cmp_code; + switch (code) + { + case SMIN: + cmp_code = LT; + break; + case SMAX: + cmp_code = GT; + break; + case UMIN: + cmp_code = LTU; + break; + case UMAX: + cmp_code = GTU; + break; + default: + gcc_unreachable (); + } + + if ((code == SMIN || code == SMAX) && (mode == QImode || mode == HImode)) + { + rtx old_extended = gen_rtx_REG (wmode, REGNO (old_out)); + emit_insn ( + gen_rtx_SET (old_extended, + gen_rtx_SIGN_EXTEND (wmode, + gen_lowpart (mode, old_out)))); + old_out = old_extended; + + rtx value_extended = gen_rtx_REG (wmode, REGNO (value)); + emit_insn ( + gen_rtx_SET (value_extended, + gen_rtx_SIGN_EXTEND (wmode, + gen_lowpart (mode, value)))); + value = value_extended; + } + rtx cc_reg = aarch64_gen_compare_reg (cmp_code, old_out, value); + rtx cond = gen_rtx_fmt_ee (cmp_code, VOIDmode, cc_reg, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (wmode, cond, old_out, value); + emit_insn (gen_rtx_SET (new_out, x)); + break; + } + case MINUS: if (CONST_INT_P (value)) { diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md index ea4a9367fc8..64b76f3df12 100644 --- a/gcc/config/aarch64/atomics.md +++ b/gcc/config/aarch64/atomics.md @@ -284,6 +284,18 @@ case XOR: gen = gen_aarch64_atomic_xor<mode>_lse; break; + case SMAX: + gen = gen_aarch64_atomic_smax<mode>_lse; + break; + case SMIN: + gen = gen_aarch64_atomic_smin<mode>_lse; + break; + case UMAX: + gen = gen_aarch64_atomic_umax<mode>_lse; + break; + case UMIN: + gen = gen_aarch64_atomic_umin<mode>_lse; + break; case AND: operands[1] = expand_simple_unop (<MODE>mode, NOT, operands[1], NULL, 1); @@ -317,6 +329,18 @@ NULL, 1); names = &aarch64_ool_ldclr_names; break; + case SMIN: + names = 
&aarch64_ool_ldsmin_names; + break; + case SMAX: + names = &aarch64_ool_ldsmax_names; + break; + case UMIN: + names = &aarch64_ool_ldumin_names; + break; + case UMAX: + names = &aarch64_ool_ldumax_names; + break; default: gcc_unreachable (); } @@ -442,6 +466,18 @@ case XOR: gen = gen_aarch64_atomic_fetch_xor<mode>_lse; break; + case SMAX: + gen = gen_aarch64_atomic_fetch_smax<mode>_lse; + break; + case SMIN: + gen = gen_aarch64_atomic_fetch_smin<mode>_lse; + break; + case UMAX: + gen = gen_aarch64_atomic_fetch_umax<mode>_lse; + break; + case UMIN: + gen = gen_aarch64_atomic_fetch_umin<mode>_lse; + break; case AND: operands[2] = expand_simple_unop (<MODE>mode, NOT, operands[2], NULL, 1); @@ -475,6 +511,18 @@ NULL, 1); names = &aarch64_ool_ldclr_names; break; + case SMIN: + names = &aarch64_ool_ldsmin_names; + break; + case SMAX: + names = &aarch64_ool_ldsmax_names; + break; + case UMIN: + names = &aarch64_ool_ldumin_names; + break; + case UMAX: + names = &aarch64_ool_ldumax_names; + break; default: gcc_unreachable (); } @@ -581,7 +629,11 @@ operands[2] = force_reg (<MODE>mode, operands[2]); emit_insn (gen_atomic_fetch_<atomic_optab><mode> (tmp, operands[1], operands[2], operands[3])); - tmp = expand_simple_binop (<MODE>mode, <CODE>, tmp, operands[2], + if (<CODE> == SMIN || <CODE> == SMAX) + tmp = expand_simple_binop (<MODE>mode, <CODE>, tmp, operands[2], + operands[0], 0, OPTAB_WIDEN); + else + tmp = expand_simple_binop (<MODE>mode, <CODE>, tmp, operands[2], operands[0], 1, OPTAB_WIDEN); emit_move_insn (operands[0], tmp); } diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 8f8237edf6c..fdb15fb212a 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1302,6 +1302,10 @@ UNSPECV_ATOMIC_LDOP_BIC ; Represent an atomic load-bic UNSPECV_ATOMIC_LDOP_XOR ; Represent an atomic load-xor UNSPECV_ATOMIC_LDOP_PLUS ; Represent an atomic load-add + UNSPECV_ATOMIC_LDOP_SMAX ; Represent an atomic load-smax + 
UNSPECV_ATOMIC_LDOP_SMIN ; Represent an atomic load-smin + UNSPECV_ATOMIC_LDOP_UMAX ; Represent an atomic load-umax + UNSPECV_ATOMIC_LDOP_UMIN ; Represent an atomic load-umin ]) ;; ------------------------------------------------------------------- @@ -2782,7 +2786,7 @@ ;; Iterator for __sync_<op> operations that where the operation can be ;; represented directly RTL. This is all of the sync operations bar ;; nand. -(define_code_iterator atomic_op [plus minus ior xor and]) +(define_code_iterator atomic_op [plus minus ior xor and smin smax umin umax]) ;; Iterator for integer conversions (define_code_iterator FIXUORS [fix unsigned_fix]) @@ -3095,21 +3099,27 @@ ;; Atomic operations (define_code_attr atomic_optab - [(ior "or") (xor "xor") (and "and") (plus "add") (minus "sub")]) + [(ior "or") (xor "xor") (and "and") (plus "add") (minus "sub") + (smin "smin") (smax "smax") (umin "umin") (umax "umax")]) (define_code_attr atomic_op_operand [(ior "aarch64_logical_operand") (xor "aarch64_logical_operand") (and "aarch64_logical_operand") (plus "aarch64_plus_operand") - (minus "aarch64_plus_operand")]) + (minus "aarch64_plus_operand") + (smin "aarch64_sminmax_operand") + (smax "aarch64_sminmax_operand") + (umin "aarch64_uminmax_operand") + (umax "aarch64_uminmax_operand")]) ;; Constants acceptable for atomic operations. ;; This definition must appear in this file before the iterators it refers to. 
(define_code_attr const_atomic [(plus "IJ") (minus "IJ") (xor "<lconst_atomic>") (ior "<lconst_atomic>") - (and "<lconst_atomic>")]) + (and "<lconst_atomic>") + (smin "") (smax "") (umin "") (umax "")]) ;; Attribute to describe constants acceptable in atomic logical operations (define_mode_attr lconst_atomic [(QI "K") (HI "K") (SI "K") (DI "L")]) @@ -3948,7 +3958,9 @@ (define_int_iterator ATOMIC_LDOP [UNSPECV_ATOMIC_LDOP_OR UNSPECV_ATOMIC_LDOP_BIC - UNSPECV_ATOMIC_LDOP_XOR UNSPECV_ATOMIC_LDOP_PLUS]) + UNSPECV_ATOMIC_LDOP_XOR UNSPECV_ATOMIC_LDOP_PLUS + UNSPECV_ATOMIC_LDOP_SMAX UNSPECV_ATOMIC_LDOP_SMIN + UNSPECV_ATOMIC_LDOP_UMAX UNSPECV_ATOMIC_LDOP_UMIN]) (define_int_iterator SUBDI_BITS [8 16 32]) @@ -4994,11 +5006,15 @@ (define_int_attr atomic_ldop [(UNSPECV_ATOMIC_LDOP_OR "set") (UNSPECV_ATOMIC_LDOP_BIC "clr") - (UNSPECV_ATOMIC_LDOP_XOR "eor") (UNSPECV_ATOMIC_LDOP_PLUS "add")]) + (UNSPECV_ATOMIC_LDOP_XOR "eor") (UNSPECV_ATOMIC_LDOP_PLUS "add") + (UNSPECV_ATOMIC_LDOP_SMAX "smax") (UNSPECV_ATOMIC_LDOP_SMIN "smin") + (UNSPECV_ATOMIC_LDOP_UMAX "umax") (UNSPECV_ATOMIC_LDOP_UMIN "umin")]) (define_int_attr atomic_ldoptab [(UNSPECV_ATOMIC_LDOP_OR "ior") (UNSPECV_ATOMIC_LDOP_BIC "bic") - (UNSPECV_ATOMIC_LDOP_XOR "xor") (UNSPECV_ATOMIC_LDOP_PLUS "add")]) + (UNSPECV_ATOMIC_LDOP_XOR "xor") (UNSPECV_ATOMIC_LDOP_PLUS "add") + (UNSPECV_ATOMIC_LDOP_SMAX "smax") (UNSPECV_ATOMIC_LDOP_SMIN "smin") + (UNSPECV_ATOMIC_LDOP_UMAX "umax") (UNSPECV_ATOMIC_LDOP_UMIN "umin")]) (define_int_attr fp8_cvt_uns_op [(UNSPEC_F1CVT "f1cvt") diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-minmax-lse.c b/gcc/testsuite/gcc.target/aarch64/atomic-minmax-lse.c index 19e078d0ee9..6d579f8360a 100644 --- a/gcc/testsuite/gcc.target/aarch64/atomic-minmax-lse.c +++ b/gcc/testsuite/gcc.target/aarch64/atomic-minmax-lse.c @@ -1,123 +1,122 @@ /* { dg-do compile } */ -/* { dg-require-effective-target aarch64_asm_lse_ok } */ /* { dg-options "-march=armv8-a+lse" } */ /* { dg-final { check-function-bodies "**" "" } 
} */ #include "atomic-minmax.x" -/* { dg-final { scan-assembler-not "\tldxr*" } } */ -/* { dg-final { scan-assembler-not "\tldaxr*" } } */ -/* { dg-final { scan-assembler-not "\tstxr*" } } */ -/* { dg-final { scan-assembler-not "\tstlxr*" } } */ +/* { dg-final { scan-assembler-not "\tldxr" } } */ +/* { dg-final { scan-assembler-not "\tldaxr" } } */ +/* { dg-final { scan-assembler-not "\tstxr" } } */ +/* { dg-final { scan-assembler-not "\tstlxr" } } */ /* ** test_smin_s8: ** ... -** casalb w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ldsminb w[0-9]+, w[0-9]+, \[x[0-9]+\] ** ... */ /* ** test_smax_s8: ** ... -** casalb w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ldsmaxlb w[0-9]+, w[0-9]+, \[x[0-9]+\] ** ... */ /* ** test_smin_s16: ** ... -** casalh w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ldsminah w[0-9]+, w[0-9]+, \[x[0-9]+\] ** ... */ /* ** test_smax_s16: ** ... -** casalh w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ldsmaxalh w[0-9]+, w[0-9]+, \[x[0-9]+\] ** ... */ /* ** test_smin_s32: ** ... -** casal w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ldsmin w[0-9]+, w[0-9]+, \[x[0-9]+\] ** ... */ /* ** test_smax_s32: ** ... -** casal w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ldsmaxal w[0-9]+, w[0-9]+, \[x[0-9]+\] ** ... */ /* ** test_smin_s64: ** ... -** casal x[0-9]+, x[0-9]+, \[x[0-9]+\] +** ldsmina x[0-9]+, x[0-9]+, \[x[0-9]+\] ** ... */ /* ** test_smax_s64: ** ... -** casal x[0-9]+, x[0-9]+, \[x[0-9]+\] +** ldsmax x[0-9]+, x[0-9]+, \[x[0-9]+\] ** ... */ /* ** test_umin_u8: ** ... -** casalb w[0-9]+, w[0-9]+, \[x[0-9]+\] +** lduminb w[0-9]+, w[0-9]+, \[x[0-9]+\] ** ... */ /* ** test_umax_u8: ** ... -** casalb w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ldumaxab w[0-9]+, w[0-9]+, \[x[0-9]+\] ** ... */ /* ** test_umin_u16: ** ... -** casalh w[0-9]+, w[0-9]+, \[x[0-9]+\] +** lduminah w[0-9]+, w[0-9]+, \[x[0-9]+\] ** ... */ /* ** test_umax_u16: ** ... -** casalh w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ldumaxlh w[0-9]+, w[0-9]+, \[x[0-9]+\] ** ... */ /* ** test_umin_u32: ** ... 
-** casal w[0-9]+, w[0-9]+, \[x[0-9]+\] +** lduminal w[0-9]+, w[0-9]+, \[x[0-9]+\] ** ... */ /* ** test_umax_u32: ** ... -** casal w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ldumax w[0-9]+, w[0-9]+, \[x[0-9]+\] ** ... */ /* ** test_umin_u64: ** ... -** casal x[0-9]+, x[0-9]+, \[x[0-9]+\] +** ldumin x[0-9]+, x[0-9]+, \[x[0-9]+\] ** ... */ /* ** test_umax_u64: ** ... -** casal x[0-9]+, x[0-9]+, \[x[0-9]+\] +** ldumaxal x[0-9]+, x[0-9]+, \[x[0-9]+\] ** ... */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-minmax-nolse.c b/gcc/testsuite/gcc.target/aarch64/atomic-minmax-nolse.c index 74ec877adce..e4962974ea3 100644 --- a/gcc/testsuite/gcc.target/aarch64/atomic-minmax-nolse.c +++ b/gcc/testsuite/gcc.target/aarch64/atomic-minmax-nolse.c @@ -4,16 +4,22 @@ #include "atomic-minmax.x" -/* { dg-final { scan-assembler-not "\tcas*" } } */ -/* { dg-final { scan-assembler-not "__aarch64_*" } } */ +/* { dg-final { scan-assembler-not "\tldsmin" } } */ +/* { dg-final { scan-assembler-not "\tldsmax" } } */ +/* { dg-final { scan-assembler-not "\tldumin" } } */ +/* { dg-final { scan-assembler-not "\tldumax" } } */ + +/* { dg-final { scan-assembler-not "__aarch64_" } } */ /* ** test_smin_s8: ** ... ** ldxrb w[0-9]+, \[x[0-9]+\] -** cmp w[0-9]+, w[0-9]+, uxtb -** bne .* -** stlxrb w[0-9]+, w[0-9]+, \[x[0-9]+\] +** sxtb w[0-9]+, w[0-9]+ +** sxtb w[0-9]+, w[0-9]+ +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, lt +** stxrb w[0-9]+, w[0-9]+, \[x[0-9]+\] ** cbnz w[0-9]+, .* ** ... */ @@ -22,8 +28,10 @@ ** test_smax_s8: ** ... ** ldxrb w[0-9]+, \[x[0-9]+\] -** cmp w[0-9]+, w[0-9]+, uxtb -** bne .* +** sxtb w[0-9]+, w[0-9]+ +** sxtb w[0-9]+, w[0-9]+ +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, gt ** stlxrb w[0-9]+, w[0-9]+, \[x[0-9]+\] ** cbnz w[0-9]+, .* ** ... @@ -32,10 +40,12 @@ /* ** test_smin_s16: ** ... 
-** ldxrh w[0-9]+, \[x[0-9]+\] -** cmp w[0-9]+, w[0-9]+, uxth -** bne .* -** stlxrh w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ldaxrh w[0-9]+, \[x[0-9]+\] +** sxth w[0-9]+, w[0-9]+ +** sxth w[0-9]+, w[0-9]+ +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, lt +** stxrh w[0-9]+, w[0-9]+, \[x[0-9]+\] ** cbnz w[0-9]+, .* ** ... */ @@ -43,20 +53,23 @@ /* ** test_smax_s16: ** ... -** ldxrh w[0-9]+, \[x[0-9]+\] -** cmp w[0-9]+, w[0-9]+, uxth -** bne .* +** ldaxrh w[0-9]+, \[x[0-9]+\] +** sxth w[0-9]+, w[0-9]+ +** sxth w[0-9]+, w[0-9]+ +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, gt ** stlxrh w[0-9]+, w[0-9]+, \[x[0-9]+\] ** cbnz w[0-9]+, .* ** ... */ + /* ** test_smin_s32: ** ... ** ldxr w[0-9]+, \[x[0-9]+\] ** cmp w[0-9]+, w[0-9]+ -** bne .* -** stlxr w[0-9]+, w[0-9]+, \[x[0-9]+\] +** csel w[0-9]+, w[0-9]+, w[0-9]+, lt +** stxr w[0-9]+, w[0-9]+, \[x[0-9]+\] ** cbnz w[0-9]+, .* ** ... */ @@ -64,9 +77,9 @@ /* ** test_smax_s32: ** ... -** ldxr w[0-9]+, \[x[0-9]+\] +** ldaxr w[0-9]+, \[x[0-9]+\] ** cmp w[0-9]+, w[0-9]+ -** bne .* +** csel w[0-9]+, w[0-9]+, w[0-9]+, gt ** stlxr w[0-9]+, w[0-9]+, \[x[0-9]+\] ** cbnz w[0-9]+, .* ** ... @@ -75,21 +88,21 @@ /* ** test_smin_s64: ** ... -** ldxr x[0-9]+, \[x[0-9]+\] +** ldaxr x[0-9]+, \[x[0-9]+\] ** cmp x[0-9]+, x[0-9]+ -** bne .* -** stlxr w[0-9]+, x[0-9]+, \[x[0-9]+\] +** csel x[0-9]+, x[0-9]+, x[0-9]+, lt +** stxr w[0-9]+, x[0-9]+, \[x[0-9]+\] ** cbnz w[0-9]+, .* ** ... */ /* -** test_smin_s64: +** test_smax_s64: ** ... ** ldxr x[0-9]+, \[x[0-9]+\] ** cmp x[0-9]+, x[0-9]+ -** bne .* -** stlxr w[0-9]+, x[0-9]+, \[x[0-9]+\] +** csel x[0-9]+, x[0-9]+, x[0-9]+, gt +** stxr w[0-9]+, x[0-9]+, \[x[0-9]+\] ** cbnz w[0-9]+, .* ** ... */ @@ -98,9 +111,9 @@ ** test_umin_u8: ** ... ** ldxrb w[0-9]+, \[x[0-9]+\] -** cmp w[0-9]+, w[0-9]+, uxtb -** bne .* -** stlxrb w[0-9]+, w[0-9]+, \[x[0-9]+\] +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, cc +** stxrb w[0-9]+, w[0-9]+, \[x[0-9]+\] ** cbnz w[0-9]+, .* ** ... 
*/ @@ -108,10 +121,10 @@ /* ** test_umax_u8: ** ... -** ldxrb w[0-9]+, \[x[0-9]+\] -** cmp w[0-9]+, w[0-9]+, uxtb -** bne .* -** stlxrb w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ldaxrb w[0-9]+, \[x[0-9]+\] +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, hi +** stxrb w[0-9]+, w[0-9]+, \[x[0-9]+\] ** cbnz w[0-9]+, .* ** ... */ @@ -119,10 +132,10 @@ /* ** test_umin_u16: ** ... -** ldxrh w[0-9]+, \[x[0-9]+\] -** cmp w[0-9]+, w[0-9]+, uxth -** bne .* -** stlxrh w[0-9]+, w[0-9]+, \[x[0-9]+\] +** ldaxrh w[0-9]+, \[x[0-9]+\] +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, cc +** stxrh w[0-9]+, w[0-9]+, \[x[0-9]+\] ** cbnz w[0-9]+, .* ** ... */ @@ -131,8 +144,8 @@ ** test_umax_u16: ** ... ** ldxrh w[0-9]+, \[x[0-9]+\] -** cmp w[0-9]+, w[0-9]+, uxth -** bne .* +** cmp w[0-9]+, w[0-9]+ +** csel w[0-9]+, w[0-9]+, w[0-9]+, hi ** stlxrh w[0-9]+, w[0-9]+, \[x[0-9]+\] ** cbnz w[0-9]+, .* ** ... @@ -141,9 +154,9 @@ /* ** test_umin_u32: ** ... -** ldxr w[0-9]+, \[x[0-9]+\] +** ldaxr w[0-9]+, \[x[0-9]+\] ** cmp w[0-9]+, w[0-9]+ -** bne .* +** csel w[0-9]+, w[0-9]+, w[0-9]+, cc ** stlxr w[0-9]+, w[0-9]+, \[x[0-9]+\] ** cbnz w[0-9]+, .* ** ... @@ -154,8 +167,8 @@ ** ... ** ldxr w[0-9]+, \[x[0-9]+\] ** cmp w[0-9]+, w[0-9]+ -** bne .* -** stlxr w[0-9]+, w[0-9]+, \[x[0-9]+\] +** csel w[0-9]+, w[0-9]+, w[0-9]+, hi +** stxr w[0-9]+, w[0-9]+, \[x[0-9]+\] ** cbnz w[0-9]+, .* ** ... */ @@ -165,8 +178,8 @@ ** ... ** ldxr x[0-9]+, \[x[0-9]+\] ** cmp x[0-9]+, x[0-9]+ -** bne .* -** stlxr w[0-9]+, x[0-9]+, \[x[0-9]+\] +** csel x[0-9]+, x[0-9]+, x[0-9]+, cc +** stxr w[0-9]+, x[0-9]+, \[x[0-9]+\] ** cbnz w[0-9]+, .* ** ... */ @@ -174,9 +187,9 @@ /* ** test_umax_u64: ** ... -** ldxr x[0-9]+, \[x[0-9]+\] +** ldaxr x[0-9]+, \[x[0-9]+\] ** cmp x[0-9]+, x[0-9]+ -** bne .* +** csel x[0-9]+, x[0-9]+, x[0-9]+, hi ** stlxr w[0-9]+, x[0-9]+, \[x[0-9]+\] ** cbnz w[0-9]+, .* ** ... 
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-minmax.c b/gcc/testsuite/gcc.target/aarch64/atomic-minmax.c index f61082e288b..225816b63bd 100644 --- a/gcc/testsuite/gcc.target/aarch64/atomic-minmax.c +++ b/gcc/testsuite/gcc.target/aarch64/atomic-minmax.c @@ -10,120 +10,119 @@ int main () return 0; } -/* { dg-final { scan-assembler-not "\tcas*" } } */ -/* { dg-final { scan-assembler-not "\tldxr*" } } */ -/* { dg-final { scan-assembler-not "\tldaxr*" } } */ -/* { dg-final { scan-assembler-not "\tstxr*" } } */ -/* { dg-final { scan-assembler-not "\tstlxr*" } } */ +/* { dg-final { scan-assembler-not "\tldsmin" } } */ +/* { dg-final { scan-assembler-not "\tldsmax" } } */ +/* { dg-final { scan-assembler-not "\tldumin" } } */ +/* { dg-final { scan-assembler-not "\tldumax" } } */ /* ** test_smin_s8: ** ... -** bl __aarch64_cas1_sync +** bl __aarch64_ldsmin1_relax ** ... */ /* ** test_smax_s8: ** ... -** bl __aarch64_cas1_sync +** bl __aarch64_ldsmax1_rel ** ... */ /* ** test_smin_s16: ** ... -** bl __aarch64_cas2_sync +** bl __aarch64_ldsmin2_acq ** ... */ /* ** test_smax_s16: ** ... -** bl __aarch64_cas2_sync +** bl __aarch64_ldsmax2_acq_rel ** ... */ /* ** test_smin_s32: ** ... -** bl __aarch64_cas4_sync +** bl __aarch64_ldsmin4_relax ** ... */ /* ** test_smax_s32: ** ... -** bl __aarch64_cas4_sync +** bl __aarch64_ldsmax4_acq_rel ** ... */ /* ** test_smin_s64: ** ... -** bl __aarch64_cas8_sync +** bl __aarch64_ldsmin8_acq ** ... */ /* ** test_smax_s64: ** ... -** bl __aarch64_cas8_sync +** bl __aarch64_ldsmax8_relax ** ... */ /* ** test_umin_u8: ** ... -** bl __aarch64_cas1_sync +** bl __aarch64_ldumin1_relax ** ... */ /* ** test_umax_u8: ** ... -** bl __aarch64_cas1_sync +** bl __aarch64_ldumax1_acq ** ... */ /* ** test_umin_u16: ** ... -** bl __aarch64_cas2_sync +** bl __aarch64_ldumin2_acq ** ... */ /* ** test_umax_u16: ** ... -** bl __aarch64_cas2_sync +** bl __aarch64_ldumax2_rel ** ... */ /* ** test_umin_u32: ** ... 
-** bl __aarch64_cas4_sync +** bl __aarch64_ldumin4_acq_rel ** ... */ /* ** test_umax_u32: ** ... -** bl __aarch64_cas4_sync +** bl __aarch64_ldumax4_relax ** ... */ /* ** test_umin_u64: ** ... -** bl __aarch64_cas8_sync +** bl __aarch64_ldumin8_relax ** ... */ /* ** test_umax_u64: ** ... -** bl __aarch64_cas8_sync +** bl __aarch64_ldumax8_acq_rel ** ... */ diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S index cb24011d883..c251d5d0188 100644 --- a/libgcc/config/aarch64/lse.S +++ b/libgcc/config/aarch64/lse.S @@ -276,7 +276,9 @@ ENDFN NAME(swp) #endif #if defined(L_ldadd) || defined(L_ldclr) \ - || defined(L_ldeor) || defined(L_ldset) + || defined(L_ldeor) || defined(L_ldset) \ + || defined(L_ldsmin) || defined(L_ldsmax) \ + || defined(L_ldumin) || defined(L_ldumax) #ifdef L_ldadd #define LDNM ldadd @@ -294,6 +296,26 @@ ENDFN NAME(swp) #define LDNM ldset #define OP orr #define OPN 0x3000 +#elif defined(L_ldsmin) +#define LDNM ldsmin +#define OP smin +#define OPN 0x5000 +#define IS_MINMAX 1 +#elif defined(L_ldsmax) +#define LDNM ldsmax +#define OP smax +#define OPN 0x4000 +#define IS_MINMAX 1 +#elif defined(L_ldumin) +#define LDNM ldumin +#define OP umin +#define OPN 0x7000 +#define IS_MINMAX 1 +#elif defined(L_ldumax) +#define LDNM ldumax +#define OP umax +#define OPN 0x6000 +#define IS_MINMAX 1 #else #error #endif @@ -311,7 +333,45 @@ STARTFN NAME(LDNM) 8: mov s(tmp0), s(0) 0: LDXR s(0), [x1] +#ifdef IS_MINMAX + /* For min/max, extend if needed, compare, and select. */ +#if SIZE < 4 + #if defined(L_ldsmin) || defined(L_ldsmax) + /* Sign extend for signed comparisons. */ + #if SIZE == 1 + sxtb w(tmp1), w(0) + sxtb w(tmp3), w(tmp0) + #else /* SIZE == 2 */ + sxth w(tmp1), w(0) + sxth w(tmp3), w(tmp0) + #endif + #else /* L_ldumin || L_ldumax */ + /* Zero extend for unsigned comparisons. 
*/ + #if SIZE == 1 + uxtb w(tmp1), w(0) + uxtb w(tmp3), w(tmp0) + #else /* SIZE == 2 */ + uxth w(tmp1), w(0) + uxth w(tmp3), w(tmp0) + #endif + #endif + cmp w(tmp3), w(tmp1) +#else /* SIZE >= 4 */ + cmp s(tmp0), s(0) +#endif + /* Select based on condition. */ + #if defined(L_ldsmin) + csel s(tmp1), s(tmp0), s(0), lt + #elif defined(L_ldsmax) + csel s(tmp1), s(tmp0), s(0), gt + #elif defined(L_ldumin) + csel s(tmp1), s(tmp0), s(0), lo + #elif defined(L_ldumax) + csel s(tmp1), s(tmp0), s(0), hi + #endif +#else /* Not IS_MINMAX */ OP s(tmp1), s(0), s(tmp0) +#endif /* IS_MINMAX */ STXR w(tmp2), s(tmp1), [x1] cbnz w(tmp2), 0b BARRIER diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse index 58908dcabfb..cc0544cf0c8 100644 --- a/libgcc/config/aarch64/t-lse +++ b/libgcc/config/aarch64/t-lse @@ -23,7 +23,8 @@ S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas)) O0 := $(foreach m, 1 2 3 4 5, $(addsuffix _$(m)$(objext), $(S0))) # Swap, Load-and-operate have 4 sizes and 5 memory models -S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset)) +S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset \ + ldsmin ldsmax ldumin ldumax)) O1 := $(foreach m, 1 2 3 4 5, $(addsuffix _$(m)$(objext), $(S1))) LSE_OBJS := $(O0) $(O1) -- 2.44.0