The optional RCPC3 architectural extension, available from Armv8.2-A upwards, provides additional support for the release consistency model by introducing the Load-Acquire RCpc Pair Ordered and Store-Release Pair Ordered operations in the form of the LDIAPP and STILP instructions.
These operations are single-copy atomic on cores which also implement LSE2 and, as such, support for these operations is added to libatomic and employed accordingly when the LSE2 and RCPC3 features are detected in a given core at runtime. libatomic/ChangeLog: * configure.ac: Add call to LIBAT_TEST_FEAT_AARCH64_LRCPC3() test. * configure: Regenerate. * config/linux/aarch64/host-config.h (HAS_LRCPC3): New. (has_rcpc3): Likewise. (HWCAP2_LRCPC3): Likewise. * config/linux/aarch64/atomic_16.S (libat_load_16): Add LRCPC3 variant. (libat_store_16): Likewise. * acinclude.m4 (LIBAT_TEST_FEAT_AARCH64_LRCPC3): New. (HAVE_FEAT_LRCPC3): Likewise. (ARCH_AARCH64_HAVE_LRCPC3): Likewise. * Makefile.am (AM_CPPFLAGS): Conditionally append -DHAVE_FEAT_LRCPC3 flag. --- libatomic/Makefile.am | 6 +- libatomic/Makefile.in | 22 ++-- libatomic/acinclude.m4 | 19 ++++ libatomic/auto-config.h.in | 3 + libatomic/config/linux/aarch64/atomic_16.S | 102 ++++++++++++++++++- libatomic/config/linux/aarch64/host-config.h | 33 +++++- libatomic/configure | 59 ++++++++++- libatomic/configure.ac | 1 + 8 files changed, 225 insertions(+), 20 deletions(-) diff --git a/libatomic/Makefile.am b/libatomic/Makefile.am index 0623a0bf2d1..1e5481fa580 100644 --- a/libatomic/Makefile.am +++ b/libatomic/Makefile.am @@ -130,8 +130,12 @@ libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix _$(s)_.lo,$(SIZEOBJS))) ## On a target-specific basis, include alternates to be selected by IFUNC. 
if HAVE_IFUNC if ARCH_AARCH64_LINUX +AM_CPPFLAGS = if ARCH_AARCH64_HAVE_LSE128 -AM_CPPFLAGS = -DHAVE_FEAT_LSE128 +AM_CPPFLAGS += -DHAVE_FEAT_LSE128 +endif +if ARCH_AARCH64_HAVE_LRCPC3 +AM_CPPFLAGS += -DHAVE_FEAT_LRCPC3 endif IFUNC_OPTIONS = -march=armv8-a+lse libatomic_la_LIBADD += $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS))) diff --git a/libatomic/Makefile.in b/libatomic/Makefile.in index cd48fa21334..8e87d12907a 100644 --- a/libatomic/Makefile.in +++ b/libatomic/Makefile.in @@ -89,15 +89,17 @@ POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ -@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_1 = $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS))) -@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_2 = atomic_16.S -@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(foreach \ +@ARCH_AARCH64_HAVE_LSE128_TRUE@@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_1 = -DHAVE_FEAT_LSE128 +@ARCH_AARCH64_HAVE_LRCPC3_TRUE@@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_2 = -DHAVE_FEAT_LRCPC3 +@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_3 = $(foreach s,$(SIZES),$(addsuffix _$(s)_1_.lo,$(SIZEOBJS))) +@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = atomic_16.S +@ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@am__append_5 = $(foreach \ @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ s,$(SIZES),$(addsuffix \ @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ _$(s)_1_.lo,$(SIZEOBJS))) \ @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ $(addsuffix \ @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@ _8_2_.lo,$(SIZEOBJS)) -@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_4 = $(addsuffix _8_1_.lo,$(SIZEOBJS)) -@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_5 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \ +@ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@am__append_6 = $(addsuffix _8_1_.lo,$(SIZEOBJS)) +@ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@am__append_7 = $(addsuffix _16_1_.lo,$(SIZEOBJS)) \ @ARCH_X86_64_TRUE@@HAVE_IFUNC_TRUE@ $(addsuffix _16_2_.lo,$(SIZEOBJS)) subdir = . 
@@ -424,7 +426,7 @@ libatomic_la_LDFLAGS = $(libatomic_version_info) $(libatomic_version_script) \ $(lt_host_flags) $(libatomic_darwin_rpath) libatomic_la_SOURCES = gload.c gstore.c gcas.c gexch.c glfree.c lock.c \ - init.c fenv.c fence.c flag.c $(am__append_2) + init.c fenv.c fence.c flag.c $(am__append_4) SIZEOBJS = load store cas exch fadd fsub fand fior fxor fnand tas EXTRA_libatomic_la_SOURCES = $(addsuffix _n.c,$(SIZEOBJS)) libatomic_la_DEPENDENCIES = $(libatomic_la_LIBADD) $(libatomic_version_dep) @@ -450,9 +452,11 @@ all_c_files := $(foreach dir,$(search_path),$(wildcard $(dir)/*.c)) # Then sort through them to find the one we want, and select the first. M_SRC = $(firstword $(filter %/$(M_FILE), $(all_c_files))) libatomic_la_LIBADD = $(foreach s,$(SIZES),$(addsuffix \ - _$(s)_.lo,$(SIZEOBJS))) $(am__append_1) $(am__append_3) \ - $(am__append_4) $(am__append_5) -@ARCH_AARCH64_HAVE_LSE128_TRUE@@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@AM_CPPFLAGS = -DHAVE_FEAT_LSE128 + _$(s)_.lo,$(SIZEOBJS))) $(am__append_3) $(am__append_5) \ + $(am__append_6) $(am__append_7) +@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@AM_CPPFLAGS = \ +@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@ $(am__append_1) \ +@ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@ $(am__append_2) @ARCH_AARCH64_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv8-a+lse @ARCH_ARM_LINUX_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=armv7-a+fp -DHAVE_KERNEL64 @ARCH_I386_TRUE@@HAVE_IFUNC_TRUE@IFUNC_OPTIONS = -march=i586 diff --git a/libatomic/acinclude.m4 b/libatomic/acinclude.m4 index d4f13174e2c..316a8baf75f 100644 --- a/libatomic/acinclude.m4 +++ b/libatomic/acinclude.m4 @@ -102,6 +102,25 @@ AC_DEFUN([LIBAT_TEST_FEAT_AARCH64_LSE128],[ AM_CONDITIONAL([ARCH_AARCH64_HAVE_LSE128], [test x$libat_cv_have_feat_lse128 = xyes]) ]) +dnl +dnl Test if the host assembler supports armv8.2-a RCPC3 insns. 
+dnl +AC_DEFUN([LIBAT_TEST_FEAT_AARCH64_LRCPC3],[ + AC_CACHE_CHECK([for armv8.2-a LRCPC3 insn support], + [libat_cv_have_feat_lrcpc3],[ + AC_LANG_CONFTEST([AC_LANG_PROGRAM([],[asm(".arch armv8.2-a+rcpc3")])]) + if AC_TRY_EVAL(ac_link); then + eval libat_cv_have_feat_lrcpc3=yes + else + eval libat_cv_have_feat_lrcpc3=no + fi + rm -f conftest* + ]) + LIBAT_DEFINE_YESNO([HAVE_FEAT_LRCPC3], [$libat_cv_have_feat_lrcpc3], + [Have LRCPC3 support for 16 byte integers.]) + AM_CONDITIONAL([ARCH_AARCH64_HAVE_LRCPC3], [test x$libat_cv_have_feat_lrcpc3 = xyes]) +]) + dnl dnl Test if we have __atomic_load and __atomic_store for mode $1, size $2 dnl diff --git a/libatomic/auto-config.h.in b/libatomic/auto-config.h.in index 7c78933b07d..26d56e7da67 100644 --- a/libatomic/auto-config.h.in +++ b/libatomic/auto-config.h.in @@ -105,6 +105,9 @@ /* Define to 1 if you have the <dlfcn.h> header file. */ #undef HAVE_DLFCN_H +/* Have LRCPC3 support for 16 byte integers. */ +#undef HAVE_FEAT_LRCPC3 + /* Have LSE128 support for 16 byte integers. */ #undef HAVE_FEAT_LSE128 diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S index 979ed8498cd..bed68e72ea2 100644 --- a/libatomic/config/linux/aarch64/atomic_16.S +++ b/libatomic/config/linux/aarch64/atomic_16.S @@ -35,18 +35,28 @@ writes, this will be true when using atomics in actual code. The libat_<op>_16 entry points are ARMv8.0. - The libat_<op>_16_i1 entry points are used when LSE128 is available. - The libat_<op>_16_i2 entry points are used when LSE2 is available. */ + The libat_<op>_16_i1 entry points are used when LSE128+LRCPC3 are available. + The libat_<op>_16_i2 entry points are used when LSE128 is available. + The libat_<op>_16_i3 entry points are used when LRCPC3 is available. + The libat_<op>_16_i4 entry points are used when LSE2 is available. 
*/ #if HAVE_FEAT_LSE128 +# if HAVE_FEAT_LRCPC3 + .arch armv9-a+lse128+rcpc3 +# else .arch armv9-a+lse128 +# endif +#elif HAVE_FEAT_LRCPC3 + .arch armv8-a+lse+rcpc3 #else .arch armv8-a+lse #endif -#define LSE128(NAME) NAME##_i1 -#define LSE2(NAME) NAME##_i2 -#define CORE(NAME) NAME +#define LSE128_LRCPC3(NAME) NAME##_i1 +#define LSE128(NAME) NAME##_i2 +#define LRCPC3(NAME) NAME##_i3 +#define LSE2(NAME) NAME##_i4 +#define CORE(NAME) NAME #define ENTRY(NAME) ENTRY_FEAT1 (NAME) @@ -129,6 +139,29 @@ ENTRY (libat_load_16) END (libat_load_16) +#if HAVE_FEAT_LRCPC3 +ENTRY_FEAT (libat_load_16, LRCPC3) + cbnz w1, 1f + + /* RELAXED. */ + ldp res0, res1, [x0] + ret +1: + cmp w1, SEQ_CST + b.eq 2f + + /* ACQUIRE/CONSUME (Load-AcquirePC semantics). */ + ldiapp res0, res1, [x0] + ret + + /* SEQ_CST. */ +2: ldar tmp0, [x0] /* Block reordering with Store-Release instr. */ + ldiapp res0, res1, [x0] + ret +END_FEAT (libat_load_16, LRCPC3) +#endif + + ENTRY_FEAT (libat_load_16, LSE2) cbnz w1, 1f @@ -169,6 +202,21 @@ ENTRY (libat_store_16) END (libat_store_16) +#if HAVE_FEAT_LRCPC3 +ENTRY_FEAT (libat_store_16, LRCPC3) + cbnz w4, 1f + + /* RELAXED. */ + stp in0, in1, [x0] + ret + + /* RELEASE/SEQ_CST. */ +1: stilp in0, in1, [x0] + ret +END_FEAT (libat_store_16, LRCPC3) +#endif + + ENTRY_FEAT (libat_store_16, LSE2) cbnz w4, 1f @@ -712,6 +760,27 @@ ENTRY (libat_test_and_set_16) END (libat_test_and_set_16) +/* Alias all LSE128_LRCPC3 ifuncs to their specific implementations, + that is, map each one to LSE128, LRCPC3 or LSE2 as appropriate. 
*/ + +ALIAS (libat_exchange_16, LSE128_LRCPC3, LSE128) +ALIAS (libat_fetch_or_16, LSE128_LRCPC3, LSE128) +ALIAS (libat_fetch_and_16, LSE128_LRCPC3, LSE128) +ALIAS (libat_or_fetch_16, LSE128_LRCPC3, LSE128) +ALIAS (libat_and_fetch_16, LSE128_LRCPC3, LSE128) +ALIAS (libat_load_16, LSE128_LRCPC3, LRCPC3) +ALIAS (libat_store_16, LSE128_LRCPC3, LRCPC3) +ALIAS (libat_compare_exchange_16, LSE128_LRCPC3, LSE2) +ALIAS (libat_fetch_add_16, LSE128_LRCPC3, LSE2) +ALIAS (libat_add_fetch_16, LSE128_LRCPC3, LSE2) +ALIAS (libat_fetch_sub_16, LSE128_LRCPC3, LSE2) +ALIAS (libat_sub_fetch_16, LSE128_LRCPC3, LSE2) +ALIAS (libat_fetch_xor_16, LSE128_LRCPC3, LSE2) +ALIAS (libat_xor_fetch_16, LSE128_LRCPC3, LSE2) +ALIAS (libat_fetch_nand_16, LSE128_LRCPC3, LSE2) +ALIAS (libat_nand_fetch_16, LSE128_LRCPC3, LSE2) +ALIAS (libat_test_and_set_16, LSE128_LRCPC3, LSE2) + /* Alias entry points which are the same in LSE2 and LSE128. */ #if !HAVE_FEAT_LSE128 @@ -734,6 +803,29 @@ ALIAS (libat_fetch_nand_16, LSE128, LSE2) ALIAS (libat_nand_fetch_16, LSE128, LSE2) ALIAS (libat_test_and_set_16, LSE128, LSE2) + +/* Alias entry points which are the same in LRCPC3 and LSE2. */ + +#if !HAVE_FEAT_LRCPC3 +ALIAS (libat_load_16, LRCPC3, LSE2) +ALIAS (libat_store_16, LRCPC3, LSE2) +#endif +ALIAS (libat_exchange_16, LRCPC3, LSE2) +ALIAS (libat_fetch_or_16, LRCPC3, LSE2) +ALIAS (libat_fetch_and_16, LRCPC3, LSE2) +ALIAS (libat_or_fetch_16, LRCPC3, LSE2) +ALIAS (libat_and_fetch_16, LRCPC3, LSE2) +ALIAS (libat_compare_exchange_16, LRCPC3, LSE2) +ALIAS (libat_fetch_add_16, LRCPC3, LSE2) +ALIAS (libat_add_fetch_16, LRCPC3, LSE2) +ALIAS (libat_fetch_sub_16, LRCPC3, LSE2) +ALIAS (libat_sub_fetch_16, LRCPC3, LSE2) +ALIAS (libat_fetch_xor_16, LRCPC3, LSE2) +ALIAS (libat_xor_fetch_16, LRCPC3, LSE2) +ALIAS (libat_fetch_nand_16, LRCPC3, LSE2) +ALIAS (libat_nand_fetch_16, LRCPC3, LSE2) +ALIAS (libat_test_and_set_16, LRCPC3, LSE2) + /* Alias entry points which are the same in baseline and LSE2. 
*/ ALIAS (libat_exchange_16, LSE2, CORE) diff --git a/libatomic/config/linux/aarch64/host-config.h b/libatomic/config/linux/aarch64/host-config.h index 4e354124063..d03fcfe4a64 100644 --- a/libatomic/config/linux/aarch64/host-config.h +++ b/libatomic/config/linux/aarch64/host-config.h @@ -37,9 +37,12 @@ typedef struct __ifunc_arg_t { #ifdef HWCAP_USCAT # if N == 16 -# define IFUNC_COND_1 (has_lse128 (hwcap, features)) -# define IFUNC_COND_2 (has_lse2 (hwcap, features)) -# define IFUNC_NCOND(N) 2 +# define IFUNC_COND_1 (has_lse128 (hwcap, features) \ + && has_rcpc3 (hwcap, features)) +# define IFUNC_COND_2 (has_lse128 (hwcap, features)) +# define IFUNC_COND_3 (has_rcpc3 (hwcap, features)) +# define IFUNC_COND_4 (has_lse2 (hwcap, features)) +# define IFUNC_NCOND(N) 4 # else # define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS) # define IFUNC_NCOND(N) 1 @@ -90,6 +93,9 @@ has_lse2 (unsigned long hwcap, const __ifunc_arg_t *features) #define AT_FEAT_FIELD(isar0) (((isar0) >> 20) & 15) /* Ensure backwards compatibility with glibc <= 2.38. */ +#ifndef HWCAP2_LRCPC3 +#define HWCAP2_LRCPC3 (1UL << 46) +#endif #ifndef HWCAP2_LSE128 #define HWCAP2_LSE128 (1UL << 47) #endif @@ -116,6 +122,27 @@ has_lse128 (unsigned long hwcap, const __ifunc_arg_t *features) return false; } +/* LRCPC3 support is encoded in ID_AA64ISAR1_EL1.LRCPC, bits[23:20]. A value + of 0b0011 or greater indicates that it is implemented. Check that. */ + +static inline bool +has_rcpc3 (unsigned long hwcap, const __ifunc_arg_t *features) +{ + if (hwcap & _IFUNC_ARG_HWCAP + && features->_hwcap2 & HWCAP2_LRCPC3) + return true; + /* Try fallback feature check method to guarantee LRCPC3 is not implemented. + + In the absence of HWCAP_CPUID, we are unable to check for RCPC3, return. + If feature check available, check LSE2 prerequisite before proceeding. 
*/ + if (!(hwcap & HWCAP_CPUID) || !(hwcap & HWCAP_USCAT)) + return false; + unsigned long isar1; + asm volatile ("mrs %0, ID_AA64ISAR1_EL1" : "=r" (isar1)); + if (AT_FEAT_FIELD (isar1) >= 3) + return true; + return false; +} #endif #include_next <host-config.h> diff --git a/libatomic/configure b/libatomic/configure index 8ab730d8082..64c87f653f6 100755 --- a/libatomic/configure +++ b/libatomic/configure @@ -656,6 +656,8 @@ LIBAT_BUILD_VERSIONED_SHLIB_FALSE LIBAT_BUILD_VERSIONED_SHLIB_TRUE OPT_LDFLAGS SECTION_LDFLAGS +ARCH_AARCH64_HAVE_LRCPC3_FALSE +ARCH_AARCH64_HAVE_LRCPC3_TRUE ARCH_AARCH64_HAVE_LSE128_FALSE ARCH_AARCH64_HAVE_LSE128_TRUE SYSROOT_CFLAGS_FOR_TARGET @@ -11458,7 +11460,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11461 "configure" +#line 11463 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -11564,7 +11566,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11567 "configure" +#line 11569 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -14750,6 +14752,55 @@ fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for armv8.2-a LRCPC3 insn support" >&5 +$as_echo_n "checking for armv8.2-a LRCPC3 insn support... " >&6; } +if ${libat_cv_have_feat_lrcpc3+:} false; then : + $as_echo_n "(cached) " >&6 +else + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +asm(".arch armv8.2-a+rcpc3") + ; + return 0; +} +_ACEOF + if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 + (eval $ac_link) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; then + eval libat_cv_have_feat_lrcpc3=yes + else + eval libat_cv_have_feat_lrcpc3=no + fi + rm -f conftest* + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libat_cv_have_feat_lrcpc3" >&5 +$as_echo "$libat_cv_have_feat_lrcpc3" >&6; } + + yesno=`echo $libat_cv_have_feat_lrcpc3 | tr 'yesno' '1 0 '` + +cat >>confdefs.h <<_ACEOF +#define HAVE_FEAT_LRCPC3 $yesno +_ACEOF + + + if test x$libat_cv_have_feat_lrcpc3 = xyes; then + ARCH_AARCH64_HAVE_LRCPC3_TRUE= + ARCH_AARCH64_HAVE_LRCPC3_FALSE='#' +else + ARCH_AARCH64_HAVE_LRCPC3_TRUE='#' + ARCH_AARCH64_HAVE_LRCPC3_FALSE= +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5 $as_echo_n "checking whether byte ordering is bigendian... " >&6; } if ${ac_cv_c_bigendian+:} false; then : @@ -16046,6 +16097,10 @@ if test -z "${ARCH_AARCH64_HAVE_LSE128_TRUE}" && test -z "${ARCH_AARCH64_HAVE_LS as_fn_error $? "conditional \"ARCH_AARCH64_HAVE_LSE128\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${ARCH_AARCH64_HAVE_LRCPC3_TRUE}" && test -z "${ARCH_AARCH64_HAVE_LRCPC3_FALSE}"; then + as_fn_error $? "conditional \"ARCH_AARCH64_HAVE_LRCPC3\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${LIBAT_BUILD_VERSIONED_SHLIB_TRUE}" && test -z "${LIBAT_BUILD_VERSIONED_SHLIB_FALSE}"; then as_fn_error $? "conditional \"LIBAT_BUILD_VERSIONED_SHLIB\" was never defined. diff --git a/libatomic/configure.ac b/libatomic/configure.ac index 85824fa7614..8fd20e183a6 100644 --- a/libatomic/configure.ac +++ b/libatomic/configure.ac @@ -208,6 +208,7 @@ LIBAT_FORALL_MODES([LIBAT_HAVE_ATOMIC_FETCH_OP]) # Check for target-specific assembly-level support for atomic operations. LIBAT_TEST_FEAT_AARCH64_LSE128() +LIBAT_TEST_FEAT_AARCH64_LRCPC3() AC_C_BIGENDIAN # I don't like the default behaviour of WORDS_BIGENDIAN undefined for LE. -- 2.42.0