Victor Do Nascimento <victor.donascime...@arm.com> writes:
> @@ -712,6 +760,27 @@ ENTRY (libat_test_and_set_16)
>  END (libat_test_and_set_16)
>  
>  
> +/* Alias all LSE128_LRCPC3 ifuncs to their specific implementations,
> +   that is, map it to LSE128, LRCPC or CORE as appropriate.  */
> +
> +ALIAS (libat_exchange_16, LSE128_LRCPC3, LSE128)
> +ALIAS (libat_fetch_or_16, LSE128_LRCPC3, LSE128)
> +ALIAS (libat_fetch_and_16, LSE128_LRCPC3, LSE128)
> +ALIAS (libat_or_fetch_16, LSE128_LRCPC3, LSE128)
> +ALIAS (libat_and_fetch_16, LSE128_LRCPC3, LSE128)
> +ALIAS (libat_load_16, LSE128_LRCPC3, LRCPC3)
> +ALIAS (libat_store_16, LSE128_LRCPC3, LRCPC3)
> +ALIAS (libat_compare_exchange_16, LSE128_LRCPC3, LSE2)
> +ALIAS (libat_fetch_add_16, LSE128_LRCPC3, LSE2)
> +ALIAS (libat_add_fetch_16, LSE128_LRCPC3, LSE2)
> +ALIAS (libat_fetch_sub_16, LSE128_LRCPC3, LSE2)
> +ALIAS (libat_sub_fetch_16, LSE128_LRCPC3, LSE2)
> +ALIAS (libat_fetch_xor_16, LSE128_LRCPC3, LSE2)
> +ALIAS (libat_xor_fetch_16, LSE128_LRCPC3, LSE2)
> +ALIAS (libat_fetch_nand_16, LSE128_LRCPC3, LSE2)
> +ALIAS (libat_nand_fetch_16, LSE128_LRCPC3, LSE2)
> +ALIAS (libat_test_and_set_16, LSE128_LRCPC3, LSE2)
> +
>  /* Alias entry points which are the same in LSE2 and LSE128.  */
>  
>  #if !HAVE_FEAT_LSE128
> @@ -734,6 +803,29 @@ ALIAS (libat_fetch_nand_16, LSE128, LSE2)
>  ALIAS (libat_nand_fetch_16, LSE128, LSE2)
>  ALIAS (libat_test_and_set_16, LSE128, LSE2)
>  
> +
> +/* Alias entry points which are the same in LRCPC3 and LSE2.  */
> +
> +#if !HAVE_FEAT_LRCPC3
> +ALIAS (libat_load_16, LRCPC3, LSE2)
> +ALIAS (libat_store_16, LRCPC3, LSE2)
> +#endif
> +ALIAS (libat_exchange_16, LRCPC3, LSE2)
> +ALIAS (libat_fetch_or_16, LRCPC3, LSE2)
> +ALIAS (libat_fetch_and_16, LRCPC3, LSE2)
> +ALIAS (libat_or_fetch_16, LRCPC3, LSE2)
> +ALIAS (libat_and_fetch_16, LRCPC3, LSE2)
> +ALIAS (libat_compare_exchange_16, LRCPC3, LSE2)
> +ALIAS (libat_fetch_add_16, LRCPC3, LSE2)
> +ALIAS (libat_add_fetch_16, LRCPC3, LSE2)
> +ALIAS (libat_fetch_sub_16, LRCPC3, LSE2)
> +ALIAS (libat_sub_fetch_16, LRCPC3, LSE2)
> +ALIAS (libat_fetch_xor_16, LRCPC3, LSE2)
> +ALIAS (libat_xor_fetch_16, LRCPC3, LSE2)
> +ALIAS (libat_fetch_nand_16, LRCPC3, LSE2)
> +ALIAS (libat_nand_fetch_16, LRCPC3, LSE2)
> +ALIAS (libat_test_and_set_16, LRCPC3, LSE2)
> +
>  /* Alias entry points which are the same in baseline and LSE2.  */
>  
>  ALIAS (libat_exchange_16, LSE2, CORE)

Sorry to be awkward, but I think this is becoming a bit unwieldly.
It wasn't too bad using aliases for LSE128->LSE2 fallbacks since LSE128
could optimise a decent number of routines.  But here we're using two
sets of aliases for every function in order to express "LRCPC3 loads and
stores should be used where possible, independently of the choices for
other routines".

This also complicates the ifuncs, since we need to detect LRCPC3+LSE128
as a distinct combination even though none of the underlying routines
depend on both LRCPC3 and LSE128 together.

I think instead we should make the ifunc mechanism more granular,
so that we can have default/lse2/rcpc3 for loads and stores,
default/lse128 for things that LSE128 can do, etc.  I realise that
would require some rework of the generic framework code, but I think
it's still worth doing.

I won't object if another maintainer is happy with the patch in its
current form though.

Thanks,
Richard

> diff --git a/libatomic/config/linux/aarch64/host-config.h 
> b/libatomic/config/linux/aarch64/host-config.h
> index 4e354124063..d03fcfe4a64 100644
> --- a/libatomic/config/linux/aarch64/host-config.h
> +++ b/libatomic/config/linux/aarch64/host-config.h
> @@ -37,9 +37,12 @@ typedef struct __ifunc_arg_t {
>  
>  #ifdef HWCAP_USCAT
>  # if N == 16
> -#  define IFUNC_COND_1               (has_lse128 (hwcap, features))
> -#  define IFUNC_COND_2               (has_lse2 (hwcap, features))
> -#  define IFUNC_NCOND(N)     2
> +#  define IFUNC_COND_1               (has_lse128 (hwcap, features) \
> +                              && has_rcpc3 (hwcap, features))
> +#  define IFUNC_COND_2               (has_lse128 (hwcap, features))
> +#  define IFUNC_COND_3               (has_rcpc3  (hwcap, features))
> +#  define IFUNC_COND_4               (has_lse2   (hwcap, features))
> +#  define IFUNC_NCOND(N)     4
>  # else
>  #  define IFUNC_COND_1               (hwcap & HWCAP_ATOMICS)
>  #  define IFUNC_NCOND(N)     1
> @@ -90,6 +93,9 @@ has_lse2 (unsigned long hwcap, const __ifunc_arg_t 
> *features)
>  #define AT_FEAT_FIELD(isar0) (((isar0) >> 20) & 15)
>  
>  /* Ensure backwards compatibility with glibc <= 2.38.  */
> +#ifndef HWCAP2_LRCPC3
> +#define HWCAP2_LRCPC3                (1UL << 46)
> +#endif
>  #ifndef HWCAP2_LSE128
>  #define HWCAP2_LSE128                (1UL << 47)
>  #endif
> @@ -116,6 +122,27 @@ has_lse128 (unsigned long hwcap, const __ifunc_arg_t 
> *features)
>    return false;
>  }
>  
> +/* LRCPC atomic support encoded in ID_AA64ISAR1_EL1.Atomic, bits[23:20].  The
> +   expected value is 0b0011.  Check that.  */
> +
> +static inline bool
> +has_rcpc3 (unsigned long hwcap, const __ifunc_arg_t *features)
> +{
> +  if (hwcap & _IFUNC_ARG_HWCAP
> +      && features->_hwcap2 & HWCAP2_LRCPC3)
> +    return true;
> +  /* Try fallback feature check method to guarantee LRCPC3 is not 
> implemented.
> +
> +     In the absence of HWCAP_CPUID, we are unable to check for RCPC3, return.
> +     If feature check available, check LSE2 prerequisite before proceeding.  
> */
> +  if (!(hwcap & HWCAP_CPUID)  || !(hwcap & HWCAP_USCAT))
> +    return false;
> +  unsigned long isar1;
> +  asm volatile ("mrs %0, ID_AA64ISAR1_EL1" : "=r" (isar1));
> +  if (AT_FEAT_FIELD (isar1) >= 3)
> +    return true;
> +  return false;
> +}
>  #endif
>  
>  #include_next <host-config.h>
> diff --git a/libatomic/configure b/libatomic/configure
> index 8ab730d8082..64c87f653f6 100755
> --- a/libatomic/configure
> +++ b/libatomic/configure
> @@ -656,6 +656,8 @@ LIBAT_BUILD_VERSIONED_SHLIB_FALSE
>  LIBAT_BUILD_VERSIONED_SHLIB_TRUE
>  OPT_LDFLAGS
>  SECTION_LDFLAGS
> +ARCH_AARCH64_HAVE_LRCPC3_FALSE
> +ARCH_AARCH64_HAVE_LRCPC3_TRUE
>  ARCH_AARCH64_HAVE_LSE128_FALSE
>  ARCH_AARCH64_HAVE_LSE128_TRUE
>  SYSROOT_CFLAGS_FOR_TARGET
> @@ -11458,7 +11460,7 @@ else
>    lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
>    lt_status=$lt_dlunknown
>    cat > conftest.$ac_ext <<_LT_EOF
> -#line 11461 "configure"
> +#line 11463 "configure"
>  #include "confdefs.h"
>  
>  #if HAVE_DLFCN_H
> @@ -11564,7 +11566,7 @@ else
>    lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
>    lt_status=$lt_dlunknown
>    cat > conftest.$ac_ext <<_LT_EOF
> -#line 11567 "configure"
> +#line 11569 "configure"
>  #include "confdefs.h"
>  
>  #if HAVE_DLFCN_H
> @@ -14750,6 +14752,55 @@ fi
>  
>  
>  
> +  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for armv8.2-a LRCPC3 
> insn support" >&5
> +$as_echo_n "checking for armv8.2-a LRCPC3 insn support... " >&6; }
> +if ${libat_cv_have_feat_lrcpc3+:} false; then :
> +  $as_echo_n "(cached) " >&6
> +else
> +
> +    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
> +/* end confdefs.h.  */
> +
> +int
> +main ()
> +{
> +asm(".arch armv8.2-a+rcpc3")
> +  ;
> +  return 0;
> +}
> +_ACEOF
> +    if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5
> +  (eval $ac_link) 2>&5
> +  ac_status=$?
> +  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
> +  test $ac_status = 0; }; then
> +      eval libat_cv_have_feat_lrcpc3=yes
> +    else
> +      eval libat_cv_have_feat_lrcpc3=no
> +    fi
> +    rm -f conftest*
> +
> +fi
> +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libat_cv_have_feat_lrcpc3" 
> >&5
> +$as_echo "$libat_cv_have_feat_lrcpc3" >&6; }
> +
> +  yesno=`echo $libat_cv_have_feat_lrcpc3 | tr 'yesno' '1  0 '`
> +
> +cat >>confdefs.h <<_ACEOF
> +#define HAVE_FEAT_LRCPC3 $yesno
> +_ACEOF
> +
> +
> +   if test x$libat_cv_have_feat_lrcpc3 = xyes; then
> +  ARCH_AARCH64_HAVE_LRCPC3_TRUE=
> +  ARCH_AARCH64_HAVE_LRCPC3_FALSE='#'
> +else
> +  ARCH_AARCH64_HAVE_LRCPC3_TRUE='#'
> +  ARCH_AARCH64_HAVE_LRCPC3_FALSE=
> +fi
> +
> +
> +
>   { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is 
> bigendian" >&5
>  $as_echo_n "checking whether byte ordering is bigendian... " >&6; }
>  if ${ac_cv_c_bigendian+:} false; then :
> @@ -16046,6 +16097,10 @@ if test -z "${ARCH_AARCH64_HAVE_LSE128_TRUE}" && 
> test -z "${ARCH_AARCH64_HAVE_LS
>    as_fn_error $? "conditional \"ARCH_AARCH64_HAVE_LSE128\" was never defined.
>  Usually this means the macro was only invoked conditionally." "$LINENO" 5
>  fi
> +if test -z "${ARCH_AARCH64_HAVE_LRCPC3_TRUE}" && test -z 
> "${ARCH_AARCH64_HAVE_LRCPC3_FALSE}"; then
> +  as_fn_error $? "conditional \"ARCH_AARCH64_HAVE_LRCPC3\" was never defined.
> +Usually this means the macro was only invoked conditionally." "$LINENO" 5
> +fi
>  
>  if test -z "${LIBAT_BUILD_VERSIONED_SHLIB_TRUE}" && test -z 
> "${LIBAT_BUILD_VERSIONED_SHLIB_FALSE}"; then
>    as_fn_error $? "conditional \"LIBAT_BUILD_VERSIONED_SHLIB\" was never 
> defined.
> diff --git a/libatomic/configure.ac b/libatomic/configure.ac
> index 85824fa7614..8fd20e183a6 100644
> --- a/libatomic/configure.ac
> +++ b/libatomic/configure.ac
> @@ -208,6 +208,7 @@ LIBAT_FORALL_MODES([LIBAT_HAVE_ATOMIC_FETCH_OP])
>  
>  # Check for target-specific assembly-level support for atomic operations.
>  LIBAT_TEST_FEAT_AARCH64_LSE128()
> +LIBAT_TEST_FEAT_AARCH64_LRCPC3()
>  
>  AC_C_BIGENDIAN
>  # I don't like the default behaviour of WORDS_BIGENDIAN undefined for LE.

Reply via email to