Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
---
 host/include/aarch64/host/atomic128-cas.h.inc | 57 +++++++++++
 host/include/generic/host/atomic128-cas.h.inc | 96 +++++++++++++++++++
 2 files changed, 153 insertions(+)
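
For flavor, a hypothetical caller of the new primitives (illustration only,
not part of the patch: test_and_set_low_bit is an invented name, and this
assumes qemu/atomic128.h pulls in these .h.inc files on a host that provides
HAVE_CMPXCHG128, with bool available via the usual osdep.h include):

#include "qemu/int128.h"
#include "qemu/atomic128.h"

/* Atomically set bit 0 of a 16-byte-aligned 128-bit word
 * and report whether it was already set. */
static bool test_and_set_low_bit(Int128 *mem)
{
    Int128 old = atomic16_fetch_or(mem, int128_make64(1));

    return int128_getlo(old) & 1;
}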
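The weak compare-and-swap loop used by the generic version below can also be
exercised outside of QEMU; here is a minimal standalone sketch. The file name,
fetch_or_128, and the build line are made up for illustration, and it assumes
a GCC- or Clang-compatible compiler with 16-byte atomics (on some hosts via
-latomic):

/* cas128.c -- hypothetical demo; build e.g. "cc -O2 cas128.c -latomic". */
#include <stdio.h>

typedef unsigned __int128 u128;

static u128 fetch_or_128(u128 *ptr, u128 val)
{
    u128 old = __atomic_load_n(ptr, __ATOMIC_RELAXED);

    /*
     * A weak CAS may fail spuriously, but since we loop anyway that only
     * costs a retry; on failure 'old' is refreshed with the current value.
     */
    while (!__atomic_compare_exchange_n(ptr, &old, old | val, true,
                                        __ATOMIC_SEQ_CST, __ATOMIC_RELAXED)) {
        continue;
    }
    return old;                      /* the value before the OR */
}

int main(void)
{
    static u128 word __attribute__((aligned(16)));
    u128 prev = fetch_or_128(&word, ((u128)0xdead << 64) | 0xbeef);

    printf("prev lo=%llx now lo=%llx\n",
           (unsigned long long)prev, (unsigned long long)word);
    return 0;
}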
diff --git a/host/include/aarch64/host/atomic128-cas.h.inc b/host/include/aarch64/host/atomic128-cas.h.inc
index 991da4ef54..aec27df182 100644
--- a/host/include/aarch64/host/atomic128-cas.h.inc
+++ b/host/include/aarch64/host/atomic128-cas.h.inc
@@ -38,6 +38,63 @@ static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
     return int128_make128(oldl, oldh);
 }
 
+static inline Int128 atomic16_xchg(Int128 *ptr, Int128 new)
+{
+    uint64_t newl = int128_getlo(new), newh = int128_gethi(new);
+    uint64_t oldl, oldh;
+    uint32_t tmp;
+
+    asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t"
+        "stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t"
+        "cbnz %w[tmp], 0b"
+        : [mem] "+m"(*ptr), [tmp] "=&r"(tmp),
+          [oldl] "=&r"(oldl), [oldh] "=&r"(oldh)
+        : [newl] "r"(newl), [newh] "r"(newh)
+        : "memory");
+
+    return int128_make128(oldl, oldh);
+}
+
+static inline Int128 atomic16_fetch_and(Int128 *ptr, Int128 new)
+{
+    uint64_t newl = int128_getlo(new), newh = int128_gethi(new);
+    uint64_t oldl, oldh, tmpl, tmph;
+    uint32_t tmp;
+
+    asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t"
+        "and %[tmpl], %[oldl], %[newl]\n\t"
+        "and %[tmph], %[oldh], %[newh]\n\t"
+        "stlxp %w[tmp], %[tmpl], %[tmph], %[mem]\n\t"
+        "cbnz %w[tmp], 0b"
+        : [mem] "+m"(*ptr), [tmp] "=&r"(tmp),
+          [oldl] "=&r"(oldl), [oldh] "=&r"(oldh),
+          [tmpl] "=&r"(tmpl), [tmph] "=&r"(tmph)
+        : [newl] "r"(newl), [newh] "r"(newh)
+        : "memory");
+
+    return int128_make128(oldl, oldh);
+}
+
+static inline Int128 atomic16_fetch_or(Int128 *ptr, Int128 new)
+{
+    uint64_t newl = int128_getlo(new), newh = int128_gethi(new);
+    uint64_t oldl, oldh, tmpl, tmph;
+    uint32_t tmp;
+
+    asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t"
+        "orr %[tmpl], %[oldl], %[newl]\n\t"
+        "orr %[tmph], %[oldh], %[newh]\n\t"
+        "stlxp %w[tmp], %[tmpl], %[tmph], %[mem]\n\t"
+        "cbnz %w[tmp], 0b"
+        : [mem] "+m"(*ptr), [tmp] "=&r"(tmp),
+          [oldl] "=&r"(oldl), [oldh] "=&r"(oldh),
+          [tmpl] "=&r"(tmpl), [tmph] "=&r"(tmph)
+        : [newl] "r"(newl), [newh] "r"(newh)
+        : "memory");
+
+    return int128_make128(oldl, oldh);
+}
+
 # define CONFIG_CMPXCHG128 1
 # define HAVE_CMPXCHG128 1
 #endif
diff --git a/host/include/generic/host/atomic128-cas.h.inc b/host/include/generic/host/atomic128-cas.h.inc
index 6b40cc2271..990162c56f 100644
--- a/host/include/generic/host/atomic128-cas.h.inc
+++ b/host/include/generic/host/atomic128-cas.h.inc
@@ -23,6 +23,51 @@ atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
     r.i = qatomic_cmpxchg__nocheck(ptr_align, c.i, n.i);
     return r.s;
 }
+
+/*
+ * Since we're looping anyway, use weak compare and swap.
+ * If the host supports weak, this will eliminate a second loop hidden
+ * within the atomic operation itself; otherwise the weak parameter is
+ * ignored.
+ */
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
+atomic16_xchg(Int128 *ptr, Int128 new)
+{
+    __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
+    Int128 old = *ptr_align;
+
+    while (!__atomic_compare_exchange_n(ptr_align, &old, new, true,
+                                        __ATOMIC_SEQ_CST, 0)) {
+        continue;
+    }
+    return old;
+}
+
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
+atomic16_fetch_and(Int128 *ptr, Int128 val)
+{
+    __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
+    Int128 old = *ptr_align;
+
+    while (!__atomic_compare_exchange_n(ptr_align, &old, old & val, true,
+                                        __ATOMIC_SEQ_CST, 0)) {
+        continue;
+    }
+    return old;
+}
+
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
+atomic16_fetch_or(Int128 *ptr, Int128 val)
+{
+    __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
+    Int128 old = *ptr_align;
+
+    while (!__atomic_compare_exchange_n(ptr_align, &old, old | val, true,
+                                        __ATOMIC_SEQ_CST, 0)) {
+        continue;
+    }
+    return old;
+}
 # define HAVE_CMPXCHG128 1
 #elif defined(CONFIG_CMPXCHG128)
 static inline Int128 ATTRIBUTE_ATOMIC128_OPT
@@ -36,6 +36,57 @@ atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
     r.i = __sync_val_compare_and_swap_16(ptr_align, c.i, n.i);
     return r.s;
 }
+
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
+atomic16_xchg(Int128 *ptr, Int128 new)
+{
+    Int128Aligned *ptr_align = __builtin_assume_aligned(ptr, 16);
+    Int128Alias o, n;
+
+    n.s = new;
+    o.s = *ptr_align;
+    while (1) {
+        __int128 c = __sync_val_compare_and_swap_16(ptr_align, o.i, n.i);
+        if (c == o.i) {
+            return o.s;
+        }
+        o.i = c;
+    }
+}
+
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
+atomic16_fetch_and(Int128 *ptr, Int128 val)
+{
+    Int128Aligned *ptr_align = __builtin_assume_aligned(ptr, 16);
+    Int128Alias o, v;
+
+    v.s = val;
+    o.s = *ptr_align;
+    while (1) {
+        __int128 c = __sync_val_compare_and_swap_16(ptr_align, o.i, o.i & v.i);
+        if (c == o.i) {
+            return o.s;
+        }
+        o.i = c;
+    }
+}
+
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
+atomic16_fetch_or(Int128 *ptr, Int128 val)
+{
+    Int128Aligned *ptr_align = __builtin_assume_aligned(ptr, 16);
+    Int128Alias o, v;
+
+    v.s = val;
+    o.s = *ptr_align;
+    while (1) {
+        __int128 c = __sync_val_compare_and_swap_16(ptr_align, o.i, o.i | v.i);
+        if (c == o.i) {
+            return o.s;
+        }
+        o.i = c;
+    }
+}
 # define HAVE_CMPXCHG128 1
 #else
 /* Fallback definition that must be optimized away, or error.  */
-- 
2.43.0