The existing arch_xchg/arch_cmpxchg acquire/release operations are implemented by inserting fence instructions before or after the atomic instructions. This commit replaces those fences with real acquire/release semantics, using the .aq/.rl annotations on the AMO and LR/SC instructions.
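As an illustration (a sketch only, not part of this patch; the standalone
helper name below is made up), a 32-bit arch_xchg_acquire() previously
expanded to "amoswap.w" followed by "fence r, rw"; with this change the
acquire ordering is folded into the AMO itself:

	/* Illustrative sketch, not patch code. */
	static inline unsigned int xchg32_acquire_sketch(unsigned int *p,
							 unsigned int newval)
	{
		unsigned int ret;

		__asm__ __volatile__ (
			"	amoswap.w.aq %0, %2, %1\n"
			: "=r" (ret), "+A" (*p)
			: "r" (newval)
			: "memory");

		return ret;
	}

The resulting instruction sequences for the release and acquire variants
are summarized below: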
|----------------------------------------------------------------|
|    |      arch_xchg_release     |     arch_cmpxchg_release     |
|    |-----------------------------------------------------------|
|    |   zabha    |     !zabha    | zabha+zacas | !(zabha+zacas) |
| rl |-----------------------------------------------------------|
|    |            | (fence rw, w) |             |  (fence rw, w) |
|    | amoswap.rl |      lr.w     |  amocas.rl  |      lr.w      |
|    |            |    sc.w.rl    |             |     sc.w.rl    |
|----------------------------------------------------------------|
|    |      arch_xchg_acquire     |     arch_cmpxchg_acquire     |
|    |-----------------------------------------------------------|
|    |   zabha    |     !zabha    | zabha+zacas | !(zabha+zacas) |
| aq |-----------------------------------------------------------|
|    |            |    lr.w.aq    |             |     lr.w.aq    |
|    | amoswap.aq |      sc.w     |  amocas.aq  |      sc.w      |
|    |            | (fence r, rw) |             |  (fence r, rw) |
|----------------------------------------------------------------|

The (fence rw, w) and (fence r, rw) entries above are only emitted when
zalasr is not implemented.

Signed-off-by: Xu Lu <luxu.ker...@bytedance.com>
---
 arch/riscv/include/asm/atomic.h  |   6 --
 arch/riscv/include/asm/cmpxchg.h | 136 ++++++++++++++-----------------
 2 files changed, 63 insertions(+), 79 deletions(-)

diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 5b96c2f61adb5..b79a4f889f339 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -18,12 +18,6 @@
 
 #include <asm/cmpxchg.h>
 
-#define __atomic_acquire_fence()					\
-	__asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory")
-
-#define __atomic_release_fence()					\
-	__asm__ __volatile__(RISCV_RELEASE_BARRIER "" ::: "memory");
-
 static __always_inline int arch_atomic_read(const atomic_t *v)
 {
 	return READ_ONCE(v->counter);
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 0b749e7102162..207fdba38d1fc 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -15,15 +15,23 @@
 #include <asm/cpufeature-macros.h>
 #include <asm/processor.h>
 
-#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append,	\
-			   swap_append, r, p, n)			\
+/*
+ * These macros are here to improve the readability of the arch_xchg_XXX()
+ * and arch_cmpxchg_XXX() macros.
+ */
+#define LR_SFX(x)	x
+#define SC_SFX(x)	x
+#define CAS_SFX(x)	x
+#define SC_PREPEND(x)	x
+#define SC_APPEND(x)	x
+
+#define __arch_xchg_masked(lr_sfx, sc_sfx, swap_sfx, sc_prepend, sc_append, \
+			   r, p, n)					\
 ({									\
 	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&			\
 	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) {	\
 		__asm__ __volatile__ (					\
-			prepend						\
 			"	amoswap" swap_sfx " %0, %z2, %1\n"	\
-			swap_append					\
 			: "=&r" (r), "+A" (*(p))			\
 			: "rJ" (n)					\
 			: "memory");					\
@@ -37,14 +45,16 @@
 		ulong __rc;						\
 									\
 		__asm__ __volatile__ (					\
-			prepend						\
 			PREFETCHW_ASM(%5)				\
+			ALTERNATIVE(__nops(1), sc_prepend,		\
+				    0, RISCV_ISA_EXT_ZALASR, 1)		\
 			"0:	lr.w %0, %2\n"				\
 			"	and %1, %0, %z4\n"			\
 			"	or %1, %1, %z3\n"			\
 			"	sc.w" sc_sfx " %1, %1, %2\n"		\
 			"	bnez %1, 0b\n"				\
-			sc_append					\
+			ALTERNATIVE(__nops(1), sc_append,		\
+				    0, RISCV_ISA_EXT_ZALASR, 1)		\
 			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
 			: "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b) \
 			: "memory");					\
@@ -53,19 +63,17 @@
 	}								\
 })
 
-#define __arch_xchg(sfx, prepend, append, r, p, n)			\
+#define __arch_xchg(sfx, r, p, n)					\
 ({									\
 	__asm__ __volatile__ (						\
-		prepend							\
 		"	amoswap" sfx " %0, %2, %1\n"			\
-		append							\
 		: "=r" (r), "+A" (*(p))					\
 		: "r" (n)						\
 		: "memory");						\
 })
 
-#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,			\
-		   sc_append, swap_append)				\
+#define _arch_xchg(ptr, new, lr_sfx, sc_sfx, swap_sfx,			\
+		   sc_prepend, sc_append)				\
 ({									\
 	__typeof__(ptr) __ptr = (ptr);					\
 	__typeof__(*(__ptr)) __new = (new);				\
@@ -73,22 +81,20 @@
 									\
 	switch (sizeof(*__ptr)) {					\
 	case 1:								\
-		__arch_xchg_masked(sc_sfx, ".b" swap_sfx,		\
-				   prepend, sc_append, swap_append,	\
+		__arch_xchg_masked(lr_sfx, sc_sfx, ".b" swap_sfx,	\
+				   sc_prepend, sc_append,		\
 				   __ret, __ptr, __new);		\
 		break;							\
 	case 2:								\
-		__arch_xchg_masked(sc_sfx, ".h" swap_sfx,		\
-				   prepend, sc_append, swap_append,	\
+		__arch_xchg_masked(lr_sfx, sc_sfx, ".h" swap_sfx,	\
+				   sc_prepend, sc_append,		\
 				   __ret, __ptr, __new);		\
 		break;							\
 	case 4:								\
-		__arch_xchg(".w" swap_sfx, prepend, swap_append,	\
-			    __ret, __ptr, __new);			\
+		__arch_xchg(".w" swap_sfx, __ret, __ptr, __new);	\
 		break;							\
 	case 8:								\
-		__arch_xchg(".d" swap_sfx, prepend, swap_append,	\
-			    __ret, __ptr, __new);			\
+		__arch_xchg(".d" swap_sfx, __ret, __ptr, __new);	\
 		break;							\
 	default:							\
 		BUILD_BUG();						\
@@ -97,17 +103,23 @@
 })
 
 #define arch_xchg_relaxed(ptr, x)					\
-	_arch_xchg(ptr, x, "", "", "", "", "")
+	_arch_xchg(ptr, x, LR_SFX(""), SC_SFX(""), CAS_SFX(""),		\
+		   SC_PREPEND(__nops(1)), SC_APPEND(__nops(1)))
 
 #define arch_xchg_acquire(ptr, x)					\
-	_arch_xchg(ptr, x, "", "", "",					\
-		   RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)
+	_arch_xchg(ptr, x, LR_SFX(".aq"), SC_SFX(""), CAS_SFX(".aq"),	\
+		   SC_PREPEND(__nops(1)),				\
+		   SC_APPEND(RISCV_ACQUIRE_BARRIER))
 
 #define arch_xchg_release(ptr, x)					\
-	_arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")
+	_arch_xchg(ptr, x, LR_SFX(""), SC_SFX(".rl"), CAS_SFX(".rl"),	\
+		   SC_PREPEND(RISCV_RELEASE_BARRIER),			\
+		   SC_APPEND(__nops(1)))
 
 #define arch_xchg(ptr, x)						\
-	_arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")
+	_arch_xchg(ptr, x, LR_SFX(""), SC_SFX(".aqrl"),			\
+		   CAS_SFX(".aqrl"), SC_PREPEND(__nops(1)),		\
+		   SC_APPEND(__nops(1)))
 
 #define xchg32(ptr, x)							\
 ({									\
@@ -126,9 +138,7 @@
  * store NEW in MEM. Return the initial value in MEM. Success is
  * indicated by comparing RETURN with OLD.
  */
-#define __arch_cmpxchg_masked(sc_sfx, cas_sfx,				\
-			      sc_prepend, sc_append,			\
-			      cas_prepend, cas_append,			\
+#define __arch_cmpxchg_masked(lr_sfx, sc_sfx, cas_sfx, sc_prepend, sc_append, \
 			      r, p, o, n)				\
 ({									\
 	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&			\
@@ -138,9 +148,7 @@
 		r = o;							\
 									\
 		__asm__ __volatile__ (					\
-			cas_prepend					\
 			"	amocas" cas_sfx " %0, %z2, %1\n"	\
-			cas_append					\
 			: "+&r" (r), "+A" (*(p))			\
 			: "rJ" (n)					\
 			: "memory");					\
@@ -155,15 +163,17 @@
 		ulong __rc;						\
 									\
 		__asm__ __volatile__ (					\
-			sc_prepend					\
-			"0:	lr.w %0, %2\n"				\
+			ALTERNATIVE(__nops(1), sc_prepend,		\
+				    0, RISCV_ISA_EXT_ZALASR, 1)		\
+			"0:	lr.w" lr_sfx " %0, %2\n"		\
 			"	and %1, %0, %z5\n"			\
 			"	bne %1, %z3, 1f\n"			\
 			"	and %1, %0, %z6\n"			\
 			"	or %1, %1, %z4\n"			\
 			"	sc.w" sc_sfx " %1, %1, %2\n"		\
 			"	bnez %1, 0b\n"				\
-			sc_append					\
+			ALTERNATIVE(__nops(1), sc_append,		\
+				    0, RISCV_ISA_EXT_ZALASR, 1)		\
 			"1:\n"						\
 			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
 			: "rJ" ((long)__oldx), "rJ" (__newx),		\
@@ -174,9 +184,7 @@
 	}								\
 })
 
-#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx,				\
-		       sc_prepend, sc_append,				\
-		       cas_prepend, cas_append,				\
+#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx, sc_prepend, sc_append,	\
 		       r, p, co, o, n)					\
 ({									\
 	if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&			\
@@ -184,9 +192,7 @@
 		r = o;							\
 									\
 		__asm__ __volatile__ (					\
-			cas_prepend					\
 			"	amocas" cas_sfx " %0, %z2, %1\n"	\
-			cas_append					\
 			: "+&r" (r), "+A" (*(p))			\
 			: "rJ" (n)					\
 			: "memory");					\
@@ -194,12 +200,14 @@
 		register unsigned int __rc;				\
 									\
 		__asm__ __volatile__ (					\
-			sc_prepend					\
+			ALTERNATIVE(__nops(1), sc_prepend,		\
+				    0, RISCV_ISA_EXT_ZALASR, 1)		\
 			"0:	lr" lr_sfx " %0, %2\n"			\
 			"	bne %0, %z3, 1f\n"			\
 			"	sc" sc_sfx " %1, %z4, %2\n"		\
 			"	bnez %1, 0b\n"				\
-			sc_append					\
+			ALTERNATIVE(__nops(1), sc_append,		\
+				    0, RISCV_ISA_EXT_ZALASR, 1)		\
 			"1:\n"						\
 			: "=&r" (r), "=&r" (__rc), "+A" (*(p))		\
 			: "rJ" (co o), "rJ" (n)				\
@@ -207,9 +215,8 @@
 	}								\
 })
 
-#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx,			\
-		      sc_prepend, sc_append,				\
-		      cas_prepend, cas_append)				\
+#define _arch_cmpxchg(ptr, old, new, lr_sfx, sc_sfx, cas_sfx,		\
+		      sc_prepend, sc_append)				\
 ({									\
 	__typeof__(ptr) __ptr = (ptr);					\
 	__typeof__(*(__ptr)) __old = (old);				\
@@ -218,27 +225,23 @@
 									\
 	switch (sizeof(*__ptr)) {					\
 	case 1:								\
-		__arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx,		\
+		__arch_cmpxchg_masked(lr_sfx, sc_sfx, ".b" cas_sfx,	\
 				      sc_prepend, sc_append,		\
-				      cas_prepend, cas_append,		\
 				      __ret, __ptr, __old, __new);	\
 		break;							\
 	case 2:								\
-		__arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx,		\
+		__arch_cmpxchg_masked(lr_sfx, sc_sfx, ".h" cas_sfx,	\
 				      sc_prepend, sc_append,		\
-				      cas_prepend, cas_append,		\
 				      __ret, __ptr, __old, __new);	\
 		break;							\
 	case 4:								\
-		__arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx,		\
+		__arch_cmpxchg(".w" lr_sfx, ".w" sc_sfx, ".w" cas_sfx,	\
 			       sc_prepend, sc_append,			\
-			       cas_prepend, cas_append,			\
 			       __ret, __ptr, (long)(int)(long), __old, __new); \
 		break;							\
 	case 8:								\
-		__arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx,		\
+		__arch_cmpxchg(".d" lr_sfx, ".d" sc_sfx, ".d" cas_sfx,	\
 			       sc_prepend, sc_append,			\
-			       cas_prepend, cas_append,			\
 			       __ret, __ptr, /**/, __old, __new);	\
 		break;							\
 	default:							\
@@ -247,40 +250,27 @@
 	(__typeof__(*(__ptr)))__ret;					\
 })
 
-/*
- * These macros are here to improve the readability of the arch_cmpxchg_XXX()
- * macros.
- */ -#define SC_SFX(x) x -#define CAS_SFX(x) x -#define SC_PREPEND(x) x -#define SC_APPEND(x) x -#define CAS_PREPEND(x) x -#define CAS_APPEND(x) x - #define arch_cmpxchg_relaxed(ptr, o, n) \ _arch_cmpxchg((ptr), (o), (n), \ - SC_SFX(""), CAS_SFX(""), \ - SC_PREPEND(""), SC_APPEND(""), \ - CAS_PREPEND(""), CAS_APPEND("")) + LR_SFX(""), SC_SFX(""), CAS_SFX(""), \ + SC_PREPEND(__nops(1)), SC_APPEND(__nops(1))) #define arch_cmpxchg_acquire(ptr, o, n) \ _arch_cmpxchg((ptr), (o), (n), \ - SC_SFX(""), CAS_SFX(""), \ - SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER), \ - CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER)) + LR_SFX(".aq"), SC_SFX(""), CAS_SFX(".aq"), \ + SC_PREPEND(__nops(1)), \ + SC_APPEND(RISCV_ACQUIRE_BARRIER)) #define arch_cmpxchg_release(ptr, o, n) \ _arch_cmpxchg((ptr), (o), (n), \ - SC_SFX(""), CAS_SFX(""), \ - SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""), \ - CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND("")) + LR_SFX(""), SC_SFX(".rl"), CAS_SFX(".rl"), \ + SC_PREPEND(RISCV_RELEASE_BARRIER), \ + SC_APPEND(__nops(1))) #define arch_cmpxchg(ptr, o, n) \ _arch_cmpxchg((ptr), (o), (n), \ - SC_SFX(".rl"), CAS_SFX(".aqrl"), \ - SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER), \ - CAS_PREPEND(""), CAS_APPEND("")) + LR_SFX(""), SC_SFX(".aqrl"), CAS_SFX(".aqrl"), \ + SC_PREPEND(__nops(1)), SC_APPEND(__nops(1))) #define arch_cmpxchg_local(ptr, o, n) \ arch_cmpxchg_relaxed((ptr), (o), (n)) -- 2.20.1