The rte_smp_mb(), rte_smp_wmb() and rte_smp_rmb() functions were
flagged as deprecated by commit 3ec965b6de12 ("doc: update atomic
operation deprecation") in 2021 but nothing came of it.
Reimplement them as inline wrappers over rte_atomic_thread_fence()
and drop the deprecation notice.
The API is preserved; only the implementation changes.

Generated code is unchanged on x86 (seq_cst keeps the lock-addl trick,
release/acquire collapse to a compiler barrier under TSO).
On arm64, the release fence emits dmb ish instead of dmb ishst, while
the acquire fence still emits dmb ishld; the difference is below
measurement noise.

Signed-off-by: Stephen Hemminger <[email protected]>
---
 doc/guides/rel_notes/deprecation.rst   |   8 --
 lib/eal/arm/include/rte_atomic_32.h    |   6 --
 lib/eal/arm/include/rte_atomic_64.h    |   6 --
 lib/eal/include/generic/rte_atomic.h   | 106 +++++++++++--------------
 lib/eal/loongarch/include/rte_atomic.h |   6 --
 lib/eal/ppc/include/rte_atomic.h       |   6 --
 lib/eal/riscv/include/rte_atomic.h     |   6 --
 lib/eal/x86/include/rte_atomic.h       |  33 +++-----
 8 files changed, 57 insertions(+), 120 deletions(-)

diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index 346c517623..03b763b472 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -47,14 +47,6 @@ Deprecation Notices
   operations must be used for patches that need to be merged in 20.08 onwards.
   This change will not introduce any performance degradation.
 
-* rte_smp_*mb: These APIs provide full barrier functionality. However, many
-  use cases do not require full barriers. To support such use cases, DPDK has
-  adopted atomic operations from
-  https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html. These
-  operations and a new wrapper ``rte_atomic_thread_fence`` instead of
-  ``__atomic_thread_fence`` must be used for patches that need to be merged in
-  20.08 onwards. This change will not introduce any performance degradation.
-
 * lib: will fix extending some enum/define breaking the ABI. There are multiple
   samples in DPDK that enum/define terminated with a ``.*MAX.*`` value which is
   used by iterators, and arrays holding these values are sized with this
diff --git a/lib/eal/arm/include/rte_atomic_32.h b/lib/eal/arm/include/rte_atomic_32.h
index 0b9a0dfa30..3809ddefb7 100644
--- a/lib/eal/arm/include/rte_atomic_32.h
+++ b/lib/eal/arm/include/rte_atomic_32.h
@@ -21,12 +21,6 @@ extern "C" {
 
 #define rte_rmb() __sync_synchronize()
 
-#define rte_smp_mb() rte_mb()
-
-#define rte_smp_wmb() rte_wmb()
-
-#define rte_smp_rmb() rte_rmb()
-
 #define rte_io_mb() rte_mb()
 
 #define rte_io_wmb() rte_wmb()
diff --git a/lib/eal/arm/include/rte_atomic_64.h b/lib/eal/arm/include/rte_atomic_64.h
index 181bb60929..c9b41f6212 100644
--- a/lib/eal/arm/include/rte_atomic_64.h
+++ b/lib/eal/arm/include/rte_atomic_64.h
@@ -24,12 +24,6 @@ extern "C" {
 
 #define rte_rmb() asm volatile("dmb oshld" : : : "memory")
 
-#define rte_smp_mb() asm volatile("dmb ish" : : : "memory")
-
-#define rte_smp_wmb() asm volatile("dmb ishst" : : : "memory")
-
-#define rte_smp_rmb() asm volatile("dmb ishld" : : : "memory")
-
 #define rte_io_mb() rte_mb()
 
 #define rte_io_wmb() rte_wmb()
diff --git a/lib/eal/include/generic/rte_atomic.h b/lib/eal/include/generic/rte_atomic.h
index 0a4f3f8528..4e9d230f85 100644
--- a/lib/eal/include/generic/rte_atomic.h
+++ b/lib/eal/include/generic/rte_atomic.h
@@ -49,69 +49,8 @@ static inline void rte_wmb(void);
  * occur before the LOAD operations generated after.
  */
 static inline void rte_rmb(void);
-///@}
-
-/** @name SMP Memory Barrier
- */
-///@{
-/**
- * General memory barrier between lcores
- *
- * Guarantees that the LOAD and STORE operations that precede the
- * rte_smp_mb() call are globally visible across the lcores
- * before the LOAD and STORE operations that follows it.
- *
- * @note
- *  This function is deprecated.
- *  It provides similar synchronization primitive as atomic fence,
- *  but has different syntax and memory ordering semantic. Hence
- *  deprecated for the simplicity of memory ordering semantics in use.
- *
- *  rte_atomic_thread_fence(rte_memory_order_acq_rel) should be used instead.
- */
-static inline void rte_smp_mb(void);
 
-/**
- * Write memory barrier between lcores
- *
- * Guarantees that the STORE operations that precede the
- * rte_smp_wmb() call are globally visible across the lcores
- * before the STORE operations that follows it.
- *
- * @note
- *  This function is deprecated.
- *  It provides similar synchronization primitive as atomic fence,
- *  but has different syntax and memory ordering semantic. Hence
- *  deprecated for the simplicity of memory ordering semantics in use.
- *
- *  rte_atomic_thread_fence(rte_memory_order_release) should be used instead.
- *  The fence also guarantees LOAD operations that precede the call
- *  are globally visible across the lcores before the STORE operations
- *  that follows it.
- */
-static inline void rte_smp_wmb(void);
-
-/**
- * Read memory barrier between lcores
- *
- * Guarantees that the LOAD operations that precede the
- * rte_smp_rmb() call are globally visible across the lcores
- * before the LOAD operations that follows it.
- *
- * @note
- *  This function is deprecated.
- *  It provides similar synchronization primitive as atomic fence,
- *  but has different syntax and memory ordering semantic. Hence
- *  deprecated for the simplicity of memory ordering semantics in use.
- *
- *  rte_atomic_thread_fence(rte_memory_order_acquire) should be used instead.
- *  The fence also guarantees LOAD operations that precede the call
- *  are globally visible across the lcores before the STORE operations
- *  that follows it.
- */
-static inline void rte_smp_rmb(void);
 ///@}
-
 /** @name I/O Memory Barrier
  */
 ///@{
@@ -164,6 +103,51 @@ static inline void rte_io_rmb(void);
  */
 static inline void rte_atomic_thread_fence(rte_memory_order memorder);
 
+
+/** @name SMP Memory Barrier
+ */
+///@{
+/**
+ * General memory barrier between lcores
+ *
+ * Guarantees that the LOAD and STORE operations that precede the
+ * rte_smp_mb() call are globally visible across the lcores
+ * before the LOAD and STORE operations that follows it.
+ */
+static __rte_always_inline void
+rte_smp_mb(void)
+{
+	rte_atomic_thread_fence(rte_memory_order_seq_cst);
+}
+
+/**
+ * Write memory barrier between lcores
+ *
+ * Guarantees that the STORE operations that precede the
+ * rte_smp_wmb() call are globally visible across the lcores
+ * before the STORE operations that follows it.
+ */
+static __rte_always_inline void
+rte_smp_wmb(void)
+{
+	rte_atomic_thread_fence(rte_memory_order_release);
+}
+
+/**
+ * Read memory barrier between lcores
+ *
+ * Guarantees that the LOAD operations that precede the
+ * rte_smp_rmb() call are globally visible across the lcores
+ * before the LOAD operations that follows it.
+ */
+static __rte_always_inline void
+rte_smp_rmb(void)
+{
+	rte_atomic_thread_fence(rte_memory_order_acquire);
+}
+
+///@}
+
 /*------------------------- 16 bit atomic operations -------------------------*/
 
 #ifndef RTE_TOOLCHAIN_MSVC
diff --git a/lib/eal/loongarch/include/rte_atomic.h b/lib/eal/loongarch/include/rte_atomic.h
index c8066a4612..49e0c67020 100644
--- a/lib/eal/loongarch/include/rte_atomic.h
+++ b/lib/eal/loongarch/include/rte_atomic.h
@@ -22,12 +22,6 @@ extern "C" {
 
 #define rte_rmb() rte_mb()
 
-#define rte_smp_mb() rte_mb()
-
-#define rte_smp_wmb() rte_mb()
-
-#define rte_smp_rmb() rte_mb()
-
 #define rte_io_mb() rte_mb()
 
 #define rte_io_wmb() rte_mb()
diff --git a/lib/eal/ppc/include/rte_atomic.h b/lib/eal/ppc/include/rte_atomic.h
index 10acc238f9..1da5afccbf 100644
--- a/lib/eal/ppc/include/rte_atomic.h
+++ b/lib/eal/ppc/include/rte_atomic.h
@@ -24,12 +24,6 @@ extern "C" {
 
 #define rte_rmb() asm volatile("sync" : : : "memory")
 
-#define rte_smp_mb() rte_mb()
-
-#define rte_smp_wmb() rte_wmb()
-
-#define rte_smp_rmb() rte_rmb()
-
 #define rte_io_mb() rte_mb()
 
 #define rte_io_wmb() rte_wmb()
diff --git a/lib/eal/riscv/include/rte_atomic.h b/lib/eal/riscv/include/rte_atomic.h
index 66346ad474..dd10ad5127 100644
--- a/lib/eal/riscv/include/rte_atomic.h
+++ b/lib/eal/riscv/include/rte_atomic.h
@@ -27,12 +27,6 @@ extern "C" {
 
 #define rte_rmb() asm volatile("fence r, r" : : : "memory")
 
-#define rte_smp_mb() rte_mb()
-
-#define rte_smp_wmb() rte_wmb()
-
-#define rte_smp_rmb() rte_rmb()
-
 #define rte_io_mb() asm volatile("fence iorw, iorw" : : : "memory")
 
 #define rte_io_wmb() asm volatile("fence orw, ow" : : : "memory")
diff --git a/lib/eal/x86/include/rte_atomic.h b/lib/eal/x86/include/rte_atomic.h
index e071e4234e..a850b0257c 100644
--- a/lib/eal/x86/include/rte_atomic.h
+++ b/lib/eal/x86/include/rte_atomic.h
@@ -23,10 +23,6 @@
 
 #define rte_rmb() _mm_lfence()
 
-#define rte_smp_wmb() rte_compiler_barrier()
-
-#define rte_smp_rmb() rte_compiler_barrier()
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -63,20 +59,6 @@ extern "C" {
  * So below we use that technique for rte_smp_mb() implementation.
  */
 
-static __rte_always_inline void
-rte_smp_mb(void)
-{
-#ifdef RTE_TOOLCHAIN_MSVC
-	_mm_mfence();
-#else
-#ifdef RTE_ARCH_I686
-	asm volatile("lock addl $0, -128(%%esp); " ::: "memory");
-#else
-	asm volatile("lock addl $0, -128(%%rsp); " ::: "memory");
-#endif
-#endif
-}
-
 #define rte_io_mb() rte_mb()
 
 #define rte_io_wmb() rte_compiler_barrier()
@@ -93,10 +75,19 @@ rte_smp_mb(void)
 static __rte_always_inline void
 rte_atomic_thread_fence(rte_memory_order memorder)
 {
-	if (memorder == rte_memory_order_seq_cst)
-		rte_smp_mb();
-	else
+	if (memorder == rte_memory_order_seq_cst) {
+#ifdef RTE_TOOLCHAIN_MSVC
+		_mm_mfence();
+#else
+#ifdef RTE_ARCH_I686
+		asm volatile("lock addl $0, -128(%%esp); " ::: "memory");
+#else
+		asm volatile("lock addl $0, -128(%%rsp); " ::: "memory");
+#endif
+#endif
+	} else {
 		__rte_atomic_thread_fence(memorder);
+	}
 }
 
 #ifdef __cplusplus
-- 
2.53.0

