[RFC 2/7] eal: reimplement rte_smp_*mb with rte_atomic_thread_fence

Stephen Hemminger Wed, 20 May 2026 21:21:10 -0700

The rte_smp_mb(), rte_smp_wmb() and rte_smp_rmb() functions were
flagged as deprecated by commit 3ec965b6de12 ("doc: update atomic
operation deprecation") in 2021 but nothing came of it.
Reimplement them as inline wrappers over rte_atomic_thread_fence()
and drop the deprecation notice.
The API is preserved; only the implementation changes.


Generated code is unchanged on x86 (seq_cst keeps the lock-addl
trick, release/acquire collapse to a compiler barrier under TSO).
On arm64, rte_smp_wmb() now emits dmb ish (release fence) instead of
dmb ishst, while rte_smp_rmb() still emits dmb ishld (acquire fence);
the difference is below measurement noise.

Signed-off-by: Stephen Hemminger <[email protected]>
---
 doc/guides/rel_notes/deprecation.rst   |   8 --
 lib/eal/arm/include/rte_atomic_32.h    |   6 --
 lib/eal/arm/include/rte_atomic_64.h    |   6 --
 lib/eal/include/generic/rte_atomic.h   | 106 +++++++++++--------------
 lib/eal/loongarch/include/rte_atomic.h |   6 --
 lib/eal/ppc/include/rte_atomic.h       |   6 --
 lib/eal/riscv/include/rte_atomic.h     |   6 --
 lib/eal/x86/include/rte_atomic.h       |  33 +++-----
 8 files changed, 57 insertions(+), 120 deletions(-)

diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index 346c517623..03b763b472 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -47,14 +47,6 @@ Deprecation Notices
   operations must be used for patches that need to be merged in 20.08 onwards.
   This change will not introduce any performance degradation.
 
-* rte_smp_*mb: These APIs provide full barrier functionality. However, many
-  use cases do not require full barriers. To support such use cases, DPDK has
-  adopted atomic operations from
-  https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html. These
-  operations and a new wrapper ``rte_atomic_thread_fence`` instead of
-  ``__atomic_thread_fence`` must be used for patches that need to be merged in
-  20.08 onwards. This change will not introduce any performance degradation.
-
 * lib: will fix extending some enum/define breaking the ABI. There are multiple
   samples in DPDK that enum/define terminated with a ``.*MAX.*`` value which is
   used by iterators, and arrays holding these values are sized with this
diff --git a/lib/eal/arm/include/rte_atomic_32.h b/lib/eal/arm/include/rte_atomic_32.h
index 0b9a0dfa30..3809ddefb7 100644
--- a/lib/eal/arm/include/rte_atomic_32.h
+++ b/lib/eal/arm/include/rte_atomic_32.h
@@ -21,12 +21,6 @@ extern "C" {
 
 #define        rte_rmb() __sync_synchronize()
 
-#define rte_smp_mb() rte_mb()
-
-#define rte_smp_wmb() rte_wmb()
-
-#define rte_smp_rmb() rte_rmb()
-
 #define rte_io_mb() rte_mb()
 
 #define rte_io_wmb() rte_wmb()
diff --git a/lib/eal/arm/include/rte_atomic_64.h b/lib/eal/arm/include/rte_atomic_64.h
index 181bb60929..c9b41f6212 100644
--- a/lib/eal/arm/include/rte_atomic_64.h
+++ b/lib/eal/arm/include/rte_atomic_64.h
@@ -24,12 +24,6 @@ extern "C" {
 
 #define rte_rmb() asm volatile("dmb oshld" : : : "memory")
 
-#define rte_smp_mb() asm volatile("dmb ish" : : : "memory")
-
-#define rte_smp_wmb() asm volatile("dmb ishst" : : : "memory")
-
-#define rte_smp_rmb() asm volatile("dmb ishld" : : : "memory")
-
 #define rte_io_mb() rte_mb()
 
 #define rte_io_wmb() rte_wmb()
diff --git a/lib/eal/include/generic/rte_atomic.h b/lib/eal/include/generic/rte_atomic.h
index 0a4f3f8528..4e9d230f85 100644
--- a/lib/eal/include/generic/rte_atomic.h
+++ b/lib/eal/include/generic/rte_atomic.h
@@ -49,69 +49,8 @@ static inline void rte_wmb(void);
  * occur before the LOAD operations generated after.
  */
 static inline void rte_rmb(void);
-///@}
-
-/** @name SMP Memory Barrier
- */
-///@{
-/**
- * General memory barrier between lcores
- *
- * Guarantees that the LOAD and STORE operations that precede the
- * rte_smp_mb() call are globally visible across the lcores
- * before the LOAD and STORE operations that follows it.
- *
- * @note
- *  This function is deprecated.
- *  It provides similar synchronization primitive as atomic fence,
- *  but has different syntax and memory ordering semantic. Hence
- *  deprecated for the simplicity of memory ordering semantics in use.
- *
- *  rte_atomic_thread_fence(rte_memory_order_acq_rel) should be used instead.
- */
-static inline void rte_smp_mb(void);
 
-/**
- * Write memory barrier between lcores
- *
- * Guarantees that the STORE operations that precede the
- * rte_smp_wmb() call are globally visible across the lcores
- * before the STORE operations that follows it.
- *
- * @note
- *  This function is deprecated.
- *  It provides similar synchronization primitive as atomic fence,
- *  but has different syntax and memory ordering semantic. Hence
- *  deprecated for the simplicity of memory ordering semantics in use.
- *
- *  rte_atomic_thread_fence(rte_memory_order_release) should be used instead.
- *  The fence also guarantees LOAD operations that precede the call
- *  are globally visible across the lcores before the STORE operations
- *  that follows it.
- */
-static inline void rte_smp_wmb(void);
-
-/**
- * Read memory barrier between lcores
- *
- * Guarantees that the LOAD operations that precede the
- * rte_smp_rmb() call are globally visible across the lcores
- * before the LOAD operations that follows it.
- *
- * @note
- *  This function is deprecated.
- *  It provides similar synchronization primitive as atomic fence,
- *  but has different syntax and memory ordering semantic. Hence
- *  deprecated for the simplicity of memory ordering semantics in use.
- *
- *  rte_atomic_thread_fence(rte_memory_order_acquire) should be used instead.
- *  The fence also guarantees LOAD operations that precede the call
- *  are globally visible across the lcores before the STORE operations
- *  that follows it.
- */
-static inline void rte_smp_rmb(void);
 ///@}
-
 /** @name I/O Memory Barrier
  */
 ///@{
@@ -164,6 +103,51 @@ static inline void rte_io_rmb(void);
  */
 static inline void rte_atomic_thread_fence(rte_memory_order memorder);
 
+
+/** @name SMP Memory Barrier
+ */
+///@{
+/**
+ * General memory barrier between lcores
+ *
+ * Guarantees that the LOAD and STORE operations that precede the
+ * rte_smp_mb() call are globally visible across the lcores
+ * before the LOAD and STORE operations that follow it.
+ */
+static __rte_always_inline void
+rte_smp_mb(void)
+{
+       rte_atomic_thread_fence(rte_memory_order_seq_cst);
+}
+
+/**
+ * Write memory barrier between lcores
+ *
+ * Guarantees that the STORE operations that precede the
+ * rte_smp_wmb() call are globally visible across the lcores
+ * before the STORE operations that follow it.
+ */
+static __rte_always_inline void
+rte_smp_wmb(void)
+{
+       rte_atomic_thread_fence(rte_memory_order_release);
+}
+
+/**
+ * Read memory barrier between lcores
+ *
+ * Guarantees that the LOAD operations that precede the
+ * rte_smp_rmb() call are globally visible across the lcores
+ * before the LOAD operations that follow it.
+ */
+static __rte_always_inline void
+rte_smp_rmb(void)
+{
+       rte_atomic_thread_fence(rte_memory_order_acquire);
+}
+
+///@}
+
 /*------------------------- 16 bit atomic operations -------------------------*/
 
 #ifndef RTE_TOOLCHAIN_MSVC
diff --git a/lib/eal/loongarch/include/rte_atomic.h b/lib/eal/loongarch/include/rte_atomic.h
index c8066a4612..49e0c67020 100644
--- a/lib/eal/loongarch/include/rte_atomic.h
+++ b/lib/eal/loongarch/include/rte_atomic.h
@@ -22,12 +22,6 @@ extern "C" {
 
 #define rte_rmb()      rte_mb()
 
-#define rte_smp_mb()   rte_mb()
-
-#define rte_smp_wmb()  rte_mb()
-
-#define rte_smp_rmb()  rte_mb()
-
 #define rte_io_mb()    rte_mb()
 
 #define rte_io_wmb()   rte_mb()
diff --git a/lib/eal/ppc/include/rte_atomic.h b/lib/eal/ppc/include/rte_atomic.h
index 10acc238f9..1da5afccbf 100644
--- a/lib/eal/ppc/include/rte_atomic.h
+++ b/lib/eal/ppc/include/rte_atomic.h
@@ -24,12 +24,6 @@ extern "C" {
 
 #define        rte_rmb() asm volatile("sync" : : : "memory")
 
-#define rte_smp_mb() rte_mb()
-
-#define rte_smp_wmb() rte_wmb()
-
-#define rte_smp_rmb() rte_rmb()
-
 #define rte_io_mb() rte_mb()
 
 #define rte_io_wmb() rte_wmb()
diff --git a/lib/eal/riscv/include/rte_atomic.h b/lib/eal/riscv/include/rte_atomic.h
index 66346ad474..dd10ad5127 100644
--- a/lib/eal/riscv/include/rte_atomic.h
+++ b/lib/eal/riscv/include/rte_atomic.h
@@ -27,12 +27,6 @@ extern "C" {
 
 #define rte_rmb()      asm volatile("fence r, r" : : : "memory")
 
-#define rte_smp_mb()   rte_mb()
-
-#define rte_smp_wmb()  rte_wmb()
-
-#define rte_smp_rmb()  rte_rmb()
-
 #define rte_io_mb()    asm volatile("fence iorw, iorw" : : : "memory")
 
 #define rte_io_wmb()   asm volatile("fence orw, ow" : : : "memory")
diff --git a/lib/eal/x86/include/rte_atomic.h b/lib/eal/x86/include/rte_atomic.h
index e071e4234e..a850b0257c 100644
--- a/lib/eal/x86/include/rte_atomic.h
+++ b/lib/eal/x86/include/rte_atomic.h
@@ -23,10 +23,6 @@
 
 #define        rte_rmb() _mm_lfence()
 
-#define rte_smp_wmb() rte_compiler_barrier()
-
-#define rte_smp_rmb() rte_compiler_barrier()
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -63,20 +59,6 @@ extern "C" {
  * So below we use that technique for rte_smp_mb() implementation.
  */
 
-static __rte_always_inline void
-rte_smp_mb(void)
-{
-#ifdef RTE_TOOLCHAIN_MSVC
-       _mm_mfence();
-#else
-#ifdef RTE_ARCH_I686
-       asm volatile("lock addl $0, -128(%%esp); " ::: "memory");
-#else
-       asm volatile("lock addl $0, -128(%%rsp); " ::: "memory");
-#endif
-#endif
-}
-
 #define rte_io_mb() rte_mb()
 
 #define rte_io_wmb() rte_compiler_barrier()
@@ -93,10 +75,19 @@ rte_smp_mb(void)
 static __rte_always_inline void
 rte_atomic_thread_fence(rte_memory_order memorder)
 {
-       if (memorder == rte_memory_order_seq_cst)
-               rte_smp_mb();
-       else
+       if (memorder == rte_memory_order_seq_cst) {
+#ifdef RTE_TOOLCHAIN_MSVC
+               _mm_mfence();
+#else
+#ifdef RTE_ARCH_I686
+               asm volatile("lock addl $0, -128(%%esp); " ::: "memory");
+#else
+               asm volatile("lock addl $0, -128(%%rsp); " ::: "memory");
+#endif
+#endif
+       } else {
                __rte_atomic_thread_fence(memorder);
+       }
 }
 
 #ifdef __cplusplus
-- 
2.53.0

[RFC 2/7] eal: reimplement rte_smp_*mb with rte_atomic_thread_fence

Reply via email to