The atomic test uses tight spinloops to synchronize worker threads
before starting each test phase. These spinloops lack rte_pause(),
which causes problems on high-core-count systems, particularly on
AMD Zen architectures, where:

- Tight spinloops without pause can starve SMT sibling threads
- Memory ordering and store-buffer forwarding behave differently
- Higher core counts amplify timing windows for race conditions

This manifests as sporadic test failures on systems with 32 or more
cores that do not reproduce on systems with fewer cores.

Add rte_pause() to all seven synchronization spinloops to allow
proper CPU resource sharing and improve memory ordering behavior.
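
For reference, after this change each worker's start barrier follows
the usual DPDK polling idiom (sketch, using the identifiers already
present in test_atomic.c):

	/* Wait for the control thread to release the workers; the
	 * pause hint yields pipeline/SMT resources between polls.
	 */
	while (rte_atomic32_read(&synchro) == 0)
		rte_pause();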

Fixes: af75078fece3 ("first public release")
Cc: [email protected]

Signed-off-by: Stephen Hemminger <[email protected]>
---
 app/test/test_atomic.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/app/test/test_atomic.c b/app/test/test_atomic.c
index 8160a33e0e..b1a0d40ece 100644
--- a/app/test/test_atomic.c
+++ b/app/test/test_atomic.c
@@ -15,6 +15,7 @@
 #include <rte_atomic.h>
 #include <rte_eal.h>
 #include <rte_lcore.h>
+#include <rte_pause.h>
 #include <rte_random.h>
 #include <rte_hash_crc.h>
 
@@ -114,7 +115,7 @@ test_atomic_usual(__rte_unused void *arg)
        unsigned i;
 
        while (rte_atomic32_read(&synchro) == 0)
-               ;
+               rte_pause();
 
        for (i = 0; i < N; i++)
                rte_atomic16_inc(&a16);
@@ -150,7 +151,7 @@ static int
 test_atomic_tas(__rte_unused void *arg)
 {
        while (rte_atomic32_read(&synchro) == 0)
-               ;
+               rte_pause();
 
        if (rte_atomic16_test_and_set(&a16))
                rte_atomic64_inc(&count);
@@ -171,7 +172,7 @@ test_atomic_addsub_and_return(__rte_unused void *arg)
        unsigned i;
 
        while (rte_atomic32_read(&synchro) == 0)
-               ;
+               rte_pause();
 
        for (i = 0; i < N; i++) {
                tmp16 = rte_atomic16_add_return(&a16, 1);
@@ -210,7 +211,7 @@ static int
 test_atomic_inc_and_test(__rte_unused void *arg)
 {
        while (rte_atomic32_read(&synchro) == 0)
-               ;
+               rte_pause();
 
        if (rte_atomic16_inc_and_test(&a16)) {
                rte_atomic64_inc(&count);
@@ -237,7 +238,7 @@ static int
 test_atomic_dec_and_test(__rte_unused void *arg)
 {
        while (rte_atomic32_read(&synchro) == 0)
-               ;
+               rte_pause();
 
        if (rte_atomic16_dec_and_test(&a16))
                rte_atomic64_inc(&count);
@@ -269,7 +270,7 @@ test_atomic128_cmp_exchange(__rte_unused void *arg)
        unsigned int i;
 
        while (rte_atomic32_read(&synchro) == 0)
-               ;
+               rte_pause();
 
        expected = count128;
 
@@ -407,7 +408,7 @@ test_atomic_exchange(__rte_unused void *arg)
 
        /* Wait until all of the other threads have been dispatched */
        while (rte_atomic32_read(&synchro) == 0)
-               ;
+               rte_pause();
 
        /*
         * Let the battle begin! Every thread attempts to steal the current
-- 
2.51.0
