Replace the use of GCC builtin __atomic_xxx intrinsics with the
corresponding rte_atomic_xxx optional rte stdatomic API.

Signed-off-by: Tyler Retzlaff <roret...@linux.microsoft.com>
---
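Note for reviewers: below is a minimal sketch of the conversion pattern applied
throughout this patch, assuming the optional rte stdatomic API from
<rte_stdatomic.h>. The "counter" and "synchro" variables and the example_usage()
helper are illustrative only and do not appear in the patch itself.

  #include <stdint.h>
  #include <rte_stdatomic.h>
  #include <rte_pause.h>

  /* Variables previously updated with __atomic_xxx builtins get an
   * RTE_ATOMIC() type annotation and are accessed through the
   * rte_atomic_xxx_explicit() wrappers, with __ATOMIC_* orders mapped
   * to the corresponding rte_memory_order_* values. */
  static RTE_ATOMIC(uint32_t) counter;
  static volatile RTE_ATOMIC(uint32_t) synchro;

  void
  example_usage(void)
  {
          /* __atomic_fetch_add(&counter, 1, __ATOMIC_RELAXED) becomes: */
          rte_atomic_fetch_add_explicit(&counter, 1, rte_memory_order_relaxed);

          /* __atomic_load_n(&counter, __ATOMIC_ACQUIRE) becomes: */
          (void)rte_atomic_load_explicit(&counter, rte_memory_order_acquire);

          /* rte_wait_until_equal_32() still takes a plain uint32_t pointer,
           * so volatile RTE_ATOMIC() flags are cast through uintptr_t, as
           * done for the synchro/start variables in the tests below. */
          rte_wait_until_equal_32((uint32_t *)(uintptr_t)&synchro, 1,
              rte_memory_order_relaxed);
  }

This sketch only mirrors calls that appear in the diff; it is not part of the
change itself.
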
 app/test/test_bpf.c                    |  46 ++++++++-----
 app/test/test_distributor.c            | 114 ++++++++++++++++-----------------
 app/test/test_distributor_perf.c       |   4 +-
 app/test/test_func_reentrancy.c        |  28 ++++----
 app/test/test_hash_multiwriter.c       |  16 ++---
 app/test/test_hash_readwrite.c         |  74 ++++++++++-----------
 app/test/test_hash_readwrite_lf_perf.c |  88 ++++++++++++-------------
 app/test/test_lcores.c                 |  25 ++++----
 app/test/test_lpm_perf.c               |  14 ++--
 app/test/test_mcslock.c                |  12 ++--
 app/test/test_mempool_perf.c           |   9 +--
 app/test/test_pflock.c                 |  13 ++--
 app/test/test_pmd_perf.c               |  10 +--
 app/test/test_rcu_qsbr_perf.c          | 114 +++++++++++++++++----------------
 app/test/test_ring_perf.c              |  11 ++--
 app/test/test_ring_stress_impl.h       |  10 +--
 app/test/test_rwlock.c                 |   9 +--
 app/test/test_seqlock.c                |   6 +-
 app/test/test_service_cores.c          |  24 +++----
 app/test/test_spinlock.c               |   9 +--
 app/test/test_stack_perf.c             |  12 ++--
 app/test/test_threads.c                |  33 +++++-----
 app/test/test_ticketlock.c             |   9 +--
 app/test/test_timer.c                  |  31 +++++----
 24 files changed, 378 insertions(+), 343 deletions(-)

diff --git a/app/test/test_bpf.c b/app/test/test_bpf.c
index 53e3a31..2e43442 100644
--- a/app/test/test_bpf.c
+++ b/app/test/test_bpf.c
@@ -39,8 +39,8 @@
  */
 
 struct dummy_offset {
-       uint64_t u64;
-       uint32_t u32;
+       RTE_ATOMIC(uint64_t) u64;
+       RTE_ATOMIC(uint32_t) u32;
        uint16_t u16;
        uint8_t  u8;
 };
@@ -1581,32 +1581,46 @@ struct bpf_test {
        memset(&dfe, 0, sizeof(dfe));
 
        rv = 1;
-       __atomic_fetch_add(&dfe.u32, rv, __ATOMIC_RELAXED);
-       __atomic_fetch_add(&dfe.u64, rv, __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit((uint32_t __rte_atomic *)&dfe.u32, rv,
+           rte_memory_order_relaxed);
+       rte_atomic_fetch_add_explicit((uint64_t __rte_atomic *)&dfe.u64, rv,
+           rte_memory_order_relaxed);
 
        rv = -1;
-       __atomic_fetch_add(&dfe.u32, rv, __ATOMIC_RELAXED);
-       __atomic_fetch_add(&dfe.u64, rv, __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit((uint32_t __rte_atomic *)&dfe.u32, rv,
+           rte_memory_order_relaxed);
+       rte_atomic_fetch_add_explicit((uint64_t __rte_atomic *)&dfe.u64, rv,
+           rte_memory_order_relaxed);
 
        rv = (int32_t)TEST_FILL_1;
-       __atomic_fetch_add(&dfe.u32, rv, __ATOMIC_RELAXED);
-       __atomic_fetch_add(&dfe.u64, rv, __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit((uint32_t __rte_atomic *)&dfe.u32, rv,
+           rte_memory_order_relaxed);
+       rte_atomic_fetch_add_explicit((uint64_t __rte_atomic *)&dfe.u64, rv,
+           rte_memory_order_relaxed);
 
        rv = TEST_MUL_1;
-       __atomic_fetch_add(&dfe.u32, rv, __ATOMIC_RELAXED);
-       __atomic_fetch_add(&dfe.u64, rv, __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit((uint32_t __rte_atomic *)&dfe.u32, rv,
+           rte_memory_order_relaxed);
+       rte_atomic_fetch_add_explicit((uint64_t __rte_atomic *)&dfe.u64, rv,
+           rte_memory_order_relaxed);
 
        rv = TEST_MUL_2;
-       __atomic_fetch_add(&dfe.u32, rv, __ATOMIC_RELAXED);
-       __atomic_fetch_add(&dfe.u64, rv, __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit((uint32_t __rte_atomic *)&dfe.u32, rv,
+           rte_memory_order_relaxed);
+       rte_atomic_fetch_add_explicit((uint64_t __rte_atomic *)&dfe.u64, rv,
+           rte_memory_order_relaxed);
 
        rv = TEST_JCC_2;
-       __atomic_fetch_add(&dfe.u32, rv, __ATOMIC_RELAXED);
-       __atomic_fetch_add(&dfe.u64, rv, __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit((uint32_t __rte_atomic *)&dfe.u32, rv,
+           rte_memory_order_relaxed);
+       rte_atomic_fetch_add_explicit((uint64_t __rte_atomic *)&dfe.u64, rv,
+           rte_memory_order_relaxed);
 
        rv = TEST_JCC_3;
-       __atomic_fetch_add(&dfe.u32, rv, __ATOMIC_RELAXED);
-       __atomic_fetch_add(&dfe.u64, rv, __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit((uint32_t __rte_atomic *)&dfe.u32, rv,
+           rte_memory_order_relaxed);
+       rte_atomic_fetch_add_explicit((uint64_t __rte_atomic *)&dfe.u64, rv,
+           rte_memory_order_relaxed);
 
        return cmp_res(__func__, 1, rc, &dfe, dft, sizeof(dfe));
 }
diff --git a/app/test/test_distributor.c b/app/test/test_distributor.c
index d2037b7..df871e3 100644
--- a/app/test/test_distributor.c
+++ b/app/test/test_distributor.c
@@ -47,14 +47,14 @@ struct worker_params {
 struct worker_params worker_params;
 
 /* statics - all zero-initialized by default */
-static volatile int quit;      /**< general quit variable for all threads */
-static volatile int zero_quit; /**< var for when we just want thr0 to quit*/
-static volatile int zero_sleep; /**< thr0 has quit basic loop and is sleeping*/
-static volatile unsigned worker_idx;
-static volatile unsigned zero_idx;
+static volatile RTE_ATOMIC(int) quit;      /**< general quit variable for all threads */
+static volatile RTE_ATOMIC(int) zero_quit; /**< var for when we just want thr0 to quit*/
+static volatile RTE_ATOMIC(int) zero_sleep; /**< thr0 has quit basic loop and is sleeping*/
+static volatile RTE_ATOMIC(unsigned int) worker_idx;
+static volatile RTE_ATOMIC(unsigned int) zero_idx;
 
 struct worker_stats {
-       volatile unsigned handled_packets;
+       volatile RTE_ATOMIC(unsigned int) handled_packets;
 } __rte_cache_aligned;
 struct worker_stats worker_stats[RTE_MAX_LCORE];
 
@@ -66,8 +66,8 @@ struct worker_stats {
 {
        unsigned i, count = 0;
        for (i = 0; i < worker_idx; i++)
-               count += __atomic_load_n(&worker_stats[i].handled_packets,
-                               __ATOMIC_RELAXED);
+               count += rte_atomic_load_explicit(&worker_stats[i].handled_packets,
+                               rte_memory_order_relaxed);
        return count;
 }
 
@@ -77,8 +77,8 @@ struct worker_stats {
 {
        unsigned int i;
        for (i = 0; i < RTE_MAX_LCORE; i++)
-               __atomic_store_n(&worker_stats[i].handled_packets, 0,
-                       __ATOMIC_RELAXED);
+               rte_atomic_store_explicit(&worker_stats[i].handled_packets, 0,
+                       rte_memory_order_relaxed);
 }
 
 /* this is the basic worker function for sanity test
@@ -91,17 +91,17 @@ struct worker_stats {
        struct worker_params *wp = arg;
        struct rte_distributor *db = wp->dist;
        unsigned int num;
-       unsigned int id = __atomic_fetch_add(&worker_idx, 1, __ATOMIC_RELAXED);
+       unsigned int id = rte_atomic_fetch_add_explicit(&worker_idx, 1, rte_memory_order_relaxed);
 
        num = rte_distributor_get_pkt(db, id, buf, NULL, 0);
        while (!quit) {
-               __atomic_fetch_add(&worker_stats[id].handled_packets, num,
-                               __ATOMIC_RELAXED);
+               rte_atomic_fetch_add_explicit(&worker_stats[id].handled_packets, num,
+                               rte_memory_order_relaxed);
                num = rte_distributor_get_pkt(db, id,
                                buf, buf, num);
        }
-       __atomic_fetch_add(&worker_stats[id].handled_packets, num,
-                       __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit(&worker_stats[id].handled_packets, num,
+                       rte_memory_order_relaxed);
        rte_distributor_return_pkt(db, id, buf, num);
        return 0;
 }
@@ -162,8 +162,8 @@ struct worker_stats {
 
        for (i = 0; i < rte_lcore_count() - 1; i++)
                printf("Worker %u handled %u packets\n", i,
-                       __atomic_load_n(&worker_stats[i].handled_packets,
-                                       __ATOMIC_RELAXED));
+                       rte_atomic_load_explicit(&worker_stats[i].handled_packets,
+                                       rte_memory_order_relaxed));
        printf("Sanity test with all zero hashes done.\n");
 
        /* pick two flows and check they go correctly */
@@ -189,9 +189,9 @@ struct worker_stats {
 
                for (i = 0; i < rte_lcore_count() - 1; i++)
                        printf("Worker %u handled %u packets\n", i,
-                               __atomic_load_n(
+                               rte_atomic_load_explicit(
                                        &worker_stats[i].handled_packets,
-                                       __ATOMIC_RELAXED));
+                                       rte_memory_order_relaxed));
                printf("Sanity test with two hash values done\n");
        }
 
@@ -218,8 +218,8 @@ struct worker_stats {
 
        for (i = 0; i < rte_lcore_count() - 1; i++)
                printf("Worker %u handled %u packets\n", i,
-                       __atomic_load_n(&worker_stats[i].handled_packets,
-                                       __ATOMIC_RELAXED));
+                       rte_atomic_load_explicit(&worker_stats[i].handled_packets,
+                                       rte_memory_order_relaxed));
        printf("Sanity test with non-zero hashes done\n");
 
        rte_mempool_put_bulk(p, (void *)bufs, BURST);
@@ -311,18 +311,18 @@ struct worker_stats {
        struct rte_distributor *d = wp->dist;
        unsigned int i;
        unsigned int num;
-       unsigned int id = __atomic_fetch_add(&worker_idx, 1, __ATOMIC_RELAXED);
+       unsigned int id = rte_atomic_fetch_add_explicit(&worker_idx, 1, rte_memory_order_relaxed);
 
        num = rte_distributor_get_pkt(d, id, buf, NULL, 0);
        while (!quit) {
-               __atomic_fetch_add(&worker_stats[id].handled_packets, num,
-                               __ATOMIC_RELAXED);
+               rte_atomic_fetch_add_explicit(&worker_stats[id].handled_packets, num,
+                               rte_memory_order_relaxed);
                for (i = 0; i < num; i++)
                        rte_pktmbuf_free(buf[i]);
                num = rte_distributor_get_pkt(d, id, buf, NULL, 0);
        }
-       __atomic_fetch_add(&worker_stats[id].handled_packets, num,
-                       __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit(&worker_stats[id].handled_packets, num,
+                       rte_memory_order_relaxed);
        rte_distributor_return_pkt(d, id, buf, num);
        return 0;
 }
@@ -381,51 +381,51 @@ struct worker_stats {
        unsigned int num;
        unsigned int zero_id = 0;
        unsigned int zero_unset;
-       const unsigned int id = __atomic_fetch_add(&worker_idx, 1,
-                       __ATOMIC_RELAXED);
+       const unsigned int id = rte_atomic_fetch_add_explicit(&worker_idx, 1,
+                       rte_memory_order_relaxed);
 
        num = rte_distributor_get_pkt(d, id, buf, NULL, 0);
 
        if (num > 0) {
                zero_unset = RTE_MAX_LCORE;
-               __atomic_compare_exchange_n(&zero_idx, &zero_unset, id,
-                       false, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE);
+               rte_atomic_compare_exchange_strong_explicit(&zero_idx, &zero_unset, id,
+                       rte_memory_order_acq_rel, rte_memory_order_acquire);
        }
-       zero_id = __atomic_load_n(&zero_idx, __ATOMIC_ACQUIRE);
+       zero_id = rte_atomic_load_explicit(&zero_idx, rte_memory_order_acquire);
 
        /* wait for quit single globally, or for worker zero, wait
         * for zero_quit */
        while (!quit && !(id == zero_id && zero_quit)) {
-               __atomic_fetch_add(&worker_stats[id].handled_packets, num,
-                               __ATOMIC_RELAXED);
+               rte_atomic_fetch_add_explicit(&worker_stats[id].handled_packets, num,
+                               rte_memory_order_relaxed);
                num = rte_distributor_get_pkt(d, id, buf, NULL, 0);
 
                if (num > 0) {
                        zero_unset = RTE_MAX_LCORE;
-                       __atomic_compare_exchange_n(&zero_idx, &zero_unset, id,
-                               false, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE);
+                       rte_atomic_compare_exchange_strong_explicit(&zero_idx, &zero_unset, id,
+                               rte_memory_order_acq_rel, rte_memory_order_acquire);
                }
-               zero_id = __atomic_load_n(&zero_idx, __ATOMIC_ACQUIRE);
+               zero_id = rte_atomic_load_explicit(&zero_idx, rte_memory_order_acquire);
        }
 
-       __atomic_fetch_add(&worker_stats[id].handled_packets, num,
-                       __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit(&worker_stats[id].handled_packets, num,
+                       rte_memory_order_relaxed);
        if (id == zero_id) {
                rte_distributor_return_pkt(d, id, NULL, 0);
 
                /* for worker zero, allow it to restart to pick up last packet
                 * when all workers are shutting down.
                 */
-               __atomic_store_n(&zero_sleep, 1, __ATOMIC_RELEASE);
+               rte_atomic_store_explicit(&zero_sleep, 1, rte_memory_order_release);
                while (zero_quit)
                        usleep(100);
-               __atomic_store_n(&zero_sleep, 0, __ATOMIC_RELEASE);
+               rte_atomic_store_explicit(&zero_sleep, 0, rte_memory_order_release);
 
                num = rte_distributor_get_pkt(d, id, buf, NULL, 0);
 
                while (!quit) {
-                       __atomic_fetch_add(&worker_stats[id].handled_packets,
-                                       num, __ATOMIC_RELAXED);
+                       rte_atomic_fetch_add_explicit(&worker_stats[id].handled_packets,
+                                       num, rte_memory_order_relaxed);
                        num = rte_distributor_get_pkt(d, id, buf, NULL, 0);
                }
        }
@@ -491,17 +491,17 @@ struct worker_stats {
 
        /* flush the distributor */
        rte_distributor_flush(d);
-       while (!__atomic_load_n(&zero_sleep, __ATOMIC_ACQUIRE))
+       while (!rte_atomic_load_explicit(&zero_sleep, rte_memory_order_acquire))
                rte_distributor_flush(d);
 
        zero_quit = 0;
-       while (__atomic_load_n(&zero_sleep, __ATOMIC_ACQUIRE))
+       while (rte_atomic_load_explicit(&zero_sleep, rte_memory_order_acquire))
                rte_delay_us(100);
 
        for (i = 0; i < rte_lcore_count() - 1; i++)
                printf("Worker %u handled %u packets\n", i,
-                       __atomic_load_n(&worker_stats[i].handled_packets,
-                                       __ATOMIC_RELAXED));
+                       rte_atomic_load_explicit(&worker_stats[i].handled_packets,
+                                       rte_memory_order_relaxed));
 
        if (total_packet_count() != BURST * 2) {
                printf("Line %d: Error, not all packets flushed. "
@@ -560,18 +560,18 @@ struct worker_stats {
        /* flush the distributor */
        rte_distributor_flush(d);
 
-       while (!__atomic_load_n(&zero_sleep, __ATOMIC_ACQUIRE))
+       while (!rte_atomic_load_explicit(&zero_sleep, rte_memory_order_acquire))
                rte_distributor_flush(d);
 
        zero_quit = 0;
 
-       while (__atomic_load_n(&zero_sleep, __ATOMIC_ACQUIRE))
+       while (rte_atomic_load_explicit(&zero_sleep, rte_memory_order_acquire))
                rte_delay_us(100);
 
        for (i = 0; i < rte_lcore_count() - 1; i++)
                printf("Worker %u handled %u packets\n", i,
-                       __atomic_load_n(&worker_stats[i].handled_packets,
-                                       __ATOMIC_RELAXED));
+                       rte_atomic_load_explicit(&worker_stats[i].handled_packets,
+                                       rte_memory_order_relaxed));
 
        if (total_packet_count() != BURST) {
                printf("Line %d: Error, not all packets flushed. "
@@ -596,18 +596,18 @@ struct worker_stats {
        struct worker_params *wp = arg;
        struct rte_distributor *db = wp->dist;
        unsigned int num, i;
-       unsigned int id = __atomic_fetch_add(&worker_idx, 1, __ATOMIC_RELAXED);
+       unsigned int id = rte_atomic_fetch_add_explicit(&worker_idx, 1, rte_memory_order_relaxed);
        num = rte_distributor_get_pkt(db, id, buf, NULL, 0);
        while (!quit) {
-               __atomic_fetch_add(&worker_stats[id].handled_packets, num,
-                               __ATOMIC_RELAXED);
+               rte_atomic_fetch_add_explicit(&worker_stats[id].handled_packets, num,
+                               rte_memory_order_relaxed);
                for (i = 0; i < num; i++)
                        *seq_field(buf[i]) += id + 1;
                num = rte_distributor_get_pkt(db, id,
                                buf, buf, num);
        }
-       __atomic_fetch_add(&worker_stats[id].handled_packets, num,
-                       __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit(&worker_stats[id].handled_packets, num,
+                       rte_memory_order_relaxed);
        rte_distributor_return_pkt(db, id, buf, num);
        return 0;
 }
@@ -679,8 +679,8 @@ struct worker_stats {
 
        for (i = 0; i < rte_lcore_count() - 1; i++)
                printf("Worker %u handled %u packets\n", i,
-                       __atomic_load_n(&worker_stats[i].handled_packets,
-                                       __ATOMIC_RELAXED));
+                       rte_atomic_load_explicit(&worker_stats[i].handled_packets,
+                                       rte_memory_order_relaxed));
 
        /* Sort returned packets by sent order (sequence numbers). */
        for (i = 0; i < buf_count; i++) {
diff --git a/app/test/test_distributor_perf.c b/app/test/test_distributor_perf.c
index ca86845..ba3cf26 100644
--- a/app/test/test_distributor_perf.c
+++ b/app/test/test_distributor_perf.c
@@ -31,7 +31,7 @@
 
 /* static vars - zero initialized by default */
 static volatile int quit;
-static volatile unsigned worker_idx;
+static volatile RTE_ATOMIC(unsigned int) worker_idx;
 
 struct worker_stats {
        volatile unsigned handled_packets;
@@ -121,7 +121,7 @@ struct worker_stats {
        struct rte_distributor *d = arg;
        unsigned int num = 0;
        int i;
-       unsigned int id = __atomic_fetch_add(&worker_idx, 1, __ATOMIC_RELAXED);
+       unsigned int id = rte_atomic_fetch_add_explicit(&worker_idx, 1, rte_memory_order_relaxed);
        struct rte_mbuf *buf[8] __rte_cache_aligned;
 
        for (i = 0; i < 8; i++)
diff --git a/app/test/test_func_reentrancy.c b/app/test/test_func_reentrancy.c
index 9296de2..bae39af 100644
--- a/app/test/test_func_reentrancy.c
+++ b/app/test/test_func_reentrancy.c
@@ -53,12 +53,13 @@
 
 #define MAX_LCORES     (rte_memzone_max_get() / (MAX_ITER_MULTI * 4U))
 
-static uint32_t obj_count;
-static uint32_t synchro;
+static RTE_ATOMIC(uint32_t) obj_count;
+static RTE_ATOMIC(uint32_t) synchro;
 
 #define WAIT_SYNCHRO_FOR_WORKERS()   do { \
        if (lcore_self != rte_get_main_lcore())                  \
-               rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED); \
+               rte_wait_until_equal_32((uint32_t *)(uintptr_t)&synchro, 1, \
+                   rte_memory_order_relaxed); \
 } while(0)
 
 /*
@@ -71,7 +72,8 @@
 
        WAIT_SYNCHRO_FOR_WORKERS();
 
-       __atomic_store_n(&obj_count, 1, __ATOMIC_RELAXED); /* silent the check in the caller */
+       /* silent the check in the caller */
+       rte_atomic_store_explicit(&obj_count, 1, rte_memory_order_relaxed);
        if (rte_eal_init(0, NULL) != -1)
                return -1;
 
@@ -113,7 +115,7 @@
        for (i = 0; i < MAX_ITER_ONCE; i++) {
                rp = rte_ring_create("fr_test_once", 4096, SOCKET_ID_ANY, 0);
                if (rp != NULL)
-                       __atomic_fetch_add(&obj_count, 1, __ATOMIC_RELAXED);
+                       rte_atomic_fetch_add_explicit(&obj_count, 1, rte_memory_order_relaxed);
        }
 
        /* create/lookup new ring several times */
@@ -178,7 +180,7 @@
                                        my_obj_init, NULL,
                                        SOCKET_ID_ANY, 0);
                if (mp != NULL)
-                       __atomic_fetch_add(&obj_count, 1, __ATOMIC_RELAXED);
+                       rte_atomic_fetch_add_explicit(&obj_count, 1, rte_memory_order_relaxed);
        }
 
        /* create/lookup new ring several times */
@@ -244,7 +246,7 @@
        for (i = 0; i < MAX_ITER_ONCE; i++) {
                handle = rte_hash_create(&hash_params);
                if (handle != NULL)
-                       __atomic_fetch_add(&obj_count, 1, __ATOMIC_RELAXED);
+                       rte_atomic_fetch_add_explicit(&obj_count, 1, rte_memory_order_relaxed);
        }
 
        /* create multiple times simultaneously */
@@ -311,7 +313,7 @@
        for (i = 0; i < MAX_ITER_ONCE; i++) {
                handle = rte_fbk_hash_create(&fbk_params);
                if (handle != NULL)
-                       __atomic_fetch_add(&obj_count, 1, __ATOMIC_RELAXED);
+                       rte_atomic_fetch_add_explicit(&obj_count, 1, rte_memory_order_relaxed);
        }
 
        /* create multiple fbk tables simultaneously */
@@ -376,7 +378,7 @@
        for (i = 0; i < MAX_ITER_ONCE; i++) {
                lpm = rte_lpm_create("fr_test_once",  SOCKET_ID_ANY, &config);
                if (lpm != NULL)
-                       __atomic_fetch_add(&obj_count, 1, __ATOMIC_RELAXED);
+                       rte_atomic_fetch_add_explicit(&obj_count, 1, rte_memory_order_relaxed);
        }
 
        /* create multiple fbk tables simultaneously */
@@ -437,8 +439,8 @@ struct test_case test_cases[] = {
        if (pt_case->func == NULL)
                return -1;
 
-       __atomic_store_n(&obj_count, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&obj_count, 0, rte_memory_order_relaxed);
+       rte_atomic_store_explicit(&synchro, 0, rte_memory_order_relaxed);
 
        cores = RTE_MIN(rte_lcore_count(), MAX_LCORES);
        RTE_LCORE_FOREACH_WORKER(lcore_id) {
@@ -448,7 +450,7 @@ struct test_case test_cases[] = {
                rte_eal_remote_launch(pt_case->func, pt_case->arg, lcore_id);
        }
 
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&synchro, 1, rte_memory_order_relaxed);
 
        if (pt_case->func(pt_case->arg) < 0)
                ret = -1;
@@ -463,7 +465,7 @@ struct test_case test_cases[] = {
                        pt_case->clean(lcore_id);
        }
 
-       count = __atomic_load_n(&obj_count, __ATOMIC_RELAXED);
+       count = rte_atomic_load_explicit(&obj_count, rte_memory_order_relaxed);
        if (count != 1) {
                printf("%s: common object allocated %d times (should be 1)\n",
                        pt_case->name, count);
diff --git a/app/test/test_hash_multiwriter.c b/app/test/test_hash_multiwriter.c
index ed9dd41..33d3147 100644
--- a/app/test/test_hash_multiwriter.c
+++ b/app/test/test_hash_multiwriter.c
@@ -43,8 +43,8 @@ struct {
 const uint32_t nb_total_tsx_insertion = 4.5*1024*1024;
 uint32_t rounded_nb_total_tsx_insertion;
 
-static uint64_t gcycles;
-static uint64_t ginsertions;
+static RTE_ATOMIC(uint64_t) gcycles;
+static RTE_ATOMIC(uint64_t) ginsertions;
 
 static int use_htm;
 
@@ -84,8 +84,8 @@ struct {
        }
 
        cycles = rte_rdtsc_precise() - begin;
-       __atomic_fetch_add(&gcycles, cycles, __ATOMIC_RELAXED);
-       __atomic_fetch_add(&ginsertions, i - offset, __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit(&gcycles, cycles, rte_memory_order_relaxed);
+       rte_atomic_fetch_add_explicit(&ginsertions, i - offset, rte_memory_order_relaxed);
 
        for (; i < offset + tbl_multiwriter_test_params.nb_tsx_insertion; i++)
                tbl_multiwriter_test_params.keys[i]
@@ -166,8 +166,8 @@ struct {
 
        tbl_multiwriter_test_params.found = found;
 
-       __atomic_store_n(&gcycles, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&ginsertions, 0, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&gcycles, 0, rte_memory_order_relaxed);
+       rte_atomic_store_explicit(&ginsertions, 0, rte_memory_order_relaxed);
 
        /* Get list of enabled cores */
        i = 0;
@@ -233,8 +233,8 @@ struct {
        printf("No key corrupted during multiwriter insertion.\n");
 
        unsigned long long int cycles_per_insertion =
-               __atomic_load_n(&gcycles, __ATOMIC_RELAXED)/
-               __atomic_load_n(&ginsertions, __ATOMIC_RELAXED);
+               rte_atomic_load_explicit(&gcycles, rte_memory_order_relaxed)/
+               rte_atomic_load_explicit(&ginsertions, rte_memory_order_relaxed);
 
        printf(" cycles per insertion: %llu\n", cycles_per_insertion);
 
diff --git a/app/test/test_hash_readwrite.c b/app/test/test_hash_readwrite.c
index 4997a01..1867376 100644
--- a/app/test/test_hash_readwrite.c
+++ b/app/test/test_hash_readwrite.c
@@ -45,14 +45,14 @@ struct {
        struct rte_hash *h;
 } tbl_rw_test_param;
 
-static uint64_t gcycles;
-static uint64_t ginsertions;
+static RTE_ATOMIC(uint64_t) gcycles;
+static RTE_ATOMIC(uint64_t) ginsertions;
 
-static uint64_t gread_cycles;
-static uint64_t gwrite_cycles;
+static RTE_ATOMIC(uint64_t) gread_cycles;
+static RTE_ATOMIC(uint64_t) gwrite_cycles;
 
-static uint64_t greads;
-static uint64_t gwrites;
+static RTE_ATOMIC(uint64_t) greads;
+static RTE_ATOMIC(uint64_t) gwrites;
 
 static int
 test_hash_readwrite_worker(__rte_unused void *arg)
@@ -110,8 +110,8 @@ struct {
        }
 
        cycles = rte_rdtsc_precise() - begin;
-       __atomic_fetch_add(&gcycles, cycles, __ATOMIC_RELAXED);
-       __atomic_fetch_add(&ginsertions, i - offset, __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit(&gcycles, cycles, rte_memory_order_relaxed);
+       rte_atomic_fetch_add_explicit(&ginsertions, i - offset, rte_memory_order_relaxed);
 
        for (; i < offset + tbl_rw_test_param.num_insert; i++)
                tbl_rw_test_param.keys[i] = RTE_RWTEST_FAIL;
@@ -209,8 +209,8 @@ struct {
        int worker_cnt = rte_lcore_count() - 1;
        uint32_t tot_insert = 0;
 
-       __atomic_store_n(&gcycles, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&ginsertions, 0, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&gcycles, 0, rte_memory_order_relaxed);
+       rte_atomic_store_explicit(&ginsertions, 0, rte_memory_order_relaxed);
 
        if (init_params(use_ext, use_htm, use_rw_lf, use_jhash) != 0)
                goto err;
@@ -269,8 +269,8 @@ struct {
        printf("No key corrupted during read-write test.\n");
 
        unsigned long long int cycles_per_insertion =
-               __atomic_load_n(&gcycles, __ATOMIC_RELAXED) /
-               __atomic_load_n(&ginsertions, __ATOMIC_RELAXED);
+               rte_atomic_load_explicit(&gcycles, rte_memory_order_relaxed) /
+               rte_atomic_load_explicit(&ginsertions, rte_memory_order_relaxed);
 
        printf("cycles per insertion and lookup: %llu\n", cycles_per_insertion);
 
@@ -310,8 +310,8 @@ struct {
        }
 
        cycles = rte_rdtsc_precise() - begin;
-       __atomic_fetch_add(&gread_cycles, cycles, __ATOMIC_RELAXED);
-       __atomic_fetch_add(&greads, i, __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit(&gread_cycles, cycles, rte_memory_order_relaxed);
+       rte_atomic_fetch_add_explicit(&greads, i, rte_memory_order_relaxed);
        return 0;
 }
 
@@ -344,9 +344,9 @@ struct {
        }
 
        cycles = rte_rdtsc_precise() - begin;
-       __atomic_fetch_add(&gwrite_cycles, cycles, __ATOMIC_RELAXED);
-       __atomic_fetch_add(&gwrites, tbl_rw_test_param.num_insert,
-                                                       __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit(&gwrite_cycles, cycles, rte_memory_order_relaxed);
+       rte_atomic_fetch_add_explicit(&gwrites, tbl_rw_test_param.num_insert,
+                                                       rte_memory_order_relaxed);
        return 0;
 }
 
@@ -369,11 +369,11 @@ struct {
 
        uint64_t start = 0, end = 0;
 
-       __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&greads, 0, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&gwrites, 0, rte_memory_order_relaxed);
+       rte_atomic_store_explicit(&greads, 0, rte_memory_order_relaxed);
 
-       __atomic_store_n(&gread_cycles, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&gread_cycles, 0, rte_memory_order_relaxed);
+       rte_atomic_store_explicit(&gwrite_cycles, 0, rte_memory_order_relaxed);
 
        if (init_params(0, use_htm, 0, use_jhash) != 0)
                goto err;
@@ -430,10 +430,10 @@ struct {
                if (tot_worker_lcore < core_cnt[n] * 2)
                        goto finish;
 
-               __atomic_store_n(&greads, 0, __ATOMIC_RELAXED);
-               __atomic_store_n(&gread_cycles, 0, __ATOMIC_RELAXED);
-               __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
-               __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
+               rte_atomic_store_explicit(&greads, 0, rte_memory_order_relaxed);
+               rte_atomic_store_explicit(&gread_cycles, 0, rte_memory_order_relaxed);
+               rte_atomic_store_explicit(&gwrites, 0, rte_memory_order_relaxed);
+               rte_atomic_store_explicit(&gwrite_cycles, 0, rte_memory_order_relaxed);
 
                rte_hash_reset(tbl_rw_test_param.h);
 
@@ -475,8 +475,8 @@ struct {
 
                if (reader_faster) {
                        unsigned long long int cycles_per_insertion =
-                               __atomic_load_n(&gread_cycles, __ATOMIC_RELAXED) /
-                               __atomic_load_n(&greads, __ATOMIC_RELAXED);
+                               rte_atomic_load_explicit(&gread_cycles, rte_memory_order_relaxed) /
+                               rte_atomic_load_explicit(&greads, rte_memory_order_relaxed);
                        perf_results->read_only[n] = cycles_per_insertion;
                        printf("Reader only: cycles per lookup: %llu\n",
                                                        cycles_per_insertion);
@@ -484,17 +484,17 @@ struct {
 
                else {
                        unsigned long long int cycles_per_insertion =
-                               __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-                               __atomic_load_n(&gwrites, __ATOMIC_RELAXED);
+                               rte_atomic_load_explicit(&gwrite_cycles, rte_memory_order_relaxed) /
+                               rte_atomic_load_explicit(&gwrites, rte_memory_order_relaxed);
                        perf_results->write_only[n] = cycles_per_insertion;
                        printf("Writer only: cycles per writes: %llu\n",
                                                        cycles_per_insertion);
                }
 
-               __atomic_store_n(&greads, 0, __ATOMIC_RELAXED);
-               __atomic_store_n(&gread_cycles, 0, __ATOMIC_RELAXED);
-               __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
-               __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
+               rte_atomic_store_explicit(&greads, 0, rte_memory_order_relaxed);
+               rte_atomic_store_explicit(&gread_cycles, 0, rte_memory_order_relaxed);
+               rte_atomic_store_explicit(&gwrites, 0, rte_memory_order_relaxed);
+               rte_atomic_store_explicit(&gwrite_cycles, 0, rte_memory_order_relaxed);
 
                rte_hash_reset(tbl_rw_test_param.h);
 
@@ -569,8 +569,8 @@ struct {
 
                if (reader_faster) {
                        unsigned long long int cycles_per_insertion =
-                               __atomic_load_n(&gread_cycles, __ATOMIC_RELAXED) /
-                               __atomic_load_n(&greads, __ATOMIC_RELAXED);
+                               rte_atomic_load_explicit(&gread_cycles, rte_memory_order_relaxed) /
+                               rte_atomic_load_explicit(&greads, rte_memory_order_relaxed);
                        perf_results->read_write_r[n] = cycles_per_insertion;
                        printf("Read-write cycles per lookup: %llu\n",
                                                        cycles_per_insertion);
@@ -578,8 +578,8 @@ struct {
 
                else {
                        unsigned long long int cycles_per_insertion =
-                               __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-                               __atomic_load_n(&gwrites, __ATOMIC_RELAXED);
+                               rte_atomic_load_explicit(&gwrite_cycles, rte_memory_order_relaxed) /
+                               rte_atomic_load_explicit(&gwrites, rte_memory_order_relaxed);
                        perf_results->read_write_w[n] = cycles_per_insertion;
                        printf("Read-write cycles per writes: %llu\n",
                                                        cycles_per_insertion);
diff --git a/app/test/test_hash_readwrite_lf_perf.c b/app/test/test_hash_readwrite_lf_perf.c
index 5d18850..4523985 100644
--- a/app/test/test_hash_readwrite_lf_perf.c
+++ b/app/test/test_hash_readwrite_lf_perf.c
@@ -86,10 +86,10 @@ struct rwc_perf {
        struct rte_hash *h;
 } tbl_rwc_test_param;
 
-static uint64_t gread_cycles;
-static uint64_t greads;
-static uint64_t gwrite_cycles;
-static uint64_t gwrites;
+static RTE_ATOMIC(uint64_t) gread_cycles;
+static RTE_ATOMIC(uint64_t) greads;
+static RTE_ATOMIC(uint64_t) gwrite_cycles;
+static RTE_ATOMIC(uint64_t) gwrites;
 
 static volatile uint8_t writer_done;
 
@@ -651,8 +651,8 @@ struct rwc_perf {
        } while (!writer_done);
 
        cycles = rte_rdtsc_precise() - begin;
-       __atomic_fetch_add(&gread_cycles, cycles, __ATOMIC_RELAXED);
-       __atomic_fetch_add(&greads, read_cnt*loop_cnt, __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit(&gread_cycles, cycles, rte_memory_order_relaxed);
+       rte_atomic_fetch_add_explicit(&greads, read_cnt*loop_cnt, rte_memory_order_relaxed);
        return 0;
 }
 
@@ -724,8 +724,8 @@ struct rwc_perf {
 
                        printf("\nNumber of readers: %u\n", rwc_core_cnt[n]);
 
-                       __atomic_store_n(&greads, 0, __ATOMIC_RELAXED);
-                       __atomic_store_n(&gread_cycles, 0, __ATOMIC_RELAXED);
+                       rte_atomic_store_explicit(&greads, 0, rte_memory_order_relaxed);
+                       rte_atomic_store_explicit(&gread_cycles, 0, rte_memory_order_relaxed);
 
                        rte_hash_reset(tbl_rwc_test_param.h);
                        writer_done = 0;
@@ -742,8 +742,8 @@ struct rwc_perf {
                                        goto err;
 
                        unsigned long long cycles_per_lookup =
-                               __atomic_load_n(&gread_cycles, __ATOMIC_RELAXED)
-                               / __atomic_load_n(&greads, __ATOMIC_RELAXED);
+                               rte_atomic_load_explicit(&gread_cycles, rte_memory_order_relaxed)
+                               / rte_atomic_load_explicit(&greads, rte_memory_order_relaxed);
                        rwc_perf_results->w_no_ks_r_hit[m][n]
                                                = cycles_per_lookup;
                        printf("Cycles per lookup: %llu\n", cycles_per_lookup);
@@ -791,8 +791,8 @@ struct rwc_perf {
 
                        printf("\nNumber of readers: %u\n", rwc_core_cnt[n]);
 
-                       __atomic_store_n(&greads, 0, __ATOMIC_RELAXED);
-                       __atomic_store_n(&gread_cycles, 0, __ATOMIC_RELAXED);
+                       rte_atomic_store_explicit(&greads, 0, rte_memory_order_relaxed);
+                       rte_atomic_store_explicit(&gread_cycles, 0, rte_memory_order_relaxed);
 
                        rte_hash_reset(tbl_rwc_test_param.h);
                        writer_done = 0;
@@ -811,8 +811,8 @@ struct rwc_perf {
                                        goto err;
 
                        unsigned long long cycles_per_lookup =
-                               __atomic_load_n(&gread_cycles, __ATOMIC_RELAXED)
-                               / __atomic_load_n(&greads, __ATOMIC_RELAXED);
+                               rte_atomic_load_explicit(&gread_cycles, rte_memory_order_relaxed)
+                               / rte_atomic_load_explicit(&greads, rte_memory_order_relaxed);
                        rwc_perf_results->w_no_ks_r_miss[m][n]
                                                = cycles_per_lookup;
                        printf("Cycles per lookup: %llu\n", cycles_per_lookup);
@@ -861,8 +861,8 @@ struct rwc_perf {
 
                        printf("\nNumber of readers: %u\n", rwc_core_cnt[n]);
 
-                       __atomic_store_n(&greads, 0, __ATOMIC_RELAXED);
-                       __atomic_store_n(&gread_cycles, 0, __ATOMIC_RELAXED);
+                       rte_atomic_store_explicit(&greads, 0, rte_memory_order_relaxed);
+                       rte_atomic_store_explicit(&gread_cycles, 0, rte_memory_order_relaxed);
 
                        rte_hash_reset(tbl_rwc_test_param.h);
                        writer_done = 0;
@@ -884,8 +884,8 @@ struct rwc_perf {
                                        goto err;
 
                        unsigned long long cycles_per_lookup =
-                               __atomic_load_n(&gread_cycles, __ATOMIC_RELAXED)
-                               / __atomic_load_n(&greads, __ATOMIC_RELAXED);
+                               rte_atomic_load_explicit(&gread_cycles, rte_memory_order_relaxed)
+                               / rte_atomic_load_explicit(&greads, rte_memory_order_relaxed);
                        rwc_perf_results->w_ks_r_hit_nsp[m][n]
                                                = cycles_per_lookup;
                        printf("Cycles per lookup: %llu\n", cycles_per_lookup);
@@ -935,8 +935,8 @@ struct rwc_perf {
 
                        printf("\nNumber of readers: %u\n", rwc_core_cnt[n]);
 
-                       __atomic_store_n(&greads, 0, __ATOMIC_RELAXED);
-                       __atomic_store_n(&gread_cycles, 0, __ATOMIC_RELAXED);
+                       rte_atomic_store_explicit(&greads, 0, rte_memory_order_relaxed);
+                       rte_atomic_store_explicit(&gread_cycles, 0, rte_memory_order_relaxed);
 
                        rte_hash_reset(tbl_rwc_test_param.h);
                        writer_done = 0;
@@ -958,8 +958,8 @@ struct rwc_perf {
                                        goto err;
 
                        unsigned long long cycles_per_lookup =
-                               __atomic_load_n(&gread_cycles, __ATOMIC_RELAXED)
-                               / __atomic_load_n(&greads, __ATOMIC_RELAXED);
+                               rte_atomic_load_explicit(&gread_cycles, rte_memory_order_relaxed)
+                               / rte_atomic_load_explicit(&greads, rte_memory_order_relaxed);
                        rwc_perf_results->w_ks_r_hit_sp[m][n]
                                                = cycles_per_lookup;
                        printf("Cycles per lookup: %llu\n", cycles_per_lookup);
@@ -1007,8 +1007,8 @@ struct rwc_perf {
 
                        printf("\nNumber of readers: %u\n", rwc_core_cnt[n]);
 
-                       __atomic_store_n(&greads, 0, __ATOMIC_RELAXED);
-                       __atomic_store_n(&gread_cycles, 0, __ATOMIC_RELAXED);
+                       rte_atomic_store_explicit(&greads, 0, rte_memory_order_relaxed);
+                       rte_atomic_store_explicit(&gread_cycles, 0, rte_memory_order_relaxed);
 
                        rte_hash_reset(tbl_rwc_test_param.h);
                        writer_done = 0;
@@ -1030,8 +1030,8 @@ struct rwc_perf {
                                        goto err;
 
                        unsigned long long cycles_per_lookup =
-                               __atomic_load_n(&gread_cycles, __ATOMIC_RELAXED)
-                               / __atomic_load_n(&greads, __ATOMIC_RELAXED);
+                               rte_atomic_load_explicit(&gread_cycles, rte_memory_order_relaxed)
+                               / rte_atomic_load_explicit(&greads, rte_memory_order_relaxed);
                        rwc_perf_results->w_ks_r_miss[m][n] = cycles_per_lookup;
                        printf("Cycles per lookup: %llu\n", cycles_per_lookup);
                }
@@ -1087,9 +1087,9 @@ struct rwc_perf {
                                printf("\nNumber of readers: %u\n",
                                       rwc_core_cnt[n]);
 
-                               __atomic_store_n(&greads, 0, __ATOMIC_RELAXED);
-                               __atomic_store_n(&gread_cycles, 0,
-                                                __ATOMIC_RELAXED);
+                               rte_atomic_store_explicit(&greads, 0, rte_memory_order_relaxed);
+                               rte_atomic_store_explicit(&gread_cycles, 0,
+                                                rte_memory_order_relaxed);
 
                                rte_hash_reset(tbl_rwc_test_param.h);
                                writer_done = 0;
@@ -1127,10 +1127,10 @@ struct rwc_perf {
                                                goto err;
 
                                unsigned long long cycles_per_lookup =
-                                       __atomic_load_n(&gread_cycles,
-                                                       __ATOMIC_RELAXED) /
-                                       __atomic_load_n(&greads,
-                                                         __ATOMIC_RELAXED);
+                                       rte_atomic_load_explicit(&gread_cycles,
+                                                       rte_memory_order_relaxed) /
+                                       rte_atomic_load_explicit(&greads,
+                                                         rte_memory_order_relaxed);
                                rwc_perf_results->multi_rw[m][k][n]
                                        = cycles_per_lookup;
                                printf("Cycles per lookup: %llu\n",
@@ -1178,8 +1178,8 @@ struct rwc_perf {
 
                        printf("\nNumber of readers: %u\n", rwc_core_cnt[n]);
 
-                       __atomic_store_n(&greads, 0, __ATOMIC_RELAXED);
-                       __atomic_store_n(&gread_cycles, 0, __ATOMIC_RELAXED);
+                       rte_atomic_store_explicit(&greads, 0, rte_memory_order_relaxed);
+                       rte_atomic_store_explicit(&gread_cycles, 0, rte_memory_order_relaxed);
 
                        rte_hash_reset(tbl_rwc_test_param.h);
                        write_type = WRITE_NO_KEY_SHIFT;
@@ -1210,8 +1210,8 @@ struct rwc_perf {
                                        goto err;
 
                        unsigned long long cycles_per_lookup =
-                               __atomic_load_n(&gread_cycles, __ATOMIC_RELAXED)
-                               / __atomic_load_n(&greads, __ATOMIC_RELAXED);
+                               rte_atomic_load_explicit(&gread_cycles, rte_memory_order_relaxed)
+                               / rte_atomic_load_explicit(&greads, rte_memory_order_relaxed);
                        rwc_perf_results->w_ks_r_hit_extbkt[m][n]
                                                = cycles_per_lookup;
                        printf("Cycles per lookup: %llu\n", cycles_per_lookup);
@@ -1280,9 +1280,9 @@ struct rwc_perf {
                                tbl_rwc_test_param.keys_no_ks + i);
        }
        cycles = rte_rdtsc_precise() - begin;
-       __atomic_fetch_add(&gwrite_cycles, cycles, __ATOMIC_RELAXED);
-       __atomic_fetch_add(&gwrites, tbl_rwc_test_param.single_insert,
-                          __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit(&gwrite_cycles, cycles, rte_memory_order_relaxed);
+       rte_atomic_fetch_add_explicit(&gwrites, tbl_rwc_test_param.single_insert,
+                          rte_memory_order_relaxed);
        return 0;
 }
 
@@ -1328,8 +1328,8 @@ struct rwc_perf {
                                rwc_core_cnt[n];
                printf("\nNumber of writers: %u\n", rwc_core_cnt[n]);
 
-               __atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
-               __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
+               rte_atomic_store_explicit(&gwrites, 0, rte_memory_order_relaxed);
+               rte_atomic_store_explicit(&gwrite_cycles, 0, rte_memory_order_relaxed);
 
                rte_hash_reset(tbl_rwc_test_param.h);
                rte_rcu_qsbr_init(rv, RTE_MAX_LCORE);
@@ -1364,8 +1364,8 @@ struct rwc_perf {
                rte_eal_mp_wait_lcore();
 
                unsigned long long cycles_per_write_operation =
-                       __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-                       __atomic_load_n(&gwrites, __ATOMIC_RELAXED);
+                       rte_atomic_load_explicit(&gwrite_cycles, rte_memory_order_relaxed) /
+                       rte_atomic_load_explicit(&gwrites, rte_memory_order_relaxed);
                rwc_perf_results->writer_add_del[n]
                                        = cycles_per_write_operation;
                printf("Cycles per write operation: %llu\n",
diff --git a/app/test/test_lcores.c b/app/test/test_lcores.c
index 3434a0d..bd5c0dd 100644
--- a/app/test/test_lcores.c
+++ b/app/test/test_lcores.c
@@ -10,6 +10,7 @@
 #include <rte_errno.h>
 #include <rte_lcore.h>
 #include <rte_thread.h>
+#include <rte_stdatomic.h>
 
 #include "test.h"
 
@@ -25,7 +26,7 @@ struct thread_context {
        enum { Thread_INIT, Thread_ERROR, Thread_DONE } state;
        bool lcore_id_any;
        rte_thread_t id;
-       unsigned int *registered_count;
+       RTE_ATOMIC(unsigned int) *registered_count;
 };
 
 static uint32_t thread_loop(void *arg)
@@ -49,10 +50,10 @@ static uint32_t thread_loop(void *arg)
                t->state = Thread_ERROR;
        }
        /* Report register happened to the control thread. */
-       __atomic_fetch_add(t->registered_count, 1, __ATOMIC_RELEASE);
+       rte_atomic_fetch_add_explicit(t->registered_count, 1, rte_memory_order_release);
 
        /* Wait for release from the control thread. */
-       while (__atomic_load_n(t->registered_count, __ATOMIC_ACQUIRE) != 0)
+       while (rte_atomic_load_explicit(t->registered_count, rte_memory_order_acquire) != 0)
                sched_yield();
        rte_thread_unregister();
        lcore_id = rte_lcore_id();
@@ -73,7 +74,7 @@ static uint32_t thread_loop(void *arg)
 {
        struct thread_context thread_contexts[RTE_MAX_LCORE];
        unsigned int non_eal_threads_count;
-       unsigned int registered_count;
+       RTE_ATOMIC(unsigned int) registered_count;
        struct thread_context *t;
        unsigned int i;
        int ret;
@@ -93,7 +94,7 @@ static uint32_t thread_loop(void *arg)
        }
        printf("non-EAL threads count: %u\n", non_eal_threads_count);
        /* Wait all non-EAL threads to register. */
-       while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
+       while (rte_atomic_load_explicit(&registered_count, rte_memory_order_acquire) !=
                        non_eal_threads_count)
                sched_yield();
 
@@ -109,14 +110,14 @@ static uint32_t thread_loop(void *arg)
        if (rte_thread_create(&t->id, NULL, thread_loop, t) == 0) {
                non_eal_threads_count++;
                printf("non-EAL threads count: %u\n", non_eal_threads_count);
-               while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
+               while (rte_atomic_load_explicit(&registered_count, rte_memory_order_acquire) !=
                                non_eal_threads_count)
                        sched_yield();
        }
 
 skip_lcore_any:
        /* Release all threads, and check their states. */
-       __atomic_store_n(&registered_count, 0, __ATOMIC_RELEASE);
+       rte_atomic_store_explicit(&registered_count, 0, rte_memory_order_release);
        ret = 0;
        for (i = 0; i < non_eal_threads_count; i++) {
                t = &thread_contexts[i];
@@ -225,7 +226,7 @@ struct limit_lcore_context {
        struct thread_context thread_contexts[2];
        unsigned int non_eal_threads_count = 0;
        struct limit_lcore_context l[2] = {};
-       unsigned int registered_count = 0;
+       RTE_ATOMIC(unsigned int) registered_count = 0;
        struct thread_context *t;
        void *handle[2] = {};
        unsigned int i;
@@ -275,7 +276,7 @@ struct limit_lcore_context {
        if (rte_thread_create(&t->id, NULL, thread_loop, t) != 0)
                goto cleanup_threads;
        non_eal_threads_count++;
-       while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
+       while (rte_atomic_load_explicit(&registered_count, rte_memory_order_acquire) !=
                        non_eal_threads_count)
                sched_yield();
        if (l[0].init != eal_threads_count + 1 ||
@@ -298,7 +299,7 @@ struct limit_lcore_context {
        if (rte_thread_create(&t->id, NULL, thread_loop, t) != 0)
                goto cleanup_threads;
        non_eal_threads_count++;
-       while (__atomic_load_n(&registered_count, __ATOMIC_ACQUIRE) !=
+       while (rte_atomic_load_explicit(&registered_count, rte_memory_order_acquire) !=
                        non_eal_threads_count)
                sched_yield();
        if (l[0].init != eal_threads_count + 2 ||
@@ -315,7 +316,7 @@ struct limit_lcore_context {
        }
        rte_lcore_dump(stdout);
        /* Release all threads, and check their states. */
-       __atomic_store_n(&registered_count, 0, __ATOMIC_RELEASE);
+       rte_atomic_store_explicit(&registered_count, 0, rte_memory_order_release);
        ret = 0;
        for (i = 0; i < non_eal_threads_count; i++) {
                t = &thread_contexts[i];
@@ -337,7 +338,7 @@ struct limit_lcore_context {
 
 cleanup_threads:
        /* Release all threads */
-       __atomic_store_n(&registered_count, 0, __ATOMIC_RELEASE);
+       rte_atomic_store_explicit(&registered_count, 0, 
rte_memory_order_release);
        for (i = 0; i < non_eal_threads_count; i++) {
                t = &thread_contexts[i];
                rte_thread_join(t->id, NULL);
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 82daf9e..bc4bdde 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -22,8 +22,8 @@
 struct rte_lpm *lpm;
 static struct rte_rcu_qsbr *rv;
 static volatile uint8_t writer_done;
-static volatile uint32_t thr_id;
-static uint64_t gwrite_cycles;
+static volatile RTE_ATOMIC(uint32_t) thr_id;
+static RTE_ATOMIC(uint64_t) gwrite_cycles;
 static uint32_t num_writers;
 
 /* LPM APIs are not thread safe, use spinlock */
@@ -362,7 +362,7 @@ static void generate_large_route_rule_table(void)
 {
        uint32_t tmp_thr_id;
 
-       tmp_thr_id = __atomic_fetch_add(&thr_id, 1, __ATOMIC_RELAXED);
+       tmp_thr_id = rte_atomic_fetch_add_explicit(&thr_id, 1, rte_memory_order_relaxed);
        if (tmp_thr_id >= RTE_MAX_LCORE)
                printf("Invalid thread id %u\n", tmp_thr_id);
 
@@ -470,7 +470,7 @@ static void generate_large_route_rule_table(void)
 
        total_cycles = rte_rdtsc_precise() - begin;
 
-       __atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit(&gwrite_cycles, total_cycles, rte_memory_order_relaxed);
 
        return 0;
 
@@ -540,9 +540,9 @@ static void generate_large_route_rule_table(void)
                        reader_f = test_lpm_reader;
 
                writer_done = 0;
-               __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
+               rte_atomic_store_explicit(&gwrite_cycles, 0, rte_memory_order_relaxed);
 
-               __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
+               rte_atomic_store_explicit(&thr_id, 0, rte_memory_order_seq_cst);
 
                /* Launch reader threads */
                for (i = j; i < num_cores; i++)
@@ -563,7 +563,7 @@ static void generate_large_route_rule_table(void)
                printf("Total LPM Adds: %d\n", TOTAL_WRITES);
                printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
                printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-                       __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+                       rte_atomic_load_explicit(&gwrite_cycles, rte_memory_order_relaxed)
                        / TOTAL_WRITES);
 
                writer_done = 1;
diff --git a/app/test/test_mcslock.c b/app/test/test_mcslock.c
index 46ff13c..8fcbc11 100644
--- a/app/test/test_mcslock.c
+++ b/app/test/test_mcslock.c
@@ -42,7 +42,7 @@
 
 static unsigned int count;
 
-static uint32_t synchro;
+static RTE_ATOMIC(uint32_t) synchro;
 
 static int
 test_mcslock_per_core(__rte_unused void *arg)
@@ -75,7 +75,7 @@
        rte_mcslock_t ml_perf_me;
 
        /* wait synchro */
-       rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+       rte_wait_until_equal_32((uint32_t *)(uintptr_t)&synchro, 1, rte_memory_order_relaxed);
 
        begin = rte_get_timer_cycles();
        while (lcount < MAX_LOOP) {
@@ -100,14 +100,14 @@
        const unsigned int lcore = rte_lcore_id();
 
        printf("\nTest with no lock on single core...\n");
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&synchro, 1, rte_memory_order_relaxed);
        load_loop_fn(&lock);
        printf("Core [%u] Cost Time = %"PRIu64" us\n",
                        lcore, time_count[lcore]);
        memset(time_count, 0, sizeof(time_count));
 
        printf("\nTest with lock on single core...\n");
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&synchro, 1, rte_memory_order_relaxed);
        lock = 1;
        load_loop_fn(&lock);
        printf("Core [%u] Cost Time = %"PRIu64" us\n",
@@ -116,11 +116,11 @@
 
        printf("\nTest with lock on %u cores...\n", (rte_lcore_count()));
 
-       __atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&synchro, 0, rte_memory_order_relaxed);
        rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MAIN);
 
        /* start synchro and launch test on main */
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&synchro, 1, rte_memory_order_relaxed);
        load_loop_fn(&lock);
 
        rte_eal_mp_wait_lcore();
diff --git a/app/test/test_mempool_perf.c b/app/test/test_mempool_perf.c
index 96de347..35f0597 100644
--- a/app/test/test_mempool_perf.c
+++ b/app/test/test_mempool_perf.c
@@ -88,7 +88,7 @@
 static int use_external_cache;
 static unsigned external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
 
-static uint32_t synchro;
+static RTE_ATOMIC(uint32_t) synchro;
 
 /* number of objects in one bulk operation (get or put) */
 static unsigned n_get_bulk;
@@ -188,7 +188,8 @@ struct mempool_test_stats {
 
        /* wait synchro for workers */
        if (lcore_id != rte_get_main_lcore())
-               rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+               rte_wait_until_equal_32((uint32_t *)(uintptr_t)&synchro, 1,
+                   rte_memory_order_relaxed);
 
        start_cycles = rte_get_timer_cycles();
 
@@ -233,7 +234,7 @@ struct mempool_test_stats {
        int ret;
        unsigned cores_save = cores;
 
-       __atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&synchro, 0, rte_memory_order_relaxed);
 
        /* reset stats */
        memset(stats, 0, sizeof(stats));
@@ -258,7 +259,7 @@ struct mempool_test_stats {
        }
 
        /* start synchro and launch test on main */
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&synchro, 1, rte_memory_order_relaxed);
 
        ret = per_lcore_mempool_test(mp);
 
diff --git a/app/test/test_pflock.c b/app/test/test_pflock.c
index 5f77b15..d989a68 100644
--- a/app/test/test_pflock.c
+++ b/app/test/test_pflock.c
@@ -31,7 +31,7 @@
 
 static rte_pflock_t sl;
 static rte_pflock_t sl_tab[RTE_MAX_LCORE];
-static uint32_t synchro;
+static RTE_ATOMIC(uint32_t) synchro;
 
 static int
 test_pflock_per_core(__rte_unused void *arg)
@@ -69,7 +69,8 @@
 
        /* wait synchro for workers */
        if (lcore != rte_get_main_lcore())
-               rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+               rte_wait_until_equal_32((uint32_t *)(uintptr_t)&synchro, 1,
+                   rte_memory_order_relaxed);
 
        begin = rte_rdtsc_precise();
        while (lcount < MAX_LOOP) {
@@ -99,7 +100,7 @@
        const unsigned int lcore = rte_lcore_id();
 
        printf("\nTest with no lock on single core...\n");
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&synchro, 1, rte_memory_order_relaxed);
        load_loop_fn(&lock);
        printf("Core [%u] Cost Time = %"PRIu64" us\n",
                        lcore, time_count[lcore]);
@@ -107,7 +108,7 @@
 
        printf("\nTest with phase-fair lock on single core...\n");
        lock = 1;
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&synchro, 1, rte_memory_order_relaxed);
        load_loop_fn(&lock);
        printf("Core [%u] Cost Time = %"PRIu64" us\n",
                        lcore, time_count[lcore]);
@@ -116,12 +117,12 @@
        printf("\nPhase-fair test on %u cores...\n", rte_lcore_count());
 
        /* clear synchro and start workers */
-       __atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&synchro, 0, rte_memory_order_relaxed);
        if (rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MAIN) < 0)
                return -1;
 
        /* start synchro and launch test on main */
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&synchro, 1, rte_memory_order_relaxed);
        load_loop_fn(&lock);
 
        rte_eal_mp_wait_lcore();
diff --git a/app/test/test_pmd_perf.c b/app/test/test_pmd_perf.c
index f6d97f2..46ae80d 100644
--- a/app/test/test_pmd_perf.c
+++ b/app/test/test_pmd_perf.c
@@ -537,7 +537,7 @@ enum {
        return 0;
 }
 
-static uint64_t start;
+static RTE_ATOMIC(uint64_t) start;
 
 static inline int
 poll_burst(void *args)
@@ -575,7 +575,7 @@ enum {
                num[portid] = pkt_per_port;
        }
 
-       rte_wait_until_equal_64(&start, 1, __ATOMIC_ACQUIRE);
+       rte_wait_until_equal_64((uint64_t *)(uintptr_t)&start, 1, rte_memory_order_acquire);
 
        cur_tsc = rte_rdtsc();
        while (total) {
@@ -629,9 +629,9 @@ enum {
 
        /* only when polling first */
        if (flags == SC_BURST_POLL_FIRST)
-               __atomic_store_n(&start, 1, __ATOMIC_RELAXED);
+               rte_atomic_store_explicit(&start, 1, rte_memory_order_relaxed);
        else
-               __atomic_store_n(&start, 0, __ATOMIC_RELAXED);
+               rte_atomic_store_explicit(&start, 0, rte_memory_order_relaxed);
 
        /* start polling thread
         * if in POLL_FIRST mode, poll once launched;
@@ -655,7 +655,7 @@ enum {
 
        /* only when polling second  */
        if (flags == SC_BURST_XMIT_FIRST)
-               __atomic_store_n(&start, 1, __ATOMIC_RELEASE);
+               rte_atomic_store_explicit(&start, 1, rte_memory_order_release);
 
        /* wait for polling finished */
        diff_tsc = rte_eal_wait_lcore(lcore);
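
The start flag above is a plain release/acquire handshake: the transmit side publishes its packets and then performs a release store, and the poller's acquire wait makes those writes visible. A stripped-down sketch of the same idiom (names are illustrative, not from the patch):

#include <rte_pause.h>
#include <rte_stdatomic.h>

static RTE_ATOMIC(uint64_t) go;

/* poller lcore: block until the transmitter signals, with acquire ordering */
static void
poller_wait(void)
{
        rte_wait_until_equal_64((uint64_t *)(uintptr_t)&go, 1,
            rte_memory_order_acquire);
        /* everything written before the matching release store is visible here */
}

/* transmit lcore: make the burst visible, then signal the poller */
static void
xmit_then_signal(void)
{
        /* ... queue the packets ... */
        rte_atomic_store_explicit(&go, 1, rte_memory_order_release);
}
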
diff --git a/app/test/test_rcu_qsbr_perf.c b/app/test/test_rcu_qsbr_perf.c
index ce88a73..d1bf5c5 100644
--- a/app/test/test_rcu_qsbr_perf.c
+++ b/app/test/test_rcu_qsbr_perf.c
@@ -25,13 +25,15 @@
 static uint32_t *hash_data[TOTAL_ENTRY];
 static volatile uint8_t writer_done;
 static volatile uint8_t all_registered;
-static volatile uint32_t thr_id;
+static volatile RTE_ATOMIC(uint32_t) thr_id;
 
 static struct rte_rcu_qsbr *t[RTE_MAX_LCORE];
 static struct rte_hash *h;
 static char hash_name[8];
-static uint64_t updates, checks;
-static uint64_t update_cycles, check_cycles;
+static RTE_ATOMIC(uint64_t) updates;
+static RTE_ATOMIC(uint64_t) checks;
+static RTE_ATOMIC(uint64_t) update_cycles;
+static RTE_ATOMIC(uint64_t) check_cycles;
 
 /* Scale down results to 1000 operations to support lower
  * granularity clocks.
@@ -44,7 +46,7 @@
 {
        uint32_t tmp_thr_id;
 
-       tmp_thr_id = __atomic_fetch_add(&thr_id, 1, __ATOMIC_RELAXED);
+       tmp_thr_id = rte_atomic_fetch_add_explicit(&thr_id, 1, rte_memory_order_relaxed);
        if (tmp_thr_id >= RTE_MAX_LCORE)
                printf("Invalid thread id %u\n", tmp_thr_id);
 
@@ -81,8 +83,8 @@
        }
 
        cycles = rte_rdtsc_precise() - begin;
-       __atomic_fetch_add(&update_cycles, cycles, __ATOMIC_RELAXED);
-       __atomic_fetch_add(&updates, loop_cnt, __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit(&update_cycles, cycles, rte_memory_order_relaxed);
+       rte_atomic_fetch_add_explicit(&updates, loop_cnt, rte_memory_order_relaxed);
 
        /* Make the thread offline */
        rte_rcu_qsbr_thread_offline(t[0], thread_id);
@@ -113,8 +115,8 @@
        } while (loop_cnt < 20000000);
 
        cycles = rte_rdtsc_precise() - begin;
-       __atomic_fetch_add(&check_cycles, cycles, __ATOMIC_RELAXED);
-       __atomic_fetch_add(&checks, loop_cnt, __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit(&check_cycles, cycles, rte_memory_order_relaxed);
+       rte_atomic_fetch_add_explicit(&checks, loop_cnt, rte_memory_order_relaxed);
        return 0;
 }
 
@@ -130,15 +132,15 @@
 
        writer_done = 0;
 
-       __atomic_store_n(&updates, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&update_cycles, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&checks, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&check_cycles, 0, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&updates, 0, rte_memory_order_relaxed);
+       rte_atomic_store_explicit(&update_cycles, 0, rte_memory_order_relaxed);
+       rte_atomic_store_explicit(&checks, 0, rte_memory_order_relaxed);
+       rte_atomic_store_explicit(&check_cycles, 0, rte_memory_order_relaxed);
 
        printf("\nPerf Test: %d Readers/1 Writer('wait' in qsbr_check == 
true)\n",
                num_cores - 1);
 
-       __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
+       rte_atomic_store_explicit(&thr_id, 0, rte_memory_order_seq_cst);
 
        if (all_registered == 1)
                tmp_num_cores = num_cores - 1;
@@ -168,15 +170,16 @@
        rte_eal_mp_wait_lcore();
 
        printf("Total quiescent state updates = %"PRIi64"\n",
-               __atomic_load_n(&updates, __ATOMIC_RELAXED));
+               rte_atomic_load_explicit(&updates, rte_memory_order_relaxed));
        printf("Cycles per %d quiescent state updates: %"PRIi64"\n",
                RCU_SCALE_DOWN,
-               __atomic_load_n(&update_cycles, __ATOMIC_RELAXED) /
-               (__atomic_load_n(&updates, __ATOMIC_RELAXED) / RCU_SCALE_DOWN));
-       printf("Total RCU checks = %"PRIi64"\n", __atomic_load_n(&checks, 
__ATOMIC_RELAXED));
+               rte_atomic_load_explicit(&update_cycles, 
rte_memory_order_relaxed) /
+               (rte_atomic_load_explicit(&updates, rte_memory_order_relaxed) / 
RCU_SCALE_DOWN));
+       printf("Total RCU checks = %"PRIi64"\n", 
rte_atomic_load_explicit(&checks,
+           rte_memory_order_relaxed));
        printf("Cycles per %d checks: %"PRIi64"\n", RCU_SCALE_DOWN,
-               __atomic_load_n(&check_cycles, __ATOMIC_RELAXED) /
-               (__atomic_load_n(&checks, __ATOMIC_RELAXED) / RCU_SCALE_DOWN));
+               rte_atomic_load_explicit(&check_cycles, rte_memory_order_relaxed) /
+               (rte_atomic_load_explicit(&checks, rte_memory_order_relaxed) / RCU_SCALE_DOWN));
 
        rte_free(t[0]);
 
@@ -193,10 +196,10 @@
        size_t sz;
        unsigned int i, tmp_num_cores;
 
-       __atomic_store_n(&updates, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&update_cycles, 0, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&updates, 0, rte_memory_order_relaxed);
+       rte_atomic_store_explicit(&update_cycles, 0, rte_memory_order_relaxed);
 
-       __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
+       rte_atomic_store_explicit(&thr_id, 0, rte_memory_order_seq_cst);
 
        printf("\nPerf Test: %d Readers\n", num_cores);
 
@@ -220,11 +223,11 @@
        rte_eal_mp_wait_lcore();
 
        printf("Total quiescent state updates = %"PRIi64"\n",
-               __atomic_load_n(&updates, __ATOMIC_RELAXED));
+               rte_atomic_load_explicit(&updates, rte_memory_order_relaxed));
        printf("Cycles per %d quiescent state updates: %"PRIi64"\n",
                RCU_SCALE_DOWN,
-               __atomic_load_n(&update_cycles, __ATOMIC_RELAXED) /
-               (__atomic_load_n(&updates, __ATOMIC_RELAXED) / RCU_SCALE_DOWN));
+               rte_atomic_load_explicit(&update_cycles, rte_memory_order_relaxed) /
+               (rte_atomic_load_explicit(&updates, rte_memory_order_relaxed) / RCU_SCALE_DOWN));
 
        rte_free(t[0]);
 
@@ -241,10 +244,10 @@
        size_t sz;
        unsigned int i;
 
-       __atomic_store_n(&checks, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&check_cycles, 0, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&checks, 0, rte_memory_order_relaxed);
+       rte_atomic_store_explicit(&check_cycles, 0, rte_memory_order_relaxed);
 
-       __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
+       rte_atomic_store_explicit(&thr_id, 0, rte_memory_order_seq_cst);
 
        printf("\nPerf test: %d Writers ('wait' in qsbr_check == false)\n",
                num_cores);
@@ -266,10 +269,11 @@
        /* Wait until all readers have exited */
        rte_eal_mp_wait_lcore();
 
-       printf("Total RCU checks = %"PRIi64"\n", __atomic_load_n(&checks, 
__ATOMIC_RELAXED));
+       printf("Total RCU checks = %"PRIi64"\n", 
rte_atomic_load_explicit(&checks,
+           rte_memory_order_relaxed));
        printf("Cycles per %d checks: %"PRIi64"\n", RCU_SCALE_DOWN,
-               __atomic_load_n(&check_cycles, __ATOMIC_RELAXED) /
-               (__atomic_load_n(&checks, __ATOMIC_RELAXED) / RCU_SCALE_DOWN));
+               rte_atomic_load_explicit(&check_cycles, rte_memory_order_relaxed) /
+               (rte_atomic_load_explicit(&checks, rte_memory_order_relaxed) / RCU_SCALE_DOWN));
 
        rte_free(t[0]);
 
@@ -317,8 +321,8 @@
        } while (!writer_done);
 
        cycles = rte_rdtsc_precise() - begin;
-       __atomic_fetch_add(&update_cycles, cycles, __ATOMIC_RELAXED);
-       __atomic_fetch_add(&updates, loop_cnt, __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit(&update_cycles, cycles, rte_memory_order_relaxed);
+       rte_atomic_fetch_add_explicit(&updates, loop_cnt, rte_memory_order_relaxed);
 
        rte_rcu_qsbr_thread_unregister(temp, thread_id);
 
@@ -389,12 +393,12 @@ static struct rte_hash *init_hash(void)
 
        writer_done = 0;
 
-       __atomic_store_n(&updates, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&update_cycles, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&checks, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&check_cycles, 0, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&updates, 0, rte_memory_order_relaxed);
+       rte_atomic_store_explicit(&update_cycles, 0, rte_memory_order_relaxed);
+       rte_atomic_store_explicit(&checks, 0, rte_memory_order_relaxed);
+       rte_atomic_store_explicit(&check_cycles, 0, rte_memory_order_relaxed);
 
-       __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
+       rte_atomic_store_explicit(&thr_id, 0, rte_memory_order_seq_cst);
 
        printf("\nPerf test: 1 writer, %d readers, 1 QSBR variable, 1 QSBR 
Query, Blocking QSBR Check\n", num_cores);
 
@@ -453,8 +457,8 @@ static struct rte_hash *init_hash(void)
        }
 
        cycles = rte_rdtsc_precise() - begin;
-       __atomic_fetch_add(&check_cycles, cycles, __ATOMIC_RELAXED);
-       __atomic_fetch_add(&checks, i, __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit(&check_cycles, cycles, rte_memory_order_relaxed);
+       rte_atomic_fetch_add_explicit(&checks, i, rte_memory_order_relaxed);
 
        writer_done = 1;
 
@@ -467,12 +471,12 @@ static struct rte_hash *init_hash(void)
 
        printf("Following numbers include calls to rte_hash functions\n");
        printf("Cycles per 1 quiescent state update(online/update/offline): 
%"PRIi64"\n",
-               __atomic_load_n(&update_cycles, __ATOMIC_RELAXED) /
-               __atomic_load_n(&updates, __ATOMIC_RELAXED));
+               rte_atomic_load_explicit(&update_cycles, rte_memory_order_relaxed) /
+               rte_atomic_load_explicit(&updates, rte_memory_order_relaxed));
 
        printf("Cycles per 1 check(start, check): %"PRIi64"\n\n",
-               __atomic_load_n(&check_cycles, __ATOMIC_RELAXED) /
-               __atomic_load_n(&checks, __ATOMIC_RELAXED));
+               rte_atomic_load_explicit(&check_cycles, rte_memory_order_relaxed) /
+               rte_atomic_load_explicit(&checks, rte_memory_order_relaxed));
 
        rte_free(t[0]);
 
@@ -511,7 +515,7 @@ static struct rte_hash *init_hash(void)
 
        printf("Perf test: 1 writer, %d readers, 1 QSBR variable, 1 QSBR Query, 
Non-Blocking QSBR check\n", num_cores);
 
-       __atomic_store_n(&thr_id, 0, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&thr_id, 0, rte_memory_order_relaxed);
 
        if (all_registered == 1)
                tmp_num_cores = num_cores;
@@ -570,8 +574,8 @@ static struct rte_hash *init_hash(void)
        }
 
        cycles = rte_rdtsc_precise() - begin;
-       __atomic_fetch_add(&check_cycles, cycles, __ATOMIC_RELAXED);
-       __atomic_fetch_add(&checks, i, __ATOMIC_RELAXED);
+       rte_atomic_fetch_add_explicit(&check_cycles, cycles, rte_memory_order_relaxed);
+       rte_atomic_fetch_add_explicit(&checks, i, rte_memory_order_relaxed);
 
        writer_done = 1;
        /* Wait and check return value from reader threads */
@@ -583,12 +587,12 @@ static struct rte_hash *init_hash(void)
 
        printf("Following numbers include calls to rte_hash functions\n");
        printf("Cycles per 1 quiescent state update(online/update/offline): 
%"PRIi64"\n",
-               __atomic_load_n(&update_cycles, __ATOMIC_RELAXED) /
-               __atomic_load_n(&updates, __ATOMIC_RELAXED));
+               rte_atomic_load_explicit(&update_cycles, rte_memory_order_relaxed) /
+               rte_atomic_load_explicit(&updates, rte_memory_order_relaxed));
 
        printf("Cycles per 1 check(start, check): %"PRIi64"\n\n",
-               __atomic_load_n(&check_cycles, __ATOMIC_RELAXED) /
-               __atomic_load_n(&checks, __ATOMIC_RELAXED));
+               rte_atomic_load_explicit(&check_cycles, rte_memory_order_relaxed) /
+               rte_atomic_load_explicit(&checks, rte_memory_order_relaxed));
 
        rte_free(t[0]);
 
@@ -622,10 +626,10 @@ static struct rte_hash *init_hash(void)
                return TEST_SKIPPED;
        }
 
-       __atomic_store_n(&updates, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&update_cycles, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&checks, 0, __ATOMIC_RELAXED);
-       __atomic_store_n(&check_cycles, 0, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&updates, 0, rte_memory_order_relaxed);
+       rte_atomic_store_explicit(&update_cycles, 0, rte_memory_order_relaxed);
+       rte_atomic_store_explicit(&checks, 0, rte_memory_order_relaxed);
+       rte_atomic_store_explicit(&check_cycles, 0, rte_memory_order_relaxed);
 
        num_cores = 0;
        RTE_LCORE_FOREACH_WORKER(core_id) {
diff --git a/app/test/test_ring_perf.c b/app/test/test_ring_perf.c
index d7c5a4c..6d7a0a8 100644
--- a/app/test/test_ring_perf.c
+++ b/app/test/test_ring_perf.c
@@ -186,7 +186,7 @@ struct thread_params {
        void *burst = NULL;
 
 #ifdef RTE_USE_C11_MEM_MODEL
-       if (__atomic_fetch_add(&lcore_count, 1, __ATOMIC_RELAXED) + 1 != 2)
+       if (rte_atomic_fetch_add_explicit(&lcore_count, 1, rte_memory_order_relaxed) + 1 != 2)
 #else
        if (__sync_add_and_fetch(&lcore_count, 1) != 2)
 #endif
@@ -320,7 +320,7 @@ struct thread_params {
        return 0;
 }
 
-static uint32_t synchro;
+static RTE_ATOMIC(uint32_t) synchro;
 static uint64_t queue_count[RTE_MAX_LCORE];
 
 #define TIME_MS 100
@@ -342,7 +342,8 @@ struct thread_params {
 
        /* wait synchro for workers */
        if (lcore != rte_get_main_lcore())
-               rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+               rte_wait_until_equal_32((uint32_t *)(uintptr_t)&synchro, 1,
+                   rte_memory_order_relaxed);
 
        begin = rte_get_timer_cycles();
        while (time_diff < hz * TIME_MS / 1000) {
@@ -397,12 +398,12 @@ struct thread_params {
                param.r = r;
 
                /* clear synchro and start workers */
-               __atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+               rte_atomic_store_explicit(&synchro, 0, rte_memory_order_relaxed);
                if (rte_eal_mp_remote_launch(lcore_f, &param, SKIP_MAIN) < 0)
                        return -1;
 
                /* start synchro and launch test on main */
-               __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+               rte_atomic_store_explicit(&synchro, 1, rte_memory_order_relaxed);
                lcore_f(&param);
 
                rte_eal_mp_wait_lcore();
diff --git a/app/test/test_ring_stress_impl.h b/app/test/test_ring_stress_impl.h
index 2dec897..e6b23c0 100644
--- a/app/test/test_ring_stress_impl.h
+++ b/app/test/test_ring_stress_impl.h
@@ -24,7 +24,7 @@ enum {
        WRK_CMD_RUN,
 };
 
-static uint32_t wrk_cmd __rte_cache_aligned = WRK_CMD_STOP;
+static RTE_ATOMIC(uint32_t) wrk_cmd __rte_cache_aligned = WRK_CMD_STOP;
 
 /* test run-time in seconds */
 static const uint32_t run_time = 60;
@@ -203,7 +203,7 @@ struct ring_elem {
         * really releasing any data through 'wrk_cmd' to
         * the worker.
         */
-       while (__atomic_load_n(&wrk_cmd, __ATOMIC_RELAXED) != WRK_CMD_RUN)
+       while (rte_atomic_load_explicit(&wrk_cmd, rte_memory_order_relaxed) != WRK_CMD_RUN)
                rte_pause();
 
        cl = rte_rdtsc_precise();
@@ -246,7 +246,7 @@ struct ring_elem {
 
                lcore_stat_update(&la->stats, 1, num, tm0 + tm1, prcs);
 
-       } while (__atomic_load_n(&wrk_cmd, __ATOMIC_RELAXED) == WRK_CMD_RUN);
+       } while (rte_atomic_load_explicit(&wrk_cmd, rte_memory_order_relaxed) == WRK_CMD_RUN);
 
        cl = rte_rdtsc_precise() - cl;
        if (prcs == 0)
@@ -360,12 +360,12 @@ struct ring_elem {
        }
 
        /* signal worker to start test */
-       __atomic_store_n(&wrk_cmd, WRK_CMD_RUN, __ATOMIC_RELEASE);
+       rte_atomic_store_explicit(&wrk_cmd, WRK_CMD_RUN, rte_memory_order_release);
 
        rte_delay_us(run_time * US_PER_S);
 
        /* signal worker to start test */
-       __atomic_store_n(&wrk_cmd, WRK_CMD_STOP, __ATOMIC_RELEASE);
+       rte_atomic_store_explicit(&wrk_cmd, WRK_CMD_STOP, rte_memory_order_release);
 
        /* wait for workers and collect stats. */
        mc = rte_lcore_id();
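
As the comment above notes, the worker can poll wrk_cmd with relaxed loads because no data is published through the flag itself, while the main lcore still uses release stores for the state changes. A reduced sketch of that command flag (illustrative only, not part of the patch):

#include <rte_cycles.h>
#include <rte_pause.h>
#include <rte_stdatomic.h>

enum { CMD_STOP, CMD_RUN };

static RTE_ATOMIC(uint32_t) cmd = CMD_STOP;

/* worker: wait for the run command, work until told to stop */
static int
worker(void *arg)
{
        (void)arg;
        while (rte_atomic_load_explicit(&cmd, rte_memory_order_relaxed) != CMD_RUN)
                rte_pause();
        do {
                /* ... enqueue/dequeue one bulk ... */
        } while (rte_atomic_load_explicit(&cmd, rte_memory_order_relaxed) == CMD_RUN);
        return 0;
}

/* main: start the run, let it execute for run_time seconds, then stop it */
static void
run(void)
{
        rte_atomic_store_explicit(&cmd, CMD_RUN, rte_memory_order_release);
        rte_delay_us(run_time * US_PER_S);
        rte_atomic_store_explicit(&cmd, CMD_STOP, rte_memory_order_release);
}
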
diff --git a/app/test/test_rwlock.c b/app/test/test_rwlock.c
index 5079895..f67fc35 100644
--- a/app/test/test_rwlock.c
+++ b/app/test/test_rwlock.c
@@ -35,7 +35,7 @@
 
 static rte_rwlock_t sl;
 static rte_rwlock_t sl_tab[RTE_MAX_LCORE];
-static uint32_t synchro;
+static RTE_ATOMIC(uint32_t) synchro;
 
 enum {
        LC_TYPE_RDLOCK,
@@ -101,7 +101,8 @@ struct try_rwlock_lcore {
 
        /* wait synchro for workers */
        if (lcore != rte_get_main_lcore())
-               rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+               rte_wait_until_equal_32((uint32_t *)(uintptr_t)&synchro, 1,
+                   rte_memory_order_relaxed);
 
        begin = rte_rdtsc_precise();
        while (lcount < MAX_LOOP) {
@@ -134,12 +135,12 @@ struct try_rwlock_lcore {
        printf("\nRwlock Perf Test on %u cores...\n", rte_lcore_count());
 
        /* clear synchro and start workers */
-       __atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&synchro, 0, rte_memory_order_relaxed);
        if (rte_eal_mp_remote_launch(load_loop_fn, NULL, SKIP_MAIN) < 0)
                return -1;
 
        /* start synchro and launch test on main */
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&synchro, 1, rte_memory_order_relaxed);
        load_loop_fn(NULL);
 
        rte_eal_mp_wait_lcore();
diff --git a/app/test/test_seqlock.c b/app/test/test_seqlock.c
index 873bd60..7455bac 100644
--- a/app/test/test_seqlock.c
+++ b/app/test/test_seqlock.c
@@ -22,7 +22,7 @@ struct data {
 
 struct reader {
        struct data *data;
-       uint8_t stop;
+       RTE_ATOMIC(uint8_t) stop;
 };
 
 #define WRITER_RUNTIME 2.0 /* s */
@@ -79,7 +79,7 @@ struct reader {
        struct reader *r = arg;
        int rc = TEST_SUCCESS;
 
-       while (__atomic_load_n(&r->stop, __ATOMIC_RELAXED) == 0 &&
+       while (rte_atomic_load_explicit(&r->stop, rte_memory_order_relaxed) == 0 &&
                        rc == TEST_SUCCESS) {
                struct data *data = r->data;
                bool interrupted;
@@ -115,7 +115,7 @@ struct reader {
 static void
 reader_stop(struct reader *reader)
 {
-       __atomic_store_n(&reader->stop, 1, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&reader->stop, 1, rte_memory_order_relaxed);
 }
 
 #define NUM_WRITERS 2 /* main lcore + one worker */
diff --git a/app/test/test_service_cores.c b/app/test/test_service_cores.c
index c12d52d..010ab82 100644
--- a/app/test/test_service_cores.c
+++ b/app/test/test_service_cores.c
@@ -59,15 +59,15 @@ static int32_t dummy_mt_unsafe_cb(void *args)
         * test, because two threads are concurrently in a non-MT safe callback.
         */
        uint32_t *test_params = args;
-       uint32_t *lock = &test_params[0];
+       RTE_ATOMIC(uint32_t) *lock = (uint32_t __rte_atomic *)&test_params[0];
        uint32_t *pass_test = &test_params[1];
        uint32_t exp = 0;
-       int lock_taken = __atomic_compare_exchange_n(lock, &exp, 1, 0,
-                                       __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+       int lock_taken = rte_atomic_compare_exchange_strong_explicit(lock, &exp, 1,
+                                       rte_memory_order_relaxed, rte_memory_order_relaxed);
        if (lock_taken) {
                /* delay with the lock held */
                rte_delay_ms(250);
-               __atomic_store_n(lock, 0, __ATOMIC_RELAXED);
+               rte_atomic_store_explicit(lock, 0, rte_memory_order_relaxed);
        } else {
                /* 2nd thread will fail to take lock, so clear pass flag */
                *pass_test = 0;
@@ -86,15 +86,15 @@ static int32_t dummy_mt_safe_cb(void *args)
         *    that 2 threads are running the callback at the same time: MT safe
         */
        uint32_t *test_params = args;
-       uint32_t *lock = &test_params[0];
+       RTE_ATOMIC(uint32_t) *lock = (uint32_t __rte_atomic *)&test_params[0];
        uint32_t *pass_test = &test_params[1];
        uint32_t exp = 0;
-       int lock_taken = __atomic_compare_exchange_n(lock, &exp, 1, 0,
-                                       __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+       int lock_taken = rte_atomic_compare_exchange_strong_explicit(lock, &exp, 1,
+                                       rte_memory_order_relaxed, rte_memory_order_relaxed);
        if (lock_taken) {
                /* delay with the lock held */
                rte_delay_ms(250);
-               __atomic_store_n(lock, 0, __ATOMIC_RELAXED);
+               rte_atomic_store_explicit(lock, 0, rte_memory_order_relaxed);
        } else {
                /* 2nd thread will fail to take lock, so set pass flag */
                *pass_test = 1;
@@ -748,15 +748,15 @@ static int32_t dummy_mt_safe_cb(void *args)
 
        /* retrieve done flag and lock to add/sub */
        uint32_t *done = &params[0];
-       uint32_t *lock = &params[1];
+       RTE_ATOMIC(uint32_t) *lock = (uint32_t __rte_atomic *)&params[1];
 
        while (!*done) {
-               __atomic_fetch_add(lock, 1, __ATOMIC_RELAXED);
+               rte_atomic_fetch_add_explicit(lock, 1, rte_memory_order_relaxed);
                rte_delay_us(500);
-               if (__atomic_load_n(lock, __ATOMIC_RELAXED) > 1)
+               if (rte_atomic_load_explicit(lock, rte_memory_order_relaxed) > 1)
                        /* pass: second core has simultaneously incremented */
                        *done = 1;
-               __atomic_fetch_sub(lock, 1, __ATOMIC_RELAXED);
+               rte_atomic_fetch_sub_explicit(lock, 1, rte_memory_order_relaxed);
        }
 
        return 0;
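
The MT-safety probes above use a relaxed strong compare-exchange as a try-lock: whichever service lcore wins the 0 -> 1 transition holds it for 250 ms, and the loser's failed CAS is what tells the test whether two threads really ran the callback concurrently. Roughly (illustrative, not part of the patch):

#include <rte_stdatomic.h>

static RTE_ATOMIC(uint32_t) probe_lock;

/* returns 1 when this caller performed the 0 -> 1 transition */
static int
probe_try_take(void)
{
        uint32_t exp = 0;

        return rte_atomic_compare_exchange_strong_explicit(&probe_lock, &exp, 1,
            rte_memory_order_relaxed, rte_memory_order_relaxed);
}

static void
probe_release(void)
{
        rte_atomic_store_explicit(&probe_lock, 0, rte_memory_order_relaxed);
}
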
diff --git a/app/test/test_spinlock.c b/app/test/test_spinlock.c
index 9a481f2..a29405a 100644
--- a/app/test/test_spinlock.c
+++ b/app/test/test_spinlock.c
@@ -48,7 +48,7 @@
 static rte_spinlock_recursive_t slr;
 static unsigned count = 0;
 
-static uint32_t synchro;
+static RTE_ATOMIC(uint32_t) synchro;
 
 static int
 test_spinlock_per_core(__rte_unused void *arg)
@@ -110,7 +110,8 @@
 
        /* wait synchro for workers */
        if (lcore != rte_get_main_lcore())
-               rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+               rte_wait_until_equal_32((uint32_t *)(uintptr_t)&synchro, 1,
+                   rte_memory_order_relaxed);
 
        begin = rte_get_timer_cycles();
        while (lcount < MAX_LOOP) {
@@ -149,11 +150,11 @@
        printf("\nTest with lock on %u cores...\n", rte_lcore_count());
 
        /* Clear synchro and start workers */
-       __atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&synchro, 0, rte_memory_order_relaxed);
        rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MAIN);
 
        /* start synchro and launch test on main */
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&synchro, 1, rte_memory_order_relaxed);
        load_loop_fn(&lock);
 
        rte_eal_mp_wait_lcore();
diff --git a/app/test/test_stack_perf.c b/app/test/test_stack_perf.c
index c5e1caa..3f17a26 100644
--- a/app/test/test_stack_perf.c
+++ b/app/test/test_stack_perf.c
@@ -23,7 +23,7 @@
  */
 static volatile unsigned int bulk_sizes[] = {8, MAX_BURST};
 
-static uint32_t lcore_barrier;
+static RTE_ATOMIC(uint32_t) lcore_barrier;
 
 struct lcore_pair {
        unsigned int c1;
@@ -143,8 +143,8 @@ struct thread_args {
        s = args->s;
        size = args->sz;
 
-       __atomic_fetch_sub(&lcore_barrier, 1, __ATOMIC_RELAXED);
-       rte_wait_until_equal_32(&lcore_barrier, 0, __ATOMIC_RELAXED);
+       rte_atomic_fetch_sub_explicit(&lcore_barrier, 1, rte_memory_order_relaxed);
+       rte_wait_until_equal_32((uint32_t *)(uintptr_t)&lcore_barrier, 0, rte_memory_order_relaxed);
 
        uint64_t start = rte_rdtsc();
 
@@ -173,7 +173,7 @@ struct thread_args {
        unsigned int i;
 
        for (i = 0; i < RTE_DIM(bulk_sizes); i++) {
-               __atomic_store_n(&lcore_barrier, 2, __ATOMIC_RELAXED);
+               rte_atomic_store_explicit(&lcore_barrier, 2, rte_memory_order_relaxed);
 
                args[0].sz = args[1].sz = bulk_sizes[i];
                args[0].s = args[1].s = s;
@@ -206,7 +206,7 @@ struct thread_args {
                int cnt = 0;
                double avg;
 
-               __atomic_store_n(&lcore_barrier, n, __ATOMIC_RELAXED);
+               rte_atomic_store_explicit(&lcore_barrier, n, rte_memory_order_relaxed);
 
                RTE_LCORE_FOREACH_WORKER(lcore_id) {
                        if (++cnt >= n)
@@ -300,7 +300,7 @@ struct thread_args {
        struct lcore_pair cores;
        struct rte_stack *s;
 
-       __atomic_store_n(&lcore_barrier, 0, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&lcore_barrier, 0, rte_memory_order_relaxed);
 
        s = rte_stack_create(STACK_NAME, STACK_SIZE, rte_socket_id(), flags);
        if (s == NULL) {
diff --git a/app/test/test_threads.c b/app/test/test_threads.c
index 4ac3f26..6d6881a 100644
--- a/app/test/test_threads.c
+++ b/app/test/test_threads.c
@@ -6,12 +6,13 @@
 
 #include <rte_thread.h>
 #include <rte_debug.h>
+#include <rte_stdatomic.h>
 
 #include "test.h"
 
 RTE_LOG_REGISTER(threads_logtype_test, test.threads, INFO);
 
-static uint32_t thread_id_ready;
+static RTE_ATOMIC(uint32_t) thread_id_ready;
 
 static uint32_t
 thread_main(void *arg)
@@ -19,9 +20,9 @@
        if (arg != NULL)
                *(rte_thread_t *)arg = rte_thread_self();
 
-       __atomic_store_n(&thread_id_ready, 1, __ATOMIC_RELEASE);
+       rte_atomic_store_explicit(&thread_id_ready, 1, rte_memory_order_release);
 
-       while (__atomic_load_n(&thread_id_ready, __ATOMIC_ACQUIRE) == 1)
+       while (rte_atomic_load_explicit(&thread_id_ready, rte_memory_order_acquire) == 1)
                ;
 
        return 0;
@@ -37,13 +38,13 @@
        RTE_TEST_ASSERT(rte_thread_create(&thread_id, NULL, thread_main, &thread_main_id) == 0,
                "Failed to create thread.");
 
-       while (__atomic_load_n(&thread_id_ready, __ATOMIC_ACQUIRE) == 0)
+       while (rte_atomic_load_explicit(&thread_id_ready, rte_memory_order_acquire) == 0)
                ;
 
        RTE_TEST_ASSERT(rte_thread_equal(thread_id, thread_main_id) != 0,
                "Unexpected thread id.");
 
-       __atomic_store_n(&thread_id_ready, 2, __ATOMIC_RELEASE);
+       rte_atomic_store_explicit(&thread_id_ready, 2, rte_memory_order_release);
 
        RTE_TEST_ASSERT(rte_thread_join(thread_id, NULL) == 0,
                "Failed to join thread.");
@@ -61,13 +62,13 @@
        RTE_TEST_ASSERT(rte_thread_create(&thread_id, NULL, thread_main,
                &thread_main_id) == 0, "Failed to create thread.");
 
-       while (__atomic_load_n(&thread_id_ready, __ATOMIC_ACQUIRE) == 0)
+       while (rte_atomic_load_explicit(&thread_id_ready, rte_memory_order_acquire) == 0)
                ;
 
        RTE_TEST_ASSERT(rte_thread_equal(thread_id, thread_main_id) != 0,
                "Unexpected thread id.");
 
-       __atomic_store_n(&thread_id_ready, 2, __ATOMIC_RELEASE);
+       rte_atomic_store_explicit(&thread_id_ready, 2, rte_memory_order_release);
 
        RTE_TEST_ASSERT(rte_thread_detach(thread_id) == 0,
                "Failed to detach thread.");
@@ -85,7 +86,7 @@
        RTE_TEST_ASSERT(rte_thread_create(&thread_id, NULL, thread_main, NULL) == 0,
                "Failed to create thread");
 
-       while (__atomic_load_n(&thread_id_ready, __ATOMIC_ACQUIRE) == 0)
+       while (rte_atomic_load_explicit(&thread_id_ready, rte_memory_order_acquire) == 0)
                ;
 
        priority = RTE_THREAD_PRIORITY_NORMAL;
@@ -121,7 +122,7 @@
        RTE_TEST_ASSERT(priority == RTE_THREAD_PRIORITY_NORMAL,
                "Priority set mismatches priority get");
 
-       __atomic_store_n(&thread_id_ready, 2, __ATOMIC_RELEASE);
+       rte_atomic_store_explicit(&thread_id_ready, 2, rte_memory_order_release);
 
        return 0;
 }
@@ -137,7 +138,7 @@
        RTE_TEST_ASSERT(rte_thread_create(&thread_id, NULL, thread_main, NULL) == 0,
                "Failed to create thread");
 
-       while (__atomic_load_n(&thread_id_ready, __ATOMIC_ACQUIRE) == 0)
+       while (rte_atomic_load_explicit(&thread_id_ready, rte_memory_order_acquire) == 0)
                ;
 
        RTE_TEST_ASSERT(rte_thread_get_affinity_by_id(thread_id, &cpuset0) == 0,
@@ -190,7 +191,7 @@
        RTE_TEST_ASSERT(rte_thread_create(&thread_id, &attr, thread_main, NULL) == 0,
                "Failed to create attributes affinity thread.");
 
-       while (__atomic_load_n(&thread_id_ready, __ATOMIC_ACQUIRE) == 0)
+       while (rte_atomic_load_explicit(&thread_id_ready, rte_memory_order_acquire) == 0)
                ;
 
        RTE_TEST_ASSERT(rte_thread_get_affinity_by_id(thread_id, &cpuset1) == 0,
@@ -198,7 +199,7 @@
        RTE_TEST_ASSERT(memcmp(&cpuset0, &cpuset1, sizeof(rte_cpuset_t)) == 0,
                "Failed to apply affinity attributes");
 
-       __atomic_store_n(&thread_id_ready, 2, __ATOMIC_RELEASE);
+       rte_atomic_store_explicit(&thread_id_ready, 2, rte_memory_order_release);
 
        return 0;
 }
@@ -219,7 +220,7 @@
        RTE_TEST_ASSERT(rte_thread_create(&thread_id, &attr, thread_main, NULL) == 0,
                "Failed to create attributes priority thread.");
 
-       while (__atomic_load_n(&thread_id_ready, __ATOMIC_ACQUIRE) == 0)
+       while (rte_atomic_load_explicit(&thread_id_ready, rte_memory_order_acquire) == 0)
                ;
 
        RTE_TEST_ASSERT(rte_thread_get_priority(thread_id, &priority) == 0,
@@ -227,7 +228,7 @@
        RTE_TEST_ASSERT(priority == RTE_THREAD_PRIORITY_NORMAL,
                "Failed to apply priority attributes");
 
-       __atomic_store_n(&thread_id_ready, 2, __ATOMIC_RELEASE);
+       rte_atomic_store_explicit(&thread_id_ready, 2, rte_memory_order_release);
 
        return 0;
 }
@@ -243,13 +244,13 @@
                thread_main, &thread_main_id) == 0,
                "Failed to create thread.");
 
-       while (__atomic_load_n(&thread_id_ready, __ATOMIC_ACQUIRE) == 0)
+       while (rte_atomic_load_explicit(&thread_id_ready, rte_memory_order_acquire) == 0)
                ;
 
        RTE_TEST_ASSERT(rte_thread_equal(thread_id, thread_main_id) != 0,
                "Unexpected thread id.");
 
-       __atomic_store_n(&thread_id_ready, 2, __ATOMIC_RELEASE);
+       rte_atomic_store_explicit(&thread_id_ready, 2, rte_memory_order_release);
 
        RTE_TEST_ASSERT(rte_thread_join(thread_id, NULL) == 0,
                "Failed to join thread.");
diff --git a/app/test/test_ticketlock.c b/app/test/test_ticketlock.c
index 1fbbedb..9b6b584 100644
--- a/app/test/test_ticketlock.c
+++ b/app/test/test_ticketlock.c
@@ -48,7 +48,7 @@
 static rte_ticketlock_recursive_t tlr;
 static unsigned int count;
 
-static uint32_t synchro;
+static RTE_ATOMIC(uint32_t) synchro;
 
 static int
 test_ticketlock_per_core(__rte_unused void *arg)
@@ -111,7 +111,8 @@
 
        /* wait synchro for workers */
        if (lcore != rte_get_main_lcore())
-               rte_wait_until_equal_32(&synchro, 1, __ATOMIC_RELAXED);
+               rte_wait_until_equal_32((uint32_t *)(uintptr_t)&synchro, 1,
+                   rte_memory_order_relaxed);
 
        begin = rte_rdtsc_precise();
        while (lcore_count[lcore] < MAX_LOOP) {
@@ -153,11 +154,11 @@
        printf("\nTest with lock on %u cores...\n", rte_lcore_count());
 
        /* Clear synchro and start workers */
-       __atomic_store_n(&synchro, 0, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&synchro, 0, rte_memory_order_relaxed);
        rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MAIN);
 
        /* start synchro and launch test on main */
-       __atomic_store_n(&synchro, 1, __ATOMIC_RELAXED);
+       rte_atomic_store_explicit(&synchro, 1, rte_memory_order_relaxed);
        load_loop_fn(&lock);
 
        rte_eal_mp_wait_lcore();
diff --git a/app/test/test_timer.c b/app/test/test_timer.c
index cac8fc0..dc15a80 100644
--- a/app/test/test_timer.c
+++ b/app/test/test_timer.c
@@ -202,7 +202,7 @@ struct mytimerinfo {
 
 /* Need to synchronize worker lcores through multiple steps. */
 enum { WORKER_WAITING = 1, WORKER_RUN_SIGNAL, WORKER_RUNNING, WORKER_FINISHED };
-static uint16_t lcore_state[RTE_MAX_LCORE];
+static RTE_ATOMIC(uint16_t) lcore_state[RTE_MAX_LCORE];
 
 static void
 main_init_workers(void)
@@ -210,7 +210,8 @@ struct mytimerinfo {
        unsigned i;
 
        RTE_LCORE_FOREACH_WORKER(i) {
-               __atomic_store_n(&lcore_state[i], WORKER_WAITING, __ATOMIC_RELAXED);
+               rte_atomic_store_explicit(&lcore_state[i], WORKER_WAITING,
+                   rte_memory_order_relaxed);
        }
 }
 
@@ -220,10 +221,12 @@ struct mytimerinfo {
        unsigned i;
 
        RTE_LCORE_FOREACH_WORKER(i) {
-               __atomic_store_n(&lcore_state[i], WORKER_RUN_SIGNAL, __ATOMIC_RELEASE);
+               rte_atomic_store_explicit(&lcore_state[i], WORKER_RUN_SIGNAL,
+                   rte_memory_order_release);
        }
        RTE_LCORE_FOREACH_WORKER(i) {
-               rte_wait_until_equal_16(&lcore_state[i], WORKER_RUNNING, __ATOMIC_ACQUIRE);
+               rte_wait_until_equal_16((uint16_t *)(uintptr_t)&lcore_state[i], WORKER_RUNNING,
+                   rte_memory_order_acquire);
        }
 }
 
@@ -233,7 +236,8 @@ struct mytimerinfo {
        unsigned i;
 
        RTE_LCORE_FOREACH_WORKER(i) {
-               rte_wait_until_equal_16(&lcore_state[i], WORKER_FINISHED, __ATOMIC_ACQUIRE);
+               rte_wait_until_equal_16((uint16_t *)(uintptr_t)&lcore_state[i], WORKER_FINISHED,
+                   rte_memory_order_acquire);
        }
 }
 
@@ -242,8 +246,10 @@ struct mytimerinfo {
 {
        unsigned lcore_id = rte_lcore_id();
 
-       rte_wait_until_equal_16(&lcore_state[lcore_id], WORKER_RUN_SIGNAL, __ATOMIC_ACQUIRE);
-       __atomic_store_n(&lcore_state[lcore_id], WORKER_RUNNING, __ATOMIC_RELEASE);
+       rte_wait_until_equal_16((uint16_t *)(uintptr_t)&lcore_state[lcore_id], WORKER_RUN_SIGNAL,
+           rte_memory_order_acquire);
+       rte_atomic_store_explicit(&lcore_state[lcore_id], WORKER_RUNNING,
+           rte_memory_order_release);
 }
 
 static void
@@ -251,7 +257,8 @@ struct mytimerinfo {
 {
        unsigned lcore_id = rte_lcore_id();
 
-       __atomic_store_n(&lcore_state[lcore_id], WORKER_FINISHED, __ATOMIC_RELEASE);
+       rte_atomic_store_explicit(&lcore_state[lcore_id], WORKER_FINISHED,
+           rte_memory_order_release);
 }
 
 
@@ -277,12 +284,12 @@ struct mytimerinfo {
        unsigned int lcore_id = rte_lcore_id();
        unsigned int main_lcore = rte_get_main_lcore();
        int32_t my_collisions = 0;
-       static uint32_t collisions;
+       static RTE_ATOMIC(uint32_t) collisions;
 
        if (lcore_id == main_lcore) {
                cb_count = 0;
                test_failed = 0;
-               __atomic_store_n(&collisions, 0, __ATOMIC_RELAXED);
+               rte_atomic_store_explicit(&collisions, 0, rte_memory_order_relaxed);
                timers = rte_malloc(NULL, sizeof(*timers) * NB_STRESS2_TIMERS, 0);
                if (timers == NULL) {
                        printf("Test Failed\n");
@@ -310,7 +317,7 @@ struct mytimerinfo {
                        my_collisions++;
        }
        if (my_collisions != 0)
-               __atomic_fetch_add(&collisions, my_collisions, __ATOMIC_RELAXED);
+               rte_atomic_fetch_add_explicit(&collisions, my_collisions, rte_memory_order_relaxed);
 
        /* wait long enough for timers to expire */
        rte_delay_ms(100);
@@ -324,7 +331,7 @@ struct mytimerinfo {
 
        /* now check that we get the right number of callbacks */
        if (lcore_id == main_lcore) {
-               my_collisions = __atomic_load_n(&collisions, __ATOMIC_RELAXED);
+               my_collisions = rte_atomic_load_explicit(&collisions, rte_memory_order_relaxed);
                if (my_collisions != 0)
                        printf("- %d timer reset collisions (OK)\n", 
my_collisions);
                rte_timer_manage();
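
The lcore_state[] conversion above keeps the WORKER_WAITING -> WORKER_RUN_SIGNAL -> WORKER_RUNNING -> WORKER_FINISHED progression, with each step a release store on one side matched by an acquire wait on the other. The main/worker pair boils down to the sketch below (illustrative, not part of the patch; the WORKER_* states are the enum from test_timer.c):

#include <rte_lcore.h>
#include <rte_pause.h>
#include <rte_stdatomic.h>

static RTE_ATOMIC(uint16_t) state[RTE_MAX_LCORE];

/* main lcore: signal one worker and wait for its acknowledgement */
static void
main_start_worker(unsigned int lcore)
{
        rte_atomic_store_explicit(&state[lcore], WORKER_RUN_SIGNAL,
            rte_memory_order_release);
        rte_wait_until_equal_16((uint16_t *)(uintptr_t)&state[lcore],
            WORKER_RUNNING, rte_memory_order_acquire);
}

/* worker lcore: wait for the signal, then acknowledge it */
static void
worker_wait_and_ack(void)
{
        unsigned int lcore = rte_lcore_id();

        rte_wait_until_equal_16((uint16_t *)(uintptr_t)&state[lcore],
            WORKER_RUN_SIGNAL, rte_memory_order_acquire);
        rte_atomic_store_explicit(&state[lcore], WORKER_RUNNING,
            rte_memory_order_release);
}
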
-- 
1.8.3.1
