Signed-off-by: Ola Liljedahl <[email protected]>
---
Some of these changes are in preparation for the appearance of
odp_atomic_internal.h but this patch can be merged separately.

odp_atomic.h: Use struct for 32- and 64-bit atomic types, this prevents
non-atomic access to atomic variables. 64-bit atomic support for functionally
challenged 32-bit architectures (e.g. PPC32). Relaxed memory model for atomic
operations, this enhances performance on weakly ordered architectures. Use
GCC __atomic builtins (previously __sync builtins). Added missing
atomic_add_u32, atomic_sub_u32 and atomic_sub_64 operations. Removed 32- and
64-bit cmpset operations. Made some 64-bit operations atomic on 32-bit
architectures.
odp_generator.c: updated to use odp_atomic.h operations with atomic types.
odp_barrier.h: minor function signature change.
odp_spin_internal.h: removed odp_mem_barrier() which is a compiler only
barrier and basically useless and dangerous on all weakly ordered
architectures.
odp_barrier.c: updated to use odp_atomic.h typese and functions. Made
the wrap-around atomic.
odp_buffer.c: updated to use odp_atomic.h functions (ref_count).
odp_ring.c: added missing release barriers. Updated to use GCC __atomic
cmpset facility (temporary fix).
odp_rwlock.c: updated to use odp_atomic.h. Updated to use GCC __atomic
facilities (temporary fix).
odp_ticketlock.c: updated to use odp_atomic.h. Updated to use GCC __atomic
facilities (temporary fix). Added missing barrier in ticketlock_lock().
odp_atomic_test.c: update to conform to latest odp_atomic.h syntax. Added
usage of odp_barrier_t instead of home grown thread barrier.


 example/generator/odp_generator.c                  |  28 ++-
 platform/linux-generic/include/api/odp_atomic.h    | 269 +++++++++++++--------
 platform/linux-generic/include/api/odp_barrier.h   |   2 +-
 platform/linux-generic/include/odp_spin_internal.h |   9 -
 platform/linux-generic/odp_barrier.c               |  17 +-
 platform/linux-generic/odp_buffer.c                |   3 +-
 platform/linux-generic/odp_ring.c                  |  28 ++-
 platform/linux-generic/odp_rwlock.c                |  24 +-
 platform/linux-generic/odp_ticketlock.c            |  12 +-
 test/api_test/odp_atomic_test.c                    |  31 +--
 10 files changed, 253 insertions(+), 170 deletions(-)

diff --git a/example/generator/odp_generator.c 
b/example/generator/odp_generator.c
index ffa5e62..c9c4ef4 100644
--- a/example/generator/odp_generator.c
+++ b/example/generator/odp_generator.c
@@ -201,7 +201,7 @@ static void pack_udp_pkt(odp_buffer_t obuf)
        ip->tot_len = odp_cpu_to_be_16(args->appl.payload + ODPH_UDPHDR_LEN +
                                       ODPH_IPV4HDR_LEN);
        ip->proto = ODPH_IPPROTO_UDP;
-       seq = odp_atomic_fetch_add_u64(&counters.seq, 1) % 0xFFFF;
+       seq = odp_atomic_fetch_inc_u64(&counters.seq) % 0xFFFF;
        ip->id = odp_cpu_to_be_16(seq);
        ip->chksum = 0;
        odph_ipv4_csum_update(pkt);
@@ -258,7 +258,7 @@ static void pack_icmp_pkt(odp_buffer_t obuf)
        ip->tot_len = odp_cpu_to_be_16(args->appl.payload + ODPH_ICMPHDR_LEN +
                                       ODPH_IPV4HDR_LEN);
        ip->proto = ODPH_IPPROTO_ICMP;
-       seq = odp_atomic_fetch_add_u64(&counters.seq, 1) % 0xffff;
+       seq = odp_atomic_fetch_inc_u64(&counters.seq) % 0xffff;
        ip->id = odp_cpu_to_be_16(seq);
        ip->chksum = 0;
        odph_ipv4_csum_update(pkt);
@@ -335,11 +335,14 @@ static void *gen_send_thread(void *arg)
 
                if (args->appl.interval != 0) {
                        printf("  [%02i] send pkt no:%ju seq %ju\n",
-                              thr, counters.seq, counters.seq%0xffff);
+                              thr,
+                              odp_atomic_load_u64(&counters.seq),
+                              odp_atomic_load_u64(&counters.seq)%0xffff);
                        /* TODO use odp timer */
                        usleep(args->appl.interval * 1000);
                }
-               if (args->appl.number != -1 && counters.seq
+               if (args->appl.number != -1 &&
+                   odp_atomic_load_u64(&counters.seq)
                    >= (unsigned int)args->appl.number) {
                        break;
                }
@@ -348,7 +351,8 @@ static void *gen_send_thread(void *arg)
        /* receive number of reply pks until timeout */
        if (args->appl.mode == APPL_MODE_PING && args->appl.number > 0) {
                while (args->appl.timeout >= 0) {
-                       if (counters.icmp >= (unsigned int)args->appl.number)
+                       if (odp_atomic_load_u64(&counters.icmp) >=
+                           (unsigned int)args->appl.number)
                                break;
                        /* TODO use odp timer */
                        sleep(1);
@@ -358,10 +362,12 @@ static void *gen_send_thread(void *arg)
 
        /* print info */
        if (args->appl.mode == APPL_MODE_UDP) {
-               printf("  [%02i] total send: %ju\n", thr, counters.seq);
+               printf("  [%02i] total send: %ju\n",
+                      thr, odp_atomic_load_u64(&counters.seq));
        } else if (args->appl.mode == APPL_MODE_PING) {
                printf("  [%02i] total send: %ju total receive: %ju\n",
-                      thr, counters.seq, counters.icmp);
+                      thr, odp_atomic_load_u64(&counters.seq),
+                      odp_atomic_load_u64(&counters.icmp));
        }
        return arg;
 }
@@ -530,10 +536,10 @@ int main(int argc, char *argv[])
        }
 
        /* init counters */
-       odp_atomic_init_u64(&counters.seq);
-       odp_atomic_init_u64(&counters.ip);
-       odp_atomic_init_u64(&counters.udp);
-       odp_atomic_init_u64(&counters.icmp);
+       odp_atomic_init_u64(&counters.seq, 0);
+       odp_atomic_init_u64(&counters.ip, 0);
+       odp_atomic_init_u64(&counters.udp, 0);
+       odp_atomic_init_u64(&counters.icmp, 0);
 
        /* Reserve memory for args from shared mem */
        shm = odp_shm_reserve("shm_args", sizeof(args_t),
diff --git a/platform/linux-generic/include/api/odp_atomic.h 
b/platform/linux-generic/include/api/odp_atomic.h
index 5c83b39..78ba403 100644
--- a/platform/linux-generic/include/api/odp_atomic.h
+++ b/platform/linux-generic/include/api/odp_atomic.h
@@ -18,11 +18,12 @@
 extern "C" {
 #endif
 
-
-#include <odp_std_types.h>
+#include <stdint.h>
+#include <odp_align.h>
 
 /** @addtogroup odp_synchronizers
- *  Atomic operations.
+ *  Atomic types and relaxed operations. These operations cannot be used for
+ *  synchronization.
  *  @{
  */
 
@@ -30,56 +31,67 @@ extern "C" {
 /**
  * Atomic unsigned integer 64 bits
  */
-typedef volatile uint64_t odp_atomic_u64_t;
+typedef struct {
+       uint64_t v; /**< Actual storage for the atomic variable */
+#if __GCC_ATOMIC_LLONG_LOCK_FREE < 2
+       /* Some architectures do not support lock-free operations on 64-bit
+        * data types. We use a spin lock to ensure atomicity. */
+       char lock;
+#endif
+} odp_atomic_u64_t
+ODP_ALIGNED(sizeof(uint64_t)); /* Enforce alignement! */
+
 
 /**
  * Atomic unsigned integer 32 bits
  */
-typedef volatile uint32_t odp_atomic_u32_t;
+typedef struct {
+       uint32_t v; /**< Actual storage for the atomic variable */
+} odp_atomic_u32_t
+ODP_ALIGNED(sizeof(uint32_t)); /* Enforce alignement! */
 
 
 /**
  * Initialize atomic uint32
+ * @note Relaxed memory order, cannot be used for synchronization
  *
  * @param ptr    An atomic variable
- *
- * @note The operation is not synchronized with other threads
+ * @param val    Value to initialize the variable with
  */
-static inline void odp_atomic_init_u32(odp_atomic_u32_t *ptr)
+static inline void odp_atomic_init_u32(odp_atomic_u32_t *ptr, uint32_t val)
 {
-       *ptr = 0;
+       __atomic_store_n(&ptr->v, val, __ATOMIC_RELAXED);
 }
 
 /**
  * Load value of atomic uint32
+ * @note Relaxed memory order, cannot be used for synchronization
  *
  * @param ptr    An atomic variable
  *
- * @return atomic uint32 value
- *
- * @note The operation is not synchronized with other threads
+ * @return Value of the variable
  */
 static inline uint32_t odp_atomic_load_u32(odp_atomic_u32_t *ptr)
 {
-       return *ptr;
+       return __atomic_load_n(&ptr->v, __ATOMIC_RELAXED);
 }
 
 /**
  * Store value to atomic uint32
+ * @note Relaxed memory order, cannot be used for synchronization
  *
  * @param ptr        An atomic variable
  * @param new_value  Store new_value to a variable
- *
- * @note The operation is not synchronized with other threads
  */
 static inline void odp_atomic_store_u32(odp_atomic_u32_t *ptr,
                                        uint32_t new_value)
 {
-       *ptr = new_value;
+       __atomic_store_n(&ptr->v, new_value, __ATOMIC_RELAXED);
 }
 
 /**
  * Fetch and add atomic uint32
+ * @note Relaxed memory order, cannot be used for synchronization
  *
  * @param ptr    An atomic variable
  * @param value  A value to be added to the variable
@@ -89,11 +101,25 @@ static inline void odp_atomic_store_u32(odp_atomic_u32_t 
*ptr,
 static inline uint32_t odp_atomic_fetch_add_u32(odp_atomic_u32_t *ptr,
                                                uint32_t value)
 {
-       return __sync_fetch_and_add(ptr, value);
+       return __atomic_fetch_add(&ptr->v, value, __ATOMIC_RELAXED);
+}
+
+/**
+ * Add atomic uint32
+ * @note Relaxed memory order, cannot be used for synchronization
+ *
+ * @param ptr    An atomic variable
+ * @param value  A value to be added to the variable
+ */
+static inline void odp_atomic_add_u32(odp_atomic_u32_t *ptr,
+                                     uint32_t value)
+{
+       (void)__atomic_fetch_add(&ptr->v, value, __ATOMIC_RELAXED);
 }
 
 /**
  * Fetch and subtract uint32
+ * @note Relaxed memory order, cannot be used for synchronization
  *
  * @param ptr    An atomic variable
  * @param value  A value to be sub to the variable
@@ -103,51 +129,59 @@ static inline uint32_t 
odp_atomic_fetch_add_u32(odp_atomic_u32_t *ptr,
 static inline uint32_t odp_atomic_fetch_sub_u32(odp_atomic_u32_t *ptr,
                                                uint32_t value)
 {
-       return __sync_fetch_and_sub(ptr, value);
+       return __atomic_fetch_sub(&ptr->v, value, __ATOMIC_RELAXED);
+}
+
+/**
+ * Subtract uint32
+ * @note Relaxed memory order, cannot be used for synchronization
+ *
+ * @param ptr    An atomic variable
+ * @param value  A value to be subtract from the variable
+ */
+static inline void odp_atomic_sub_u32(odp_atomic_u32_t *ptr,
+                                     uint32_t value)
+{
+       (void)__atomic_fetch_sub(&ptr->v, value, __ATOMIC_RELAXED);
 }
 
 /**
  * Fetch and increment atomic uint32 by 1
+ * @note Relaxed memory order, cannot be used for synchronization
  *
  * @param ptr    An atomic variable
  *
  * @return Value of the variable before the operation
  */
-#if defined __OCTEON__
 
 static inline uint32_t odp_atomic_fetch_inc_u32(odp_atomic_u32_t *ptr)
 {
+#if defined __OCTEON__
        uint32_t ret;
-
        __asm__ __volatile__ ("syncws");
        __asm__ __volatile__ ("lai %0,(%2)" : "=r" (ret), "+m" (ptr) :
                              "r" (ptr));
-
        return ret;
-}
-
 #else
-
-static inline uint32_t odp_atomic_fetch_inc_u32(odp_atomic_u32_t *ptr)
-{
-       return odp_atomic_fetch_add_u32(ptr, 1);
-}
-
+       return __atomic_fetch_add(&ptr->v, 1, __ATOMIC_RELAXED);
 #endif
+}
 
 /**
  * Increment atomic uint32 by 1
+ * @note Relaxed memory order, cannot be used for synchronization
  *
  * @param ptr    An atomic variable
  *
  */
 static inline void odp_atomic_inc_u32(odp_atomic_u32_t *ptr)
 {
-       odp_atomic_fetch_add_u32(ptr, 1);
+       (void)__atomic_fetch_add(&ptr->v, 1, __ATOMIC_RELAXED);
 }
 
 /**
  * Fetch and decrement uint32 by 1
+ * @note Relaxed memory order, cannot be used for synchronization
  *
  * @param ptr    An atomic variable
  *
@@ -155,62 +189,75 @@ static inline void odp_atomic_inc_u32(odp_atomic_u32_t 
*ptr)
  */
 static inline uint32_t odp_atomic_fetch_dec_u32(odp_atomic_u32_t *ptr)
 {
-       return odp_atomic_fetch_sub_u32(ptr, 1);
+       return __atomic_fetch_sub(&ptr->v, 1, __ATOMIC_RELAXED);
 }
 
 /**
  * Decrement atomic uint32 by 1
+ * @note Relaxed memory order, cannot be used for synchronization
  *
  * @param ptr    An atomic variable
  *
  */
 static inline void odp_atomic_dec_u32(odp_atomic_u32_t *ptr)
 {
-       odp_atomic_fetch_sub_u32(ptr, 1);
+       (void)__atomic_fetch_sub(&ptr->v, 1, __ATOMIC_RELAXED);
 }
 
 /**
- * Atomic compare and set for 32bit
+ * Initialize atomic uint64
+ * @note Relaxed memory order, cannot be used for synchronization
  *
- * @param dst destination location into which the value will be written.
- * @param exp expected value.
- * @param src new value.
- * @return Non-zero on success; 0 on failure.
+ * @param ptr    An atomic variable
  */
-static inline int
-odp_atomic_cmpset_u32(odp_atomic_u32_t *dst, uint32_t exp, uint32_t src)
+static inline void odp_atomic_init_u64(odp_atomic_u64_t *ptr, uint64_t 
init_val)
 {
-       return __sync_bool_compare_and_swap(dst, exp, src);
+       ptr->v = init_val;
+#if __GCC_ATOMIC_LLONG_LOCK_FREE < 2
+       __atomic_clear(&ptr->lock, __ATOMIC_RELAXED);
+#endif
 }
 
+#if __GCC_ATOMIC_LLONG_LOCK_FREE < 2
 /**
- * Initialize atomic uint64
- *
- * @param ptr    An atomic variable
- *
- * @note The operation is not synchronized with other threads
+ * Helper macro for lock-based atomic operations on 64-bit integers
+ * @param ptr Pointer to the 64-bit atomic variable
+ * @param expr Expression used update the variable.
+ * @return The old value of the variable.
  */
-static inline void odp_atomic_init_u64(odp_atomic_u64_t *ptr)
-{
-       *ptr = 0;
-}
+#define ATOMIC_OP(ptr, expr) \
+({ \
+       uint64_t old_val; \
+       /* Loop while lock is already taken, stop when lock becomes clear */ \
+       while (__atomic_test_and_set(&(ptr)->lock, __ATOMIC_ACQUIRE)) \
+               (void)0; \
+       old_val = (ptr)->v; \
+       (expr); /* Perform whatever update is desired */ \
+       __atomic_clear(&(ptr)->lock, __ATOMIC_RELEASE); \
+       old_val; /* Return old value */ \
+})
+#endif
 
 /**
  * Load value of atomic uint64
+ * @note Relaxed memory order, cannot be used for synchronization
  *
  * @param ptr    An atomic variable
  *
  * @return atomic uint64 value
- *
- * @note The operation is not synchronized with other threads
  */
 static inline uint64_t odp_atomic_load_u64(odp_atomic_u64_t *ptr)
 {
-       return *ptr;
+#if __GCC_ATOMIC_LLONG_LOCK_FREE < 2
+       return ATOMIC_OP(ptr, (void)0);
+#else
+       return __atomic_load_n(&ptr->v, __ATOMIC_RELAXED);
+#endif
 }
 
 /**
  * Store value to atomic uint64
+ * @note Relaxed memory order, cannot be used for synchronization
  *
  * @param ptr        An atomic variable
  * @param new_value  Store new_value to a variable
@@ -220,23 +267,16 @@ static inline uint64_t 
odp_atomic_load_u64(odp_atomic_u64_t *ptr)
 static inline void odp_atomic_store_u64(odp_atomic_u64_t *ptr,
                                        uint64_t new_value)
 {
-       *ptr = new_value;
-}
-
-/**
- * Add atomic uint64
- *
- * @param ptr    An atomic variable
- * @param value  A value to be added to the variable
- *
- */
-static inline void odp_atomic_add_u64(odp_atomic_u64_t *ptr, uint64_t value)
-{
-       __sync_fetch_and_add(ptr, value);
+#if __GCC_ATOMIC_LLONG_LOCK_FREE < 2
+       (void)ATOMIC_OP(ptr, ptr->v = new_value);
+#else
+       __atomic_store_n(&ptr->v, new_value, __ATOMIC_RELAXED);
+#endif
 }
 
 /**
  * Fetch and add atomic uint64
+ * @note Relaxed memory order, cannot be used for synchronization
  *
  * @param ptr    An atomic variable
  * @param value  A value to be added to the variable
@@ -244,56 +284,72 @@ static inline void odp_atomic_add_u64(odp_atomic_u64_t 
*ptr, uint64_t value)
  * @return Value of the variable before the operation
  */
 
-#if defined __powerpc__ && !defined __powerpc64__
 static inline uint64_t odp_atomic_fetch_add_u64(odp_atomic_u64_t *ptr,
                                                uint64_t value)
 {
-       return __sync_fetch_and_add((odp_atomic_u32_t *)ptr,
-                                   (uint32_t)value);
-}
+#if __GCC_ATOMIC_LLONG_LOCK_FREE < 2
+       return ATOMIC_OP(ptr, ptr->v += value);
 #else
-static inline uint64_t odp_atomic_fetch_add_u64(odp_atomic_u64_t *ptr,
-                                               uint64_t value)
-{
-       return __sync_fetch_and_add(ptr, value);
-}
+       return __atomic_fetch_add(&ptr->v, value, __ATOMIC_RELAXED);
 #endif
+}
+
 /**
- * Subtract atomic uint64
+ * Add atomic uint64
+ * @note Relaxed memory order, cannot be used for synchronization
  *
  * @param ptr    An atomic variable
- * @param value  A value to be subtracted from the variable
+ * @param value  A value to be added to the variable
  *
  */
-static inline void odp_atomic_sub_u64(odp_atomic_u64_t *ptr, uint64_t value)
+static inline void odp_atomic_add_u64(odp_atomic_u64_t *ptr, uint64_t value)
 {
-       __sync_fetch_and_sub(ptr, value);
+#if __GCC_ATOMIC_LLONG_LOCK_FREE < 2
+       (void)ATOMIC_OP(ptr, ptr->v += value);
+#else
+       (void)__atomic_fetch_add(&ptr->v, value, __ATOMIC_RELAXED);
+#endif
 }
 
 /**
  * Fetch and subtract atomic uint64
+ * @note Relaxed memory order, cannot be used for synchronization
  *
  * @param ptr    An atomic variable
  * @param value  A value to be subtracted from the variable
  *
  * @return Value of the variable before the operation
  */
-#if defined __powerpc__ && !defined __powerpc64__
 static inline uint64_t odp_atomic_fetch_sub_u64(odp_atomic_u64_t *ptr,
                                                uint64_t value)
 {
-       return __sync_fetch_and_sub((odp_atomic_u32_t *)ptr,
-                                   (uint32_t)value);
-}
+#if __GCC_ATOMIC_LLONG_LOCK_FREE < 2
+       return ATOMIC_OP(ptr, ptr->v -= value);
 #else
-static inline uint64_t odp_atomic_fetch_sub_u64(odp_atomic_u64_t *ptr,
-                                               uint64_t value)
-{
-       return __sync_fetch_and_sub(ptr, value);
+       return __atomic_fetch_sub(&ptr->v, value, __ATOMIC_RELAXED);
+#endif
 }
+
+/**
+ * Subtract atomic uint64
+ * @note Relaxed memory order, cannot be used for synchronization
+ *
+ * @param ptr    An atomic variable
+ * @param value  A value to be subtracted from the variable
+ *
+ */
+static inline void odp_atomic_sub_u64(odp_atomic_u64_t *ptr, uint64_t value)
+{
+#if __GCC_ATOMIC_LLONG_LOCK_FREE < 2
+       (void)ATOMIC_OP(ptr, ptr->v -= value);
+#else
+       (void)__atomic_fetch_sub(&ptr->v, value, __ATOMIC_RELAXED);
 #endif
+}
+
 /**
  * Fetch and increment atomic uint64 by 1
+ * @note Relaxed memory order, cannot be used for synchronization
  *
  * @param ptr    An atomic variable
  *
@@ -301,22 +357,32 @@ static inline uint64_t 
odp_atomic_fetch_sub_u64(odp_atomic_u64_t *ptr,
  */
 static inline uint64_t odp_atomic_fetch_inc_u64(odp_atomic_u64_t *ptr)
 {
-       return odp_atomic_fetch_add_u64(ptr, 1);
+#if __GCC_ATOMIC_LLONG_LOCK_FREE < 2
+       return ATOMIC_OP(ptr, ptr->v++);
+#else
+       return __atomic_fetch_add(&ptr->v, 1, __ATOMIC_RELAXED);
+#endif
 }
 
 /**
  * Increment atomic uint64 by 1
+ * @note Relaxed memory order, cannot be used for synchronization
  *
  * @param ptr    An atomic variable
  *
  */
 static inline void odp_atomic_inc_u64(odp_atomic_u64_t *ptr)
 {
-       odp_atomic_fetch_add_u64(ptr, 1);
+#if __GCC_ATOMIC_LLONG_LOCK_FREE < 2
+       (void)ATOMIC_OP(ptr, ptr->v++);
+#else
+       (void)__atomic_fetch_add(&ptr->v, 1, __ATOMIC_RELAXED);
+#endif
 }
 
 /**
  * Fetch and decrement atomic uint64 by 1
+ * @note Relaxed memory order, cannot be used for synchronization
  *
  * @param ptr    An atomic variable
  *
@@ -324,32 +390,27 @@ static inline void odp_atomic_inc_u64(odp_atomic_u64_t 
*ptr)
  */
 static inline uint64_t odp_atomic_fetch_dec_u64(odp_atomic_u64_t *ptr)
 {
-       return odp_atomic_fetch_sub_u64(ptr, 1);
+#if __GCC_ATOMIC_LLONG_LOCK_FREE < 2
+       return ATOMIC_OP(ptr, ptr->v--);
+#else
+       return __atomic_fetch_sub(&ptr->v, 1, __ATOMIC_RELAXED);
+#endif
 }
 
 /**
  * Decrement atomic uint64 by 1
+ * @note Relaxed memory order, cannot be used for synchronization
  *
  * @param ptr    An atomic variable
  *
  */
 static inline void odp_atomic_dec_u64(odp_atomic_u64_t *ptr)
 {
-       odp_atomic_fetch_sub_u64(ptr, 1);
-}
-
-/**
- * Atomic compare and set for 64bit
- *
- * @param dst destination location into which the value will be written.
- * @param exp expected value.
- * @param src new value.
- * @return Non-zero on success; 0 on failure.
- */
-static inline int
-odp_atomic_cmpset_u64(odp_atomic_u64_t *dst, uint64_t exp, uint64_t src)
-{
-       return __sync_bool_compare_and_swap(dst, exp, src);
+#if __GCC_ATOMIC_LLONG_LOCK_FREE < 2
+       (void)ATOMIC_OP(ptr, ptr->v--);
+#else
+       (void)__atomic_fetch_sub(&ptr->v, 1, __ATOMIC_RELAXED);
+#endif
 }
 
 /**
diff --git a/platform/linux-generic/include/api/odp_barrier.h 
b/platform/linux-generic/include/api/odp_barrier.h
index fb02a9d..172c2ef 100644
--- a/platform/linux-generic/include/api/odp_barrier.h
+++ b/platform/linux-generic/include/api/odp_barrier.h
@@ -42,7 +42,7 @@ typedef struct odp_barrier_t {
  * @param barrier    Barrier
  * @param count      Thread count
  */
-void odp_barrier_init_count(odp_barrier_t *barrier, int count);
+void odp_barrier_init_count(odp_barrier_t *barrier, uint32_t count);
 
 
 /**
diff --git a/platform/linux-generic/include/odp_spin_internal.h 
b/platform/linux-generic/include/odp_spin_internal.h
index b7e2071..29c524f 100644
--- a/platform/linux-generic/include/odp_spin_internal.h
+++ b/platform/linux-generic/include/odp_spin_internal.h
@@ -15,15 +15,6 @@ extern "C" {
 
 
 /**
- * GCC memory barrier for ODP internal use
- */
-static inline void odp_mem_barrier(void)
-{
-       __asm__ __volatile__ ("" : : : "memory");
-}
-
-
-/**
  * Spin loop for ODP internal use
  */
 static inline void odp_spin(void)
diff --git a/platform/linux-generic/odp_barrier.c 
b/platform/linux-generic/odp_barrier.c
index f4a87c8..ab5cd45 100644
--- a/platform/linux-generic/odp_barrier.c
+++ b/platform/linux-generic/odp_barrier.c
@@ -8,11 +8,10 @@
 #include <odp_sync.h>
 #include <odp_spin_internal.h>
 
-void odp_barrier_init_count(odp_barrier_t *barrier, int count)
+void odp_barrier_init_count(odp_barrier_t *barrier, uint32_t count)
 {
        barrier->count = count;
-       barrier->bar   = 0;
-       odp_sync_stores();
+       odp_atomic_init_u32(&barrier->bar, 0);
 }
 
 /*
@@ -33,16 +32,18 @@ void odp_barrier_sync(odp_barrier_t *barrier)
        uint32_t count;
        int wasless;
 
-       odp_sync_stores();
-       wasless = barrier->bar < barrier->count;
+       __atomic_thread_fence(__ATOMIC_SEQ_CST);
        count   = odp_atomic_fetch_inc_u32(&barrier->bar);
+       wasless = count < barrier->count;
 
        if (count == 2*barrier->count-1) {
-               barrier->bar = 0;
+               /* Wrap around *atomically* */
+               odp_atomic_sub_u32(&barrier->bar, 2 * barrier->count);
        } else {
-               while ((barrier->bar < barrier->count) == wasless)
+               while ((odp_atomic_load_u32(&barrier->bar) < barrier->count)
+                               == wasless)
                        odp_spin();
        }
 
-       odp_mem_barrier();
+       __atomic_thread_fence(__ATOMIC_SEQ_CST);
 }
diff --git a/platform/linux-generic/odp_buffer.c 
b/platform/linux-generic/odp_buffer.c
index e54e0e7..4646486 100644
--- a/platform/linux-generic/odp_buffer.c
+++ b/platform/linux-generic/odp_buffer.c
@@ -73,7 +73,8 @@ int odp_buffer_snprint(char *str, size_t n, odp_buffer_t buf)
        len += snprintf(&str[len], n-len,
                        "  cur_offset   %zu\n",       hdr->cur_offset);
        len += snprintf(&str[len], n-len,
-                       "  ref_count    %i\n",        hdr->ref_count);
+                       "  ref_count    %i\n",
+                       odp_atomic_load_u32(&hdr->ref_count));
        len += snprintf(&str[len], n-len,
                        "  type         %i\n",        hdr->type);
        len += snprintf(&str[len], n-len,
diff --git a/platform/linux-generic/odp_ring.c 
b/platform/linux-generic/odp_ring.c
index 632aa66..1d3130a 100644
--- a/platform/linux-generic/odp_ring.c
+++ b/platform/linux-generic/odp_ring.c
@@ -259,13 +259,16 @@ int __odph_ring_mp_do_enqueue(odph_ring_t *r, void * 
const *obj_table,
                }
 
                prod_next = prod_head + n;
-               success = odp_atomic_cmpset_u32(&r->prod.head, prod_head,
-                                             prod_next);
+               success = __atomic_compare_exchange_n(&r->prod.head,
+                               &prod_head,
+                               prod_next,
+                               false/*strong*/,
+                               __ATOMIC_ACQUIRE,
+                               __ATOMIC_RELAXED);
        } while (odp_unlikely(success == 0));
 
        /* write entries in ring */
        ENQUEUE_PTRS();
-       odp_mem_barrier();
 
        /* if we exceed the watermark */
        if (odp_unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) {
@@ -282,6 +285,8 @@ int __odph_ring_mp_do_enqueue(odph_ring_t *r, void * const 
*obj_table,
        while (odp_unlikely(r->prod.tail != prod_head))
                odp_spin();
 
+       /* Release our entries and the memory they refer to */
+       __atomic_thread_fence(__ATOMIC_RELEASE);
        r->prod.tail = prod_next;
        return ret;
 }
@@ -324,7 +329,6 @@ int __odph_ring_sp_do_enqueue(odph_ring_t *r, void * const 
*obj_table,
 
        /* write entries in ring */
        ENQUEUE_PTRS();
-       odp_mem_barrier();
 
        /* if we exceed the watermark */
        if (odp_unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) {
@@ -334,6 +338,8 @@ int __odph_ring_sp_do_enqueue(odph_ring_t *r, void * const 
*obj_table,
                ret = (behavior == ODPH_RING_QUEUE_FIXED) ? 0 : n;
        }
 
+       /* Release our entries and the memory they refer to */
+       __atomic_thread_fence(__ATOMIC_RELEASE);
        r->prod.tail = prod_next;
        return ret;
 }
@@ -378,13 +384,16 @@ int __odph_ring_mc_do_dequeue(odph_ring_t *r, void 
**obj_table,
                }
 
                cons_next = cons_head + n;
-               success = odp_atomic_cmpset_u32(&r->cons.head, cons_head,
-                                             cons_next);
+               success = __atomic_compare_exchange_n(&r->cons.head,
+                               &cons_head,
+                               cons_next,
+                               false/*strong*/,
+                               __ATOMIC_ACQUIRE,
+                               __ATOMIC_RELAXED);
        } while (odp_unlikely(success == 0));
 
        /* copy in table */
        DEQUEUE_PTRS();
-       odp_mem_barrier();
 
        /*
         * If there are other dequeues in progress that preceded us,
@@ -393,6 +402,8 @@ int __odph_ring_mc_do_dequeue(odph_ring_t *r, void 
**obj_table,
        while (odp_unlikely(r->cons.tail != cons_head))
                odp_spin();
 
+       /* Release our entries and the memory they refer to */
+       __atomic_thread_fence(__ATOMIC_RELEASE);
        r->cons.tail = cons_next;
 
        return behavior == ODPH_RING_QUEUE_FIXED ? 0 : n;
@@ -431,9 +442,10 @@ int __odph_ring_sc_do_dequeue(odph_ring_t *r, void 
**obj_table,
        cons_next = cons_head + n;
        r->cons.head = cons_next;
 
+       /* Acquire the pointers and the memory they refer to */
+       __atomic_thread_fence(__ATOMIC_ACQUIRE);
        /* copy in table */
        DEQUEUE_PTRS();
-       odp_mem_barrier();
 
        r->cons.tail = cons_next;
        return behavior == ODPH_RING_QUEUE_FIXED ? 0 : n;
diff --git a/platform/linux-generic/odp_rwlock.c 
b/platform/linux-generic/odp_rwlock.c
index 11c8dd7..2f6a255 100644
--- a/platform/linux-generic/odp_rwlock.c
+++ b/platform/linux-generic/odp_rwlock.c
@@ -4,6 +4,7 @@
  * SPDX-License-Identifier:     BSD-3-Clause
  */
 
+#include <stdbool.h>
 #include <odp_atomic.h>
 #include <odp_rwlock.h>
 
@@ -26,15 +27,18 @@ void odp_rwlock_read_lock(odp_rwlock_t *rwlock)
                        odp_spin();
                        continue;
                }
-               is_locked = odp_atomic_cmpset_u32(
-                                       (volatile uint32_t *)&rwlock->cnt,
-                                             cnt, cnt + 1);
+               is_locked = __atomic_compare_exchange_n(&rwlock->cnt,
+                               &cnt,
+                               cnt + 1,
+                               false/*strong*/,
+                               __ATOMIC_ACQUIRE,
+                               __ATOMIC_RELAXED);
        }
 }
 
 void odp_rwlock_read_unlock(odp_rwlock_t *rwlock)
 {
-       odp_atomic_dec_u32((odp_atomic_u32_t *)(intptr_t)&rwlock->cnt);
+       (void)__atomic_sub_fetch(&rwlock->cnt, 1, __ATOMIC_RELEASE);
 }
 
 void odp_rwlock_write_lock(odp_rwlock_t *rwlock)
@@ -43,19 +47,23 @@ void odp_rwlock_write_lock(odp_rwlock_t *rwlock)
        int is_locked = 0;
 
        while (is_locked == 0) {
+               int32_t zero = 0;
                cnt = rwlock->cnt;
                /* lock aquired, wait */
                if (cnt != 0) {
                        odp_spin();
                        continue;
                }
-               is_locked = odp_atomic_cmpset_u32(
-                                       (volatile uint32_t *)&rwlock->cnt,
-                                             0, -1);
+               is_locked = __atomic_compare_exchange_n(&rwlock->cnt,
+                               &zero,
+                               -1,
+                               false/*strong*/,
+                               __ATOMIC_ACQUIRE,
+                               __ATOMIC_RELAXED);
        }
 }
 
 void odp_rwlock_write_unlock(odp_rwlock_t *rwlock)
 {
-       odp_atomic_inc_u32((odp_atomic_u32_t *)(intptr_t)&rwlock->cnt);
+       (void)__atomic_add_fetch(&rwlock->cnt, 1, __ATOMIC_RELEASE);
 }
diff --git a/platform/linux-generic/odp_ticketlock.c 
b/platform/linux-generic/odp_ticketlock.c
index be5b885..dc6ff83 100644
--- a/platform/linux-generic/odp_ticketlock.c
+++ b/platform/linux-generic/odp_ticketlock.c
@@ -12,9 +12,8 @@
 
 void odp_ticketlock_init(odp_ticketlock_t *ticketlock)
 {
-       ticketlock->next_ticket = 0;
+       odp_atomic_init_u32(&ticketlock->next_ticket, 0);
        ticketlock->cur_ticket  = 0;
-       odp_sync_stores();
 }
 
 
@@ -27,7 +26,7 @@ void odp_ticketlock_lock(odp_ticketlock_t *ticketlock)
        while (ticket != ticketlock->cur_ticket)
                odp_spin();
 
-       odp_mem_barrier();
+       __atomic_thread_fence(__ATOMIC_ACQUIRE);
 }
 
 
@@ -38,14 +37,15 @@ void odp_ticketlock_unlock(odp_ticketlock_t *ticketlock)
        ticketlock->cur_ticket++;
 
 #if defined __OCTEON__
-       odp_sync_stores();
+       odp_sync_stores(); /* Possibly SYNCW instead of SYNC */
 #else
-       odp_mem_barrier();
+       __atomic_thread_fence(__ATOMIC_RELEASE);
 #endif
 }
 
 
 int odp_ticketlock_is_locked(odp_ticketlock_t *ticketlock)
 {
-       return ticketlock->cur_ticket != ticketlock->next_ticket;
+       return ticketlock->cur_ticket !=
+               odp_atomic_load_u32(&ticketlock->next_ticket);
 }
diff --git a/test/api_test/odp_atomic_test.c b/test/api_test/odp_atomic_test.c
index 3ca7674..6593634 100644
--- a/test/api_test/odp_atomic_test.c
+++ b/test/api_test/odp_atomic_test.c
@@ -13,15 +13,15 @@
 static odp_atomic_u32_t a32u;
 static odp_atomic_u64_t a64u;
 
-static odp_atomic_u32_t numthrds;
+static odp_barrier_t barrier;
 
 static const char * const test_name[] = {
        "dummy",
-       "test atomic basic ops add/sub/inc/dec",
-       "test atomic inc/dec of unsigned word",
-       "test atomic add/sub of unsigned word",
-       "test atomic inc/dec of unsigned double word",
-       "test atomic add/sub of unsigned double word"
+       "test atomic basic ops (add/sub/inc/dec) on 32- and 64-bit atomic ints",
+       "test atomic inc/dec of 32-bit atomic int",
+       "test atomic add/sub of 32-bit atomic int",
+       "test atomic inc/dec of 64-bit atomic int",
+       "test atomic add/sub of 64-bit atomic int"
 };
 
 static struct timeval tv0[MAX_WORKERS], tv1[MAX_WORKERS];
@@ -153,8 +153,8 @@ void test_atomic_basic(void)
 
 void test_atomic_init(void)
 {
-       odp_atomic_init_u32(&a32u);
-       odp_atomic_init_u64(&a64u);
+       odp_atomic_init_u32(&a32u, 0);
+       odp_atomic_init_u64(&a64u, 0);
 }
 
 void test_atomic_store(void)
@@ -187,11 +187,14 @@ static void *run_thread(void *arg)
 
        ODP_DBG("Thread %i starts\n", thr);
 
-       odp_atomic_inc_u32(&numthrds);
-
-       /* Wait here until all pthreads are created */
-       while (*(volatile int *)&numthrds < parg->numthrds)
-               ;
+       /* Wait here until all threads have arrived */
+       /* Use multiple barriers to verify that it handles wrap around and
+        * has no race conditions which could be exposed when invoked back-
+        * to-back */
+       odp_barrier_sync(&barrier);
+       odp_barrier_sync(&barrier);
+       odp_barrier_sync(&barrier);
+       odp_barrier_sync(&barrier);
 
        gettimeofday(&tv0[thr], NULL);
 
@@ -262,7 +265,6 @@ int main(int argc, char *argv[])
        if (pthrdnum == 0)
                pthrdnum = odp_sys_core_count();
 
-       odp_atomic_init_u32(&numthrds);
        test_atomic_init();
        test_atomic_store();
 
@@ -277,6 +279,7 @@ int main(int argc, char *argv[])
                usage();
                goto err_exit;
        }
+       odp_barrier_init_count(&barrier, pthrdnum);
        odp_test_thread_create(run_thread, &thrdarg);
 
        odp_test_thread_exit(&thrdarg);
-- 
1.9.1


_______________________________________________
lng-odp mailing list
[email protected]
http://lists.linaro.org/mailman/listinfo/lng-odp

Reply via email to