On Fri, Dec 11, 2015 at 5:30 AM, Petri Savolainen <[email protected]> wrote:
> Added new memory barriers. These follow C11 release / > acquire specification and replaces odp_sync_stores(). > Used GCC __atomic_thread_fence to implement all three > barriers. > > Signed-off-by: Petri Savolainen <[email protected]> > Reviewed-by: Bill Fischofer <[email protected]> > --- > include/odp/api/barrier.h | 11 ++++- > include/odp/api/sync.h | 82 > ++++++++++++++++++++----------- > platform/linux-generic/include/odp/sync.h | 28 +++++++++++ > 3 files changed, 90 insertions(+), 31 deletions(-) > > diff --git a/include/odp/api/barrier.h b/include/odp/api/barrier.h > index 8ca2647..823eae6 100644 > --- a/include/odp/api/barrier.h > +++ b/include/odp/api/barrier.h > @@ -18,8 +18,15 @@ > extern "C" { > #endif > > -/** @defgroup odp_barrier ODP BARRIER > - * Thread excution and memory ordering barriers. > +/** > + * @defgroup odp_barrier ODP BARRIER > + * Thread excution and memory ordering barriers. > + * > + * @details > + * <b> Thread execution barrier (odp_barrier_t) </b> > + * > + * Thread execution barrier synchronizes a group of threads to wait on the > + * barrier until the entire group has reached the barrier. > * @{ > */ > > diff --git a/include/odp/api/sync.h b/include/odp/api/sync.h > index 6477e74..c6f790c 100644 > --- a/include/odp/api/sync.h > +++ b/include/odp/api/sync.h > @@ -8,7 +8,7 @@ > /** > * @file > * > - * ODP synchronisation > + * ODP memory barriers > */ > > #ifndef ODP_API_SYNC_H_ > @@ -18,42 +18,66 @@ > extern "C" { > #endif > > -/** @addtogroup odp_barrier > +/** > + * @addtogroup odp_barrier > + * @details > + * <b> Memory barriers </b> > + * > + * Memory barriers enforce ordering of memory load and store operations > + * specified before and after the barrier. These barriers may affect both > + * compiler optimizations and CPU out-of-order execution. All ODP > + * synchronization mechanisms (e.g. 
execution barriers, locks, queues, > etc ) > + * include all necessary memory barriers, so these calls are not needed > when > + * using those. Also ODP atomic operations have memory ordered versions. > These > + * explicit barriers may be needed when thread synchronization is based on > + * a non-ODP defined mechanism. Depending on the HW platform, heavy usage > of > + * memory barriers may cause significant performance degradation. > + * > * @{ > */ > > /** > - * Synchronise stores > + * Memory barrier for release operations > * > - * Ensures that all CPU store operations that precede the > odp_sync_stores() > - * call are globally visible before any store operation that follows it. > + * This memory barrier has release semantics. It synchronizes with a > pairing > + * barrier for acquire operations. The releasing and acquiring threads > + * synchronize through shared memory. The releasing thread must call this > + * barrier before signaling the acquiring thread. After the acquiring > thread > + * receives the signal, it must call odp_mb_acquire() before it reads the > + * memory written by the releasing thread. > + * > + * This call is not needed when using ODP defined synchronization > mechanisms. > + * > + * @see odp_mb_acquire() > */ > -static inline void odp_sync_stores(void) > -{ > -#if defined __x86_64__ || defined __i386__ > - > - __asm__ __volatile__ ("sfence\n" : : : "memory"); > - > -#elif defined(__arm__) > -#if __ARM_ARCH == 6 > - __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \ > - : : "r" (0) : "memory"); > -#elif __ARM_ARCH >= 7 || defined __aarch64__ > - > - __asm__ __volatile__ ("dmb st" : : : "memory"); > -#else > - __asm__ __volatile__ ("" : : : "memory"); > -#endif > - > -#elif defined __OCTEON__ > - > - __asm__ __volatile__ ("syncws\n" : : : "memory"); > +void odp_mb_release(void); > > -#else > - __sync_synchronize(); > -#endif > -} > +/** > + * Memory barrier for acquire operations > + * > + * This memory barrier has acquire semantics. 
It synchronizes with a > pairing > + * barrier for release operations. The releasing and acquiring threads > + * synchronize through shared memory. The releasing thread must call > + * odp_mb_release() before signaling the acquiring thread. After the > acquiring > + * thread receives the signal, it must call this barrier before it reads > the > + * memory written by the releasing thread. > + * > + * This call is not needed when using ODP defined synchronization > mechanisms. > + * > + * @see odp_mb_release() > + */ > +void odp_mb_acquire(void); > > +/** > + * Full memory barrier > + * > + * This is a full memory barrier. It guarantees that all load and store > + * operations specified before it are visible to other threads before > + * all load and store operations specified after it. > + * > + * This call is not needed when using ODP defined synchronization > mechanisms. > + */ > +void odp_mb_full(void); > > /** > * @} > diff --git a/platform/linux-generic/include/odp/sync.h > b/platform/linux-generic/include/odp/sync.h > index bc73083..bfe67ee 100644 > --- a/platform/linux-generic/include/odp/sync.h > +++ b/platform/linux-generic/include/odp/sync.h > @@ -17,6 +17,34 @@ > extern "C" { > #endif > > +/** @ingroup odp_barrier > + * @{ > + */ > + > +static inline void odp_mb_release(void) > +{ > + __atomic_thread_fence(__ATOMIC_RELEASE); > +} > + > +static inline void odp_mb_acquire(void) > +{ > + __atomic_thread_fence(__ATOMIC_ACQUIRE); > +} > + > +static inline void odp_mb_full(void) > +{ > + __atomic_thread_fence(__ATOMIC_SEQ_CST); > +} > + > +static inline void odp_sync_stores(void) > +{ > + __atomic_thread_fence(__ATOMIC_RELEASE); > +} > + > +/** > + * @} > + */ > + > #include <odp/api/sync.h> > > #ifdef __cplusplus > -- > 2.6.3 > > _______________________________________________ > lng-odp mailing list > [email protected] > https://lists.linaro.org/mailman/listinfo/lng-odp >
_______________________________________________ lng-odp mailing list [email protected] https://lists.linaro.org/mailman/listinfo/lng-odp
