Thanks Ola. The original bug is that this fails compiling with clang on 32-bit systems and the reason is that the struct in that environment winds up being 24 bytes rather than 16 bytes long, so the ODP_STATIC_ASSERT() fails.
My original (simple) patch is at http://patches.opendataplane.org/patch/5932/ however Maxim didn't like it. Perhaps you can offer a compromise? On Mon, May 16, 2016 at 4:21 PM, Ola Liljedahl <[email protected]> wrote: > I disagree. The 128-bit support is important because that's the lock-free > timer implementation which was the whole idea. The lock-based code is just > a work-around. We should rather add support in configure to detect compiler > support for the -mcx16 flag which enables 128-bit CAS on x86-64. Now when I > have an ARM64 target, I can actually write and test 128-bit CAS support on > ARM (A64 ISA in AArch64 mode) and contribute that in another patch. I am > not sure whether common compilers (e.g. GCC) properly supports 128-bit CAS > on ARM64, might have to use LDXP/STXP (load/store exclusive pair) directly. > > -- Ola > > On 16 May 2016 at 22:43, Bill Fischofer <[email protected]> wrote: > >> On Mon, May 16, 2016 at 2:15 PM, Maxim Uvarov <[email protected]> >> wrote: >> >> > Remove totally untested branch with 128 bit optimizations >> > for timer code. >> > This also fixes static assert bug: >> > https://bugs.linaro.org/show_bug.cgi?id=2211 >> > >> > Signed-off-by: Maxim Uvarov <[email protected]> >> > >> >> Reviewed-and-tested-by: Bill Fischofer <[email protected]> >> >> >> > --- >> > platform/linux-generic/odp_timer.c | 107 >> > ------------------------------------- >> > 1 file changed, 107 deletions(-) >> > >> > diff --git a/platform/linux-generic/odp_timer.c >> > b/platform/linux-generic/odp_timer.c >> > index 6b84309..be28da3 100644 >> > --- a/platform/linux-generic/odp_timer.c >> > +++ b/platform/linux-generic/odp_timer.c >> > @@ -11,15 +11,7 @@ >> > * >> > */ >> > >> > -/* Check if compiler supports 16-byte atomics. GCC needs -mcx16 flag on >> > x86 */ >> > -/* Using spin lock actually seems faster on Core2 */ >> > -#ifdef ODP_ATOMIC_U128 >> > -/* TB_NEEDS_PAD defined if sizeof(odp_buffer_t) != 8 */ >> > -#define TB_NEEDS_PAD >> > -#define TB_SET_PAD(x) ((x).pad = 0) >> > -#else >> > #define TB_SET_PAD(x) (void)(x) >> > -#endif >> > >> > #include <odp_posix_extensions.h> >> > >> > @@ -70,11 +62,9 @@ >> > * Mutual exclusion in the absence of CAS16 >> > >> > >> *****************************************************************************/ >> > >> > -#ifndef ODP_ATOMIC_U128 >> > #define NUM_LOCKS 1024 >> > static _odp_atomic_flag_t locks[NUM_LOCKS]; /* Multiple locks per cache >> > line! */ >> > #define IDX2LOCK(idx) (&locks[(idx) % NUM_LOCKS]) >> > -#endif >> > >> > >> > >> /****************************************************************************** >> > * Translation between timeout buffer and timeout header >> > @@ -98,17 +88,9 @@ static odp_timeout_hdr_t *timeout_hdr(odp_timeout_t >> tmo) >> > typedef struct tick_buf_s { >> > odp_atomic_u64_t exp_tck;/* Expiration tick or TMO_xxx */ >> > odp_buffer_t tmo_buf;/* ODP_BUFFER_INVALID if timer not active >> */ >> > -#ifdef TB_NEEDS_PAD >> > - uint32_t pad;/* Need to be able to access padding for successful >> > CAS */ >> > -#endif >> > } tick_buf_t >> > -#ifdef ODP_ATOMIC_U128 >> > -ODP_ALIGNED(16) /* 16-byte atomic operations need properly aligned >> > addresses */ >> > -#endif >> > ; >> > >> > -ODP_STATIC_ASSERT(sizeof(tick_buf_t) == 16, "sizeof(tick_buf_t) == >> 16"); >> > - >> > typedef struct odp_timer_s { >> > void *user_ptr; >> > odp_queue_t queue;/* Used for free list when timer is free */ >> > @@ -378,34 +360,6 @@ static bool timer_reset(uint32_t idx, >> > tick_buf_t *tb = &tp->tick_buf[idx]; >> > >> > if (tmo_buf == NULL || *tmo_buf == ODP_BUFFER_INVALID) { >> > -#ifdef ODP_ATOMIC_U128 >> > - tick_buf_t new, old; >> > - do { >> > - /* Relaxed and non-atomic read of current >> values */ >> > - old.exp_tck.v = tb->exp_tck.v; >> > - old.tmo_buf = tb->tmo_buf; >> > - TB_SET_PAD(old); >> > - /* Check if there actually is a timeout buffer >> > - * present */ >> > - if (old.tmo_buf == ODP_BUFFER_INVALID) { >> > - /* Cannot reset a timer with neither old >> > nor >> > - * new timeout buffer */ >> > - success = false; >> > - break; >> > - } >> > - /* Set up new values */ >> > - new.exp_tck.v = abs_tck; >> > - new.tmo_buf = old.tmo_buf; >> > - TB_SET_PAD(new); >> > - /* Atomic CAS will fail if we experienced torn >> > reads, >> > - * retry update sequence until CAS succeeds */ >> > - } while (!_odp_atomic_u128_cmp_xchg_mm( >> > - (_odp_atomic_u128_t *)tb, >> > - (_uint128_t *)&old, >> > - (_uint128_t *)&new, >> > - _ODP_MEMMODEL_RLS, >> > - _ODP_MEMMODEL_RLX)); >> > -#else >> > #ifdef __ARM_ARCH >> > /* Since barriers are not good for C-A15, we take an >> > * alternative approach using relaxed memory model */ >> > @@ -453,7 +407,6 @@ static bool timer_reset(uint32_t idx, >> > /* Release the lock */ >> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >> > #endif >> > -#endif >> > } else { >> > /* We have a new timeout buffer which replaces any old >> one >> > */ >> > /* Fill in some (constant) header fields for timeout >> > events */ >> > @@ -468,19 +421,6 @@ static bool timer_reset(uint32_t idx, >> > } >> > /* Else ignore buffers of other types */ >> > odp_buffer_t old_buf = ODP_BUFFER_INVALID; >> > -#ifdef ODP_ATOMIC_U128 >> > - tick_buf_t new, old; >> > - new.exp_tck.v = abs_tck; >> > - new.tmo_buf = *tmo_buf; >> > - TB_SET_PAD(new); >> > - /* We are releasing the new timeout buffer to some other >> > - * thread */ >> > - _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb, >> > - (_uint128_t *)&new, >> > - (_uint128_t *)&old, >> > - _ODP_MEMMODEL_ACQ_RLS); >> > - old_buf = old.tmo_buf; >> > -#else >> > /* Take a related lock */ >> > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) >> > /* While lock is taken, spin using relaxed >> loads */ >> > @@ -496,7 +436,6 @@ static bool timer_reset(uint32_t idx, >> > >> > /* Release the lock */ >> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >> > -#endif >> > /* Return old timeout buffer */ >> > *tmo_buf = old_buf; >> > } >> > @@ -510,18 +449,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool >> *tp, >> > tick_buf_t *tb = &tp->tick_buf[idx]; >> > odp_buffer_t old_buf; >> > >> > -#ifdef ODP_ATOMIC_U128 >> > - tick_buf_t new, old; >> > - /* Update the timer state (e.g. cancel the current timeout) */ >> > - new.exp_tck.v = new_state; >> > - /* Swap out the old buffer */ >> > - new.tmo_buf = ODP_BUFFER_INVALID; >> > - TB_SET_PAD(new); >> > - _odp_atomic_u128_xchg_mm((_odp_atomic_u128_t *)tb, >> > - (_uint128_t *)&new, (_uint128_t *)&old, >> > - _ODP_MEMMODEL_RLX); >> > - old_buf = old.tmo_buf; >> > -#else >> > /* Take a related lock */ >> > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) >> > /* While lock is taken, spin using relaxed loads */ >> > @@ -537,7 +464,6 @@ static odp_buffer_t timer_cancel(odp_timer_pool *tp, >> > >> > /* Release the lock */ >> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >> > -#endif >> > /* Return the old buffer */ >> > return old_buf; >> > } >> > @@ -548,34 +474,6 @@ static unsigned timer_expire(odp_timer_pool *tp, >> > uint32_t idx, uint64_t tick) >> > tick_buf_t *tb = &tp->tick_buf[idx]; >> > odp_buffer_t tmo_buf = ODP_BUFFER_INVALID; >> > uint64_t exp_tck; >> > -#ifdef ODP_ATOMIC_U128 >> > - /* Atomic re-read for correctness */ >> > - exp_tck = _odp_atomic_u64_load_mm(&tb->exp_tck, >> _ODP_MEMMODEL_RLX); >> > - /* Re-check exp_tck */ >> > - if (odp_likely(exp_tck <= tick)) { >> > - /* Attempt to grab timeout buffer, replace with inactive >> > timer >> > - * and invalid buffer */ >> > - tick_buf_t new, old; >> > - old.exp_tck.v = exp_tck; >> > - old.tmo_buf = tb->tmo_buf; >> > - TB_SET_PAD(old); >> > - /* Set the inactive/expired bit keeping the expiration >> > tick so >> > - * that we can check against the expiration tick of the >> > timeout >> > - * when it is received */ >> > - new.exp_tck.v = exp_tck | TMO_INACTIVE; >> > - new.tmo_buf = ODP_BUFFER_INVALID; >> > - TB_SET_PAD(new); >> > - int succ = _odp_atomic_u128_cmp_xchg_mm( >> > - (_odp_atomic_u128_t *)tb, >> > - (_uint128_t *)&old, (_uint128_t *)&new, >> > - _ODP_MEMMODEL_RLS, _ODP_MEMMODEL_RLX); >> > - if (succ) >> > - tmo_buf = old.tmo_buf; >> > - /* Else CAS failed, something changed => skip timer >> > - * this tick, it will be checked again next tick */ >> > - } >> > - /* Else false positive, ignore */ >> > -#else >> > /* Take a related lock */ >> > while (_odp_atomic_flag_tas(IDX2LOCK(idx))) >> > /* While lock is taken, spin using relaxed loads */ >> > @@ -600,7 +498,6 @@ static unsigned timer_expire(odp_timer_pool *tp, >> > uint32_t idx, uint64_t tick) >> > /* Else false positive, ignore */ >> > /* Release the lock */ >> > _odp_atomic_flag_clear(IDX2LOCK(idx)); >> > -#endif >> > if (odp_likely(tmo_buf != ODP_BUFFER_INVALID)) { >> > /* Fill in expiration tick for timeout events */ >> > if (odp_event_type(odp_buffer_to_event(tmo_buf)) == >> > @@ -974,13 +871,9 @@ void odp_timeout_free(odp_timeout_t tmo) >> > >> > int odp_timer_init_global(void) >> > { >> > -#ifndef ODP_ATOMIC_U128 >> > uint32_t i; >> > for (i = 0; i < NUM_LOCKS; i++) >> > _odp_atomic_flag_clear(&locks[i]); >> > -#else >> > - ODP_DBG("Using lock-less timer implementation\n"); >> > -#endif >> > odp_atomic_init_u32(&num_timer_pools, 0); >> > >> > block_sigalarm(); >> > -- >> > 2.7.1.250.gff4ea60 >> > >> > _______________________________________________ >> > lng-odp mailing list >> > [email protected] >> > https://lists.linaro.org/mailman/listinfo/lng-odp >> > >> _______________________________________________ >> lng-odp mailing list >> [email protected] >> https://lists.linaro.org/mailman/listinfo/lng-odp >> > > _______________________________________________ lng-odp mailing list [email protected] https://lists.linaro.org/mailman/listinfo/lng-odp
