On 2016-03-31 12:58:55 +0200, Andres Freund wrote:
> On 2016-03-31 06:54:02 -0400, Robert Haas wrote:
> > On Wed, Mar 30, 2016 at 3:16 AM, Andres Freund <and...@anarazel.de> wrote:
> > > Yea, as Tom pointed out that's not going to work. I'll try to write a
> > > patch for approach 1).
> >
> > Does this mean that any platform that wants to perform well will now
> > need a sub-4-byte spinlock implementation? That has a somewhat
> > uncomfortable sound to it.
>
> Oh. I confused my approaches. I was thinking about going for 2):
>
> > 2) Replace the lwlock spinlock by a bit in LWLock->state. That'd avoid
> >    embedding the spinlock, and might actually allow us to avoid one
> >    atomic op in a number of cases.
>
> precisely because of that concern.
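To make 2) concrete: one bit of the lock's existing 32 bit state word doubles as a mutex for the wait queue, so no separate slock_t has to be embedded in the struct. Here's a minimal standalone sketch of that scheme, using C11 atomics rather than our pg_atomic_* wrappers; the names (FLAG_LOCKED, wait_list_lock, wait_list_unlock) are made up for illustration and don't exist in the tree:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define FLAG_LOCKED ((uint32_t) 1 << 28)    /* one bit of the state word */

static void
wait_list_lock(_Atomic uint32_t *state)
{
    while (true)
    {
        /* try to grab the bit; the returned old value says if we got it */
        uint32_t old = atomic_fetch_or(state, FLAG_LOCKED);

        if (!(old & FLAG_LOCKED))
            return;             /* bit was clear before: lock acquired */

        /* owned by somebody else: spin until the bit reads as clear */
        while (atomic_load(state) & FLAG_LOCKED)
            ;                   /* real code would use pg_spin_delay() */
    }
}

static void
wait_list_unlock(_Atomic uint32_t *state)
{
    /* clearing the bit releases the lock, leaving all other bits alone */
    atomic_fetch_and(state, ~FLAG_LOCKED);
}

Acquire is an atomic fetch-or of the lock bit: if the bit was clear in the returned old value we own the lock, otherwise we spin until it reads as clear and retry. Release just clears the bit, and since release is itself a single atomic op on state, it can be folded into another state update - which is where the possible savings of an atomic op come from.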
Here's a WIP patch to evaluate. Dilip/Ashutosh, could you perhaps run some
benchmarks to see whether this addresses the performance issues? It'd be
interesting to compare both master against master + this patch, and this
thread's latest patch against that patch with this one applied on top.

Thanks!

Andres
From 623581574409bdf5cbfbba005bb2e961cb689573 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Thu, 31 Mar 2016 14:14:20 +0200
Subject: [PATCH 1/4] WIP: Avoid the use of a separate spinlock to protect
 LWLock's wait queue.

Previously we used a spinlock, in addition to the atomically manipulated
->state field, to protect the wait queue. But it's pretty simple to
instead perform the locking using a flag in state.

This tries to address a performance regression on PPC due to 6150a1b0.
Our PPC spinlocks are 4 bytes each, which makes BufferDesc exceed
64 bytes, causing cacheline-sharing issues.

Discussion: caa4ek1+zeb8pmwwktf+3brs0pt4ux6rs6aom0uip8c6shjw...@mail.gmail.com
    20160327121858.zrmrjegmji2ym...@alap3.anarazel.de
---
 src/backend/storage/lmgr/lwlock.c | 178 ++++++++++++++++++++------------------
 src/include/storage/lwlock.h      |   1 -
 2 files changed, 94 insertions(+), 85 deletions(-)

diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 53ae7d5..ec6baf6 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -97,6 +97,7 @@ extern slock_t *ShmemLock;
 
 #define LW_FLAG_HAS_WAITERS    ((uint32) 1 << 30)
 #define LW_FLAG_RELEASE_OK     ((uint32) 1 << 29)
+#define LW_FLAG_LOCKED         ((uint32) 1 << 28)
 
 #define LW_VAL_EXCLUSIVE       ((uint32) 1 << 24)
 #define LW_VAL_SHARED          1
@@ -711,7 +712,6 @@ RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
 void
 LWLockInitialize(LWLock *lock, int tranche_id)
 {
-    SpinLockInit(&lock->mutex);
     pg_atomic_init_u32(&lock->state, LW_FLAG_RELEASE_OK);
 #ifdef LOCK_DEBUG
     pg_atomic_init_u32(&lock->nwaiters, 0);
@@ -842,6 +842,56 @@ LWLockAttemptLock(LWLock *lock, LWLockMode mode)
     pg_unreachable();
 }
 
+static void
+LWLockWaitListLock(LWLock *lock)
+{
+    uint32      old_state;
+#ifdef LWLOCK_STATS
+    lwlock_stats *lwstats;
+    uint32      delays = 0;
+
+    lwstats = get_lwlock_stats_entry(lock);
+#endif
+
+    old_state = pg_atomic_read_u32(&lock->state);
+    while (true)
+    {
+        if (old_state & LW_FLAG_LOCKED)
+        {
+            /* FIXME: add exponential backoff */
+            pg_spin_delay();
+            old_state = pg_atomic_read_u32(&lock->state);
+#ifdef LWLOCK_STATS
+            delays++;
+#endif
+        }
+        else
+        {
+            old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
+            if (!(old_state & LW_FLAG_LOCKED))
+            {
+                /* got lock */
+                break;
+            }
+        }
+    }
+
+#ifdef LWLOCK_STATS
+    lwstats->spin_delay_count += delays;
+#endif
+}
+
+static void
+LWLockWaitListUnlock(LWLock *lock)
+{
+    uint32      old_state PG_USED_FOR_ASSERTS_ONLY;
+
+    old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
+
+    Assert(old_state & LW_FLAG_LOCKED);
+}
+
 /*
  * Wakeup all the lockers that currently have a chance to acquire the lock.
  */
@@ -852,22 +902,13 @@ LWLockWakeup(LWLock *lock)
     bool        wokeup_somebody = false;
     dlist_head  wakeup;
     dlist_mutable_iter iter;
-#ifdef LWLOCK_STATS
-    lwlock_stats *lwstats;
-
-    lwstats = get_lwlock_stats_entry(lock);
-#endif
 
     dlist_init(&wakeup);
 
     new_release_ok = true;
 
     /* Acquire mutex.  Time spent holding mutex should be short! */
-#ifdef LWLOCK_STATS
-    lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
-#else
-    SpinLockAcquire(&lock->mutex);
-#endif
+    LWLockWaitListLock(lock);
 
     dlist_foreach_modify(iter, &lock->waiters)
     {
@@ -904,19 +945,34 @@ LWLockWakeup(LWLock *lock)
     Assert(dlist_is_empty(&wakeup) ||
            pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS);
 
-    /* Unset both flags at once if required */
-    if (!new_release_ok && dlist_is_empty(&wakeup))
-        pg_atomic_fetch_and_u32(&lock->state,
-                                ~(LW_FLAG_RELEASE_OK | LW_FLAG_HAS_WAITERS));
-    else if (!new_release_ok)
-        pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_RELEASE_OK);
-    else if (dlist_is_empty(&wakeup))
-        pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
-    else if (new_release_ok)
-        pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
+    /* unset required flags, and release lock, in one fell swoop */
+    {
+        uint32      old_state;
+        uint32      desired_state;
 
-    /* We are done updating the shared state of the lock queue. */
-    SpinLockRelease(&lock->mutex);
+        old_state = pg_atomic_read_u32(&lock->state);
+        while (true)
+        {
+            desired_state = old_state;
+
+            /* Unset both flags at once if required */
+            if (!new_release_ok && dlist_is_empty(&wakeup))
+                desired_state &= ~(LW_FLAG_RELEASE_OK | LW_FLAG_HAS_WAITERS);
+            else if (!new_release_ok)
+                desired_state &= ~LW_FLAG_RELEASE_OK;
+            else if (dlist_is_empty(&wakeup))
+                desired_state &= ~LW_FLAG_HAS_WAITERS;
+            else if (new_release_ok)
+                desired_state |= LW_FLAG_RELEASE_OK;
+
+            /* release lock */
+            desired_state &= ~LW_FLAG_LOCKED;
+
+            if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
+                                               desired_state))
+                break;
+        }
+    }
 
     /* Awaken any waiters I removed from the queue. */
     dlist_foreach_modify(iter, &wakeup)
@@ -933,7 +989,7 @@ LWLockWakeup(LWLock *lock)
          * that happens before the list unlink happens, the list would end up
          * being corrupted.
          *
-         * The barrier pairs with the SpinLockAcquire() when enqueing for
+         * The barrier pairs with the LWLockWaitListLock() when enqueueing for
          * another lock.
          */
         pg_write_barrier();
@@ -950,12 +1006,6 @@ LWLockWakeup(LWLock *lock)
 static void
 LWLockQueueSelf(LWLock *lock, LWLockMode mode)
 {
-#ifdef LWLOCK_STATS
-    lwlock_stats *lwstats;
-
-    lwstats = get_lwlock_stats_entry(lock);
-#endif
-
     /*
      * If we don't have a PGPROC structure, there's no way to wait. This
      * should never occur, since MyProc should only be null during shared
@@ -967,11 +1017,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode)
     if (MyProc->lwWaiting)
         elog(PANIC, "queueing for lock while waiting on another one");
 
-#ifdef LWLOCK_STATS
-    lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
-#else
-    SpinLockAcquire(&lock->mutex);
-#endif
+    LWLockWaitListLock(lock);
 
     /* setting the flag is protected by the spinlock */
     pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
@@ -986,7 +1032,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode)
         dlist_push_tail(&lock->waiters, &MyProc->lwWaitLink);
 
     /* Can release the mutex now */
-    SpinLockRelease(&lock->mutex);
+    LWLockWaitListUnlock(lock);
 
 #ifdef LOCK_DEBUG
     pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
@@ -1007,19 +1053,7 @@ LWLockDequeueSelf(LWLock *lock)
     bool        found = false;
     dlist_mutable_iter iter;
 
-#ifdef LWLOCK_STATS
-    lwlock_stats *lwstats;
-
-    lwstats = get_lwlock_stats_entry(lock);
-
-    lwstats->dequeue_self_count++;
-#endif
-
-#ifdef LWLOCK_STATS
-    lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
-#else
-    SpinLockAcquire(&lock->mutex);
-#endif
+    LWLockWaitListLock(lock);
 
     /*
      * Can't just remove ourselves from the list, but we need to iterate over
@@ -1043,7 +1077,8 @@ LWLockDequeueSelf(LWLock *lock)
             pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
     }
 
-    SpinLockRelease(&lock->mutex);
+    /* XXX: combine with fetch_and above? */
+    LWLockWaitListUnlock(lock);
 
     /* clear waiting state again, nice for debugging */
     if (found)
@@ -1460,11 +1495,6 @@ LWLockConflictsWithVar(LWLock *lock,
 {
     bool        mustwait;
     uint64      value;
-#ifdef LWLOCK_STATS
-    lwlock_stats *lwstats;
-
-    lwstats = get_lwlock_stats_entry(lock);
-#endif
 
     /*
      * Test first to see if it the slot is free right now.
@@ -1484,17 +1514,13 @@ LWLockConflictsWithVar(LWLock *lock,
     *result = false;
 
     /*
-     * Read value using spinlock as we can't rely on atomic 64 bit
-     * reads/stores. TODO: On platforms with a way to do atomic 64 bit
-     * reads/writes the spinlock could be optimized away.
+     * Read value using the lwlock's internal lock, as we can't generally
+     * rely on atomic 64 bit reads/stores. TODO: On platforms with a way to
+     * do atomic 64 bit reads/writes, the lock could be optimized away.
      */
-#ifdef LWLOCK_STATS
-    lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
-#else
-    SpinLockAcquire(&lock->mutex);
-#endif
+    LWLockWaitListLock(lock);
     value = *valptr;
-    SpinLockRelease(&lock->mutex);
+    LWLockWaitListUnlock(lock);
 
     if (value != oldval)
     {
@@ -1668,22 +1694,13 @@ LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val)
 {
     dlist_head  wakeup;
     dlist_mutable_iter iter;
-#ifdef LWLOCK_STATS
-    lwlock_stats *lwstats;
-
-    lwstats = get_lwlock_stats_entry(lock);
-#endif
 
     PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
 
     dlist_init(&wakeup);
 
     /* Acquire mutex.  Time spent holding mutex should be short! */
-#ifdef LWLOCK_STATS
-    lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
-#else
-    SpinLockAcquire(&lock->mutex);
-#endif
+    LWLockWaitListLock(lock);
 
     Assert(pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE);
 
@@ -1706,7 +1723,7 @@ LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val)
     }
 
     /* We are done updating shared state of the lock itself. */
-    SpinLockRelease(&lock->mutex);
+    LWLockWaitListUnlock(lock);
 
     /*
      * Awaken any waiters I removed from the queue.
@@ -1804,21 +1821,14 @@ LWLockRelease(LWLock *lock)
 void
 LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val)
 {
-#ifdef LWLOCK_STATS
-    lwlock_stats *lwstats;
-
-    lwstats = get_lwlock_stats_entry(lock);
-    lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
-#else
-    SpinLockAcquire(&lock->mutex);
-#endif
+    LWLockWaitListLock(lock);
 
     /*
      * Set the variable's value before releasing the lock, that prevents race
      * a race condition wherein a new locker acquires the lock, but hasn't yet
      * set the variables value.
      */
     *valptr = val;
-    SpinLockRelease(&lock->mutex);
+    LWLockWaitListUnlock(lock);
 
     LWLockRelease(lock);
 }
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 5e6299a..f5b94e0 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -57,7 +57,6 @@ typedef struct LWLockTranche
  */
 typedef struct LWLock
 {
-    slock_t     mutex;          /* Protects LWLock and queue of PGPROCs */
     uint16      tranche;        /* tranche ID */
     pg_atomic_uint32 state;     /* state of exclusive/nonexclusive lockers */
-- 
2.7.0.229.g701fa7f.dirty
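A note on the FIXME in LWLockWaitListLock(): the exponential backoff alluded
to there could look roughly like the sketch below. The doubling factor, the
cap, and the pg_usleep() fallback are illustrative guesses on my part, not
something the patch implements:

static void
LWLockWaitListLockWithBackoff(LWLock *lock)
{
    int         spins = 1;
    int         i;

    while (true)
    {
        uint32      old_state;

        /* try to grab the wait-list lock bit */
        old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
        if (!(old_state & LW_FLAG_LOCKED))
            break;              /* got the lock */

        /* contended: busy-wait, doubling the delay after each failure */
        for (i = 0; i < spins; i++)
            pg_spin_delay();

        if (spins < 1024)       /* arbitrary cap, for illustration only */
            spins *= 2;
        else
            pg_usleep(1000);    /* spinning looks futile; yield the CPU */
    }
}

Compared to the plain pg_spin_delay() loop in the patch, this bounds how
often waiters re-touch the contended cacheline when many backends pile up
on the same lock.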
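And to make the TODO in LWLockConflictsWithVar() concrete: if the protected
variable were switched from a plain uint64 to pg_atomic_uint64 (a
hypothetical change, not part of this patch), the wait-list lock around the
read could be dropped entirely:

/* hypothetical: valptr declared as pg_atomic_uint64 * instead of uint64 * */
static uint64
LWLockReadVar(pg_atomic_uint64 *valptr)
{
    /*
     * A single atomic load on platforms with native 64 bit atomics; on
     * platforms without them, the atomics layer emulates the read (via a
     * locked compare-exchange), so the value is still read consistently
     * and no LWLockWaitListLock()/LWLockWaitListUnlock() pair is needed.
     */
    return pg_atomic_read_u64(valptr);
}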
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers