On 2016-03-31 12:58:55 +0200, Andres Freund wrote:
> On 2016-03-31 06:54:02 -0400, Robert Haas wrote:
> > On Wed, Mar 30, 2016 at 3:16 AM, Andres Freund <and...@anarazel.de> wrote:
> > > Yea, as Tom pointed out that's not going to work.  I'll try to write a
> > > patch for approach 1).
> > 
> > Does this mean that any platform that wants to perform well will now
> > need a sub-4-byte spinlock implementation?  That has a somewhat
> > uncomfortable sound to it.
> 
> Oh. I confused my approaches. I was thinking about going for 2):
> 
> > 2) Replace the lwlock spinlock by a bit in LWLock->state. That'd avoid
> >    embedding the spinlock, and actually might allow to avoid one atomic
> >    op in a number of cases.
> 
> precisely because of that concern.
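
To make the idea concrete before you dive into the diff: reserve one bit
of LWLock->state as the wait-queue lock, take it with an atomic fetch-or
(spinning while somebody else holds it), and drop it again with a
fetch-and. Here's a rough standalone sketch of just that locking pattern,
using plain C11 atomics rather than the pg_atomic_* wrappers the actual
patch uses (the names and the flag value are made up for illustration):

#include <stdatomic.h>
#include <stdint.h>

/* illustrative only: reserve one high bit of the 32 bit state word */
#define LOCKED_FLAG	((uint32_t) 1 << 28)

static void
wait_list_lock(_Atomic uint32_t *state)
{
	uint32_t	old = atomic_load(state);

	for (;;)
	{
		if (old & LOCKED_FLAG)
		{
			/* somebody else holds the flag: spin and re-read */
			old = atomic_load(state);
			continue;
		}

		/* try to set the flag; we own the lock iff it was clear before */
		old = atomic_fetch_or(state, LOCKED_FLAG);
		if (!(old & LOCKED_FLAG))
			break;
	}
}

static void
wait_list_unlock(_Atomic uint32_t *state)
{
	/* clear only our flag, leaving the other state bits alone */
	atomic_fetch_and(state, ~LOCKED_FLAG);
}

The other half of the trick is that on the wakeup/release path the flag
can be cleared in the same compare-exchange that updates the other state
bits, which is where the "might allow to avoid one atomic op" above comes
from.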

Here's a WIP patch to evaluate. Dilip/Ashutosh, could you perhaps run
some benchmarks, to see whether this addresses the performance issues?

I guess it'd be interesting both to compare master with master + patch,
and to compare this thread's latest patch with this patch additionally applied.

Thanks!

Andres
From 623581574409bdf5cbfbba005bb2e961cb689573 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Thu, 31 Mar 2016 14:14:20 +0200
Subject: [PATCH 1/4] WIP: Avoid the use of a separate spinlock to protect
 LWLock's wait queue.

Previously we used a spinlock, in addition to the atomically manipulated
->state field, to protect the wait queue. But it's pretty simple to
instead perform the locking using a flag in state.

This tries to address a performance regression on PPC due to
6150a1b0. Our PPC spinlocks are 4 bytes each, which makes BufferDesc
exceed 64 bytes, causing cacheline-sharing issues.

Discussion: caa4ek1+zeb8pmwwktf+3brs0pt4ux6rs6aom0uip8c6shjw...@mail.gmail.com
    20160327121858.zrmrjegmji2ym...@alap3.anarazel.de
---
 src/backend/storage/lmgr/lwlock.c | 178 ++++++++++++++++++++------------------
 src/include/storage/lwlock.h      |   1 -
 2 files changed, 94 insertions(+), 85 deletions(-)

diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 53ae7d5..ec6baf6 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -97,6 +97,7 @@ extern slock_t *ShmemLock;
 
 #define LW_FLAG_HAS_WAITERS			((uint32) 1 << 30)
 #define LW_FLAG_RELEASE_OK			((uint32) 1 << 29)
+#define LW_FLAG_LOCKED				((uint32) 1 << 28)
 
 #define LW_VAL_EXCLUSIVE			((uint32) 1 << 24)
 #define LW_VAL_SHARED				1
@@ -711,7 +712,6 @@ RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
 void
 LWLockInitialize(LWLock *lock, int tranche_id)
 {
-	SpinLockInit(&lock->mutex);
 	pg_atomic_init_u32(&lock->state, LW_FLAG_RELEASE_OK);
 #ifdef LOCK_DEBUG
 	pg_atomic_init_u32(&lock->nwaiters, 0);
@@ -842,6 +842,56 @@ LWLockAttemptLock(LWLock *lock, LWLockMode mode)
 	pg_unreachable();
 }
 
+static void
+LWLockWaitListLock(LWLock *lock)
+{
+	uint32 old_state;
+#ifdef LWLOCK_STATS
+	lwlock_stats *lwstats;
+	uint32 delays = 0;
+
+	lwstats = get_lwlock_stats_entry(lock);
+#endif
+
+	old_state = pg_atomic_read_u32(&lock->state);
+	while (true)
+	{
+		if (old_state & LW_FLAG_LOCKED)
+		{
+			/* FIXME: add exponential backoff */
+			pg_spin_delay();
+			old_state = pg_atomic_read_u32(&lock->state);
+#ifdef LWLOCK_STATS
+			delays++;
+#endif
+		}
+		else
+		{
+			old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
+			if (!(old_state & LW_FLAG_LOCKED))
+			{
+				/* got lock */
+				break;
+			}
+		}
+	}
+
+#ifdef LWLOCK_STATS
+	lwstats->spin_delay_count += delays;
+#endif
+
+}
+
+static void
+LWLockWaitListUnlock(LWLock *lock)
+{
+	uint32 old_state PG_USED_FOR_ASSERTS_ONLY;
+
+	old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
+
+	Assert(old_state & LW_FLAG_LOCKED);
+}
+
 /*
  * Wakeup all the lockers that currently have a chance to acquire the lock.
  */
@@ -852,22 +902,13 @@ LWLockWakeup(LWLock *lock)
 	bool		wokeup_somebody = false;
 	dlist_head	wakeup;
 	dlist_mutable_iter iter;
-#ifdef LWLOCK_STATS
-	lwlock_stats *lwstats;
-
-	lwstats = get_lwlock_stats_entry(lock);
-#endif
 
 	dlist_init(&wakeup);
 
 	new_release_ok = true;
 
 	/* Acquire mutex.  Time spent holding mutex should be short! */
-#ifdef LWLOCK_STATS
-	lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
-#else
-	SpinLockAcquire(&lock->mutex);
-#endif
+	LWLockWaitListLock(lock);
 
 	dlist_foreach_modify(iter, &lock->waiters)
 	{
@@ -904,19 +945,34 @@ LWLockWakeup(LWLock *lock)
 
 	Assert(dlist_is_empty(&wakeup) || pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS);
 
-	/* Unset both flags at once if required */
-	if (!new_release_ok && dlist_is_empty(&wakeup))
-		pg_atomic_fetch_and_u32(&lock->state,
-								~(LW_FLAG_RELEASE_OK | LW_FLAG_HAS_WAITERS));
-	else if (!new_release_ok)
-		pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_RELEASE_OK);
-	else if (dlist_is_empty(&wakeup))
-		pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
-	else if (new_release_ok)
-		pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
+	/* unset required flags, and release lock, in one fell swoop */
+	{
+		uint32 old_state;
+		uint32 desired_state;
 
-	/* We are done updating the shared state of the lock queue. */
-	SpinLockRelease(&lock->mutex);
+		old_state = pg_atomic_read_u32(&lock->state);
+		while (true)
+		{
+			desired_state = old_state;
+
+			/* Unset both flags at once if required */
+			if (!new_release_ok && dlist_is_empty(&wakeup))
+				desired_state &= ~(LW_FLAG_RELEASE_OK | LW_FLAG_HAS_WAITERS);
+			else if (!new_release_ok)
+				desired_state &= ~LW_FLAG_RELEASE_OK;
+			else if (dlist_is_empty(&wakeup))
+				desired_state &= ~LW_FLAG_HAS_WAITERS;
+			else if (new_release_ok)
+				desired_state |= LW_FLAG_RELEASE_OK;
+
+			/* release lock */
+			desired_state &= ~LW_FLAG_LOCKED;
+
+			if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
+											   desired_state))
+				break;
+		}
+	}
 
 	/* Awaken any waiters I removed from the queue. */
 	dlist_foreach_modify(iter, &wakeup)
@@ -933,7 +989,7 @@ LWLockWakeup(LWLock *lock)
 		 * that happens before the list unlink happens, the list would end up
 		 * being corrupted.
 		 *
-		 * The barrier pairs with the SpinLockAcquire() when enqueing for
+		 * The barrier pairs with the LWLockWaitListLock() when enqueing for
 		 * another lock.
 		 */
 		pg_write_barrier();
@@ -950,12 +1006,6 @@ LWLockWakeup(LWLock *lock)
 static void
 LWLockQueueSelf(LWLock *lock, LWLockMode mode)
 {
-#ifdef LWLOCK_STATS
-	lwlock_stats *lwstats;
-
-	lwstats = get_lwlock_stats_entry(lock);
-#endif
-
 	/*
 	 * If we don't have a PGPROC structure, there's no way to wait. This
 	 * should never occur, since MyProc should only be null during shared
@@ -967,11 +1017,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode)
 	if (MyProc->lwWaiting)
 		elog(PANIC, "queueing for lock while waiting on another one");
 
-#ifdef LWLOCK_STATS
-	lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
-#else
-	SpinLockAcquire(&lock->mutex);
-#endif
+	LWLockWaitListLock(lock);
 
 	/* setting the flag is protected by the spinlock */
 	pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
@@ -986,7 +1032,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode)
 		dlist_push_tail(&lock->waiters, &MyProc->lwWaitLink);
 
 	/* Can release the mutex now */
-	SpinLockRelease(&lock->mutex);
+	LWLockWaitListUnlock(lock);
 
 #ifdef LOCK_DEBUG
 	pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
@@ -1007,19 +1053,7 @@ LWLockDequeueSelf(LWLock *lock)
 	bool		found = false;
 	dlist_mutable_iter iter;
 
-#ifdef LWLOCK_STATS
-	lwlock_stats *lwstats;
-
-	lwstats = get_lwlock_stats_entry(lock);
-
-	lwstats->dequeue_self_count++;
-#endif
-
-#ifdef LWLOCK_STATS
-	lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
-#else
-	SpinLockAcquire(&lock->mutex);
-#endif
+	LWLockWaitListLock(lock);
 
 	/*
 	 * Can't just remove ourselves from the list, but we need to iterate over
@@ -1043,7 +1077,8 @@ LWLockDequeueSelf(LWLock *lock)
 		pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
 	}
 
-	SpinLockRelease(&lock->mutex);
+	/* XXX: combine with fetch_and above? */
+	LWLockWaitListUnlock(lock);
 
 	/* clear waiting state again, nice for debugging */
 	if (found)
@@ -1460,11 +1495,6 @@ LWLockConflictsWithVar(LWLock *lock,
 {
 	bool		mustwait;
 	uint64		value;
-#ifdef LWLOCK_STATS
-	lwlock_stats *lwstats;
-
-	lwstats = get_lwlock_stats_entry(lock);
-#endif
 
 	/*
 	 * Test first to see if it the slot is free right now.
@@ -1484,17 +1514,13 @@ LWLockConflictsWithVar(LWLock *lock,
 	*result = false;
 
 	/*
-	 * Read value using spinlock as we can't rely on atomic 64 bit
-	 * reads/stores.  TODO: On platforms with a way to do atomic 64 bit
-	 * reads/writes the spinlock could be optimized away.
+	 * Read value using the lwlock's internal lock, as we can't generally rely
+	 * on atomic 64 bit reads/stores.  TODO: On platforms with a way to do
+	 * atomic 64 bit reads/writes the spinlock could be optimized away.
 	 */
-#ifdef LWLOCK_STATS
-	lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
-#else
-	SpinLockAcquire(&lock->mutex);
-#endif
+	LWLockWaitListLock(lock);
 	value = *valptr;
-	SpinLockRelease(&lock->mutex);
+	LWLockWaitListUnlock(lock);
 
 	if (value != oldval)
 	{
@@ -1668,22 +1694,13 @@ LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val)
 {
 	dlist_head	wakeup;
 	dlist_mutable_iter iter;
-#ifdef LWLOCK_STATS
-	lwlock_stats *lwstats;
-
-	lwstats = get_lwlock_stats_entry(lock);
-#endif
 
 	PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
 
 	dlist_init(&wakeup);
 
 	/* Acquire mutex.  Time spent holding mutex should be short! */
-#ifdef LWLOCK_STATS
-	lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
-#else
-	SpinLockAcquire(&lock->mutex);
-#endif
+	LWLockWaitListLock(lock);
 
 	Assert(pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE);
 
@@ -1706,7 +1723,7 @@ LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val)
 	}
 
 	/* We are done updating shared state of the lock itself. */
-	SpinLockRelease(&lock->mutex);
+	LWLockWaitListUnlock(lock);
 
 	/*
 	 * Awaken any waiters I removed from the queue.
@@ -1804,21 +1821,14 @@ LWLockRelease(LWLock *lock)
 void
 LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val)
 {
-#ifdef LWLOCK_STATS
-	lwlock_stats *lwstats;
-
-	lwstats = get_lwlock_stats_entry(lock);
-	lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
-#else
-	SpinLockAcquire(&lock->mutex);
-#endif
+	LWLockWaitListLock(lock);
 	/*
 	 * Set the variable's value before releasing the lock, that prevents race
 	 * a race condition wherein a new locker acquires the lock, but hasn't yet
 	 * set the variables value.
 	 */
 	*valptr = val;
-	SpinLockRelease(&lock->mutex);
+	LWLockWaitListUnlock(lock);
 
 	LWLockRelease(lock);
 }
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 5e6299a..f5b94e0 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -57,7 +57,6 @@ typedef struct LWLockTranche
  */
 typedef struct LWLock
 {
-	slock_t		mutex;			/* Protects LWLock and queue of PGPROCs */
 	uint16		tranche;		/* tranche ID */
 
 	pg_atomic_uint32 state;		/* state of exclusive/nonexclusive lockers */
-- 
2.7.0.229.g701fa7f.dirty
