On Mon, Jan 6, 2014 at 11:22 AM, Tom Lane <t...@sss.pgh.pa.us> wrote:
> I think we can eliminate the first of those.  Semaphores for spinlocks
> were a performance disaster fifteen years ago, and the situation has
> surely only gotten worse since then.  I do, however, believe that
> --disable-spinlocks has some use while porting to a new platform.
> (And I don't believe the argument advanced elsewhere in this thread
> that everybody uses gcc; much less that gcc's atomic intrinsics are
> of uniformly high quality even on oddball architectures.)
>
> Heikki's idea has some attraction for porting work whether or not
> you believe that DSM needs to work during initial porting.  By
> default, PG will try to create upwards of ten thousand spinlocks
> just for buffer headers.  It's likely that that will fail unless
> you whack around the kernel's SysV parameters.  Being able to
> constrain the number of semaphores to something sane would probably
> be useful.
>
> Having said all that, I'm not personally going to take the time to
> implement it, and I don't think the DSM patch should be required to
> either.  Somebody who actually needs it can go make it work.

Well, I took a look at this and it turns out not to be very hard, so
here's a patch.  Currently, we allocate 3 semaphores per shared buffer
and a bunch of others, but the 3 per shared buffer dominates, so you
end up with ~49k spinlocks for the default of 128MB shared_buffers.  I
chose to peg the number of semaphores at 1024, which is quite small
compared to the current allocation, but the number of spinlock
acquisitions that can be in progress at any given time is limited by
the number of running backends.  Even allowing for the birthday
paradox, that should be enough to run at least a few dozen backends
without suffering serious problems due to the multiplexing -
especially because in real workloads, contention is usually
concentrated around a small number of spinlocks that are unlikely to
all be mapped to the same underlying semaphore.

I'm happy enough with this way forward.  Objections?

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 048a189..7eae2a1 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -471,6 +471,9 @@ typedef struct
 	slock_t    *ShmemLock;
 	VariableCache ShmemVariableCache;
 	Backend    *ShmemBackendArray;
+#ifndef HAVE_SPINLOCKS
+	PGSemaphore	SpinlockSemaArray;
+#endif
 	LWLock	   *LWLockArray;
 	slock_t    *ProcStructLock;
 	PROC_HDR   *ProcGlobal;
@@ -5626,6 +5629,9 @@ save_backend_variables(BackendParameters *param, Port *port,
 	param->ShmemVariableCache = ShmemVariableCache;
 	param->ShmemBackendArray = ShmemBackendArray;
 
+#ifndef HAVE_SPINLOCKS
+	param->SpinlockSemaArray = SpinlockSemaArray;
+#endif
 	param->LWLockArray = LWLockArray;
 	param->ProcStructLock = ProcStructLock;
 	param->ProcGlobal = ProcGlobal;
@@ -5854,6 +5860,9 @@ restore_backend_variables(BackendParameters *param, Port *port)
 	ShmemVariableCache = param->ShmemVariableCache;
 	ShmemBackendArray = param->ShmemBackendArray;
 
+#ifndef HAVE_SPINLOCKS
+	SpinlockSemaArray = param->SpinlockSemaArray;
+#endif
 	LWLockArray = param->LWLockArray;
 	ProcStructLock = param->ProcStructLock;
 	ProcGlobal = param->ProcGlobal;
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 040c7aa..ad663ec 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -105,6 +105,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
 		 * need to be so careful during the actual allocation phase.
 		 */
 		size = 100000;
+		size = add_size(size, SpinlockSemaSize());
 		size = add_size(size, hash_estimate_size(SHMEM_INDEX_SIZE,
 												 sizeof(ShmemIndexEnt)));
 		size = add_size(size, BufferShmemSize());
diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c
index 18ba426..4efd0fb 100644
--- a/src/backend/storage/ipc/shmem.c
+++ b/src/backend/storage/ipc/shmem.c
@@ -116,9 +116,24 @@ InitShmemAllocation(void)
 	Assert(shmhdr != NULL);
 
 	/*
-	 * Initialize the spinlock used by ShmemAlloc.	We have to do the space
-	 * allocation the hard way, since obviously ShmemAlloc can't be called
-	 * yet.
+	 * If spinlocks are disabled, initialize emulation layer.  We have to do
+	 * the space allocation the hard way, since obviously ShmemAlloc can't be
+	 * called yet.
+	 */
+#ifndef HAVE_SPINLOCKS
+	{
+		PGSemaphore spinsemas;
+
+		spinsemas = (PGSemaphore) (((char *) shmhdr) + shmhdr->freeoffset);
+		shmhdr->freeoffset += MAXALIGN(SpinlockSemaSize());
+		SpinlockSemaInit(spinsemas);
+		Assert(shmhdr->freeoffset <= shmhdr->totalsize);
+	}
+#endif
+
+	/*
+	 * Initialize the spinlock used by ShmemAlloc; we have to do this the hard
+	 * way, too, for the same reasons as above.
 	 */
 	ShmemLock = (slock_t *) (((char *) shmhdr) + shmhdr->freeoffset);
 	shmhdr->freeoffset += MAXALIGN(sizeof(slock_t));
diff --git a/src/backend/storage/lmgr/spin.c b/src/backend/storage/lmgr/spin.c
index f054be8..e108a37 100644
--- a/src/backend/storage/lmgr/spin.c
+++ b/src/backend/storage/lmgr/spin.c
@@ -28,6 +28,21 @@
 #include "storage/lwlock.h"
 #include "storage/spin.h"
 
+#ifdef USE_ASSERT_CHECKING
+bool any_spinlock_held;
+#endif
+
+PGSemaphore SpinlockSemaArray;
+
+/*
+ * Report the amount of shared memory needed to store semaphores for spinlock
+ * support.
+ */
+Size
+SpinlockSemaSize(void)
+{
+	return SpinlockSemas() * sizeof(PGSemaphoreData);
+}
 
 #ifdef HAVE_SPINLOCKS
 
@@ -43,8 +58,17 @@ SpinlockSemas(void)
 
 /*
  * No TAS, so spinlocks are implemented as PGSemaphores.
+ *
+ * In order to avoid requesting too many semaphores from the operating system,
+ * we multiplex however many spinlocks exist across a smaller number of OS
+ * semaphores.  This works reasonably well as long as the number of concurrent
+ * processes is small enough to make it unlikely that two processes will try
+ * to grab two different spinlocks that map to the same semaphore at the same
+ * time.  If this is not the case in your environment, you can raise this
+ * constant or, better yet, supply a real spinlock implementation for your
+ * platform.
  */
-
+#define	NUM_SPINLOCK_SEMAPHORES		1024
 
 /*
  * Report number of semaphores needed to support spinlocks.
@@ -52,22 +76,20 @@ SpinlockSemas(void)
 int
 SpinlockSemas(void)
 {
-	int			nsemas;
-
-	/*
-	 * It would be cleaner to distribute this logic into the affected modules,
-	 * similar to the way shmem space estimation is handled.
-	 *
-	 * For now, though, there are few enough users of spinlocks that we just
-	 * keep the knowledge here.
-	 */
-	nsemas = NumLWLocks();		/* one for each lwlock */
-	nsemas += NBuffers;			/* one for each buffer header */
-	nsemas += max_wal_senders;	/* one for each wal sender process */
-	nsemas += num_xloginsert_slots; /* one for each WAL insertion slot */
-	nsemas += 30;				/* plus a bunch for other small-scale use */
-
-	return nsemas;
+	return NUM_SPINLOCK_SEMAPHORES;
+}
+
+/*
+ * Initialize semaphores.
+ */
+extern void
+SpinlockSemaInit(PGSemaphore spinsemas)
+{
+	int	i;
+
+	for (i = 0; i < NUM_SPINLOCK_SEMAPHORES; ++i)
+		PGSemaphoreCreate(&spinsemas[i]);
+	SpinlockSemaArray = spinsemas;
 }
 
 /*
@@ -77,13 +99,15 @@ SpinlockSemas(void)
 void
 s_init_lock_sema(volatile slock_t *lock)
 {
-	PGSemaphoreCreate((PGSemaphore) lock);
+	static int counter = 0;
+
+	*lock = (++counter) % NUM_SPINLOCK_SEMAPHORES;
 }
 
 void
 s_unlock_sema(volatile slock_t *lock)
 {
-	PGSemaphoreUnlock((PGSemaphore) lock);
+	PGSemaphoreUnlock(&SpinlockSemaArray[*lock]);
 }
 
 bool
@@ -98,7 +122,7 @@ int
 tas_sema(volatile slock_t *lock)
 {
 	/* Note that TAS macros return 0 if *success* */
-	return !PGSemaphoreTryLock((PGSemaphore) lock);
+	return !PGSemaphoreTryLock(&SpinlockSemaArray[*lock]);
 }
 
 #endif   /* !HAVE_SPINLOCKS */
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index 7dcd5d9..6adecbb 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -915,7 +915,7 @@ spin_delay(void)
  * to fall foul of kernel limits on number of semaphores, so don't use this
  * unless you must!  The subroutines appear in spin.c.
  */
-typedef PGSemaphoreData slock_t;
+typedef int slock_t;
 
 extern bool s_lock_free_sema(volatile slock_t *lock);
 extern void s_unlock_sema(volatile slock_t *lock);
diff --git a/src/include/storage/spin.h b/src/include/storage/spin.h
index f459b90..6193a8c 100644
--- a/src/include/storage/spin.h
+++ b/src/include/storage/spin.h
@@ -60,14 +60,35 @@
 
 
 #define SpinLockInit(lock)	S_INIT_LOCK(lock)
+#define SpinLockFree(lock)	S_LOCK_FREE(lock)
 
+/*
+ * Spinlocks should only be acquired in straight-line code that does not
+ * loop, so no code should ever acquire a second spinlock while already
+ * holding one.  This is particularly important when --disable-spinlocks is
+ * in use, because in that case we map all of the spinlocks in the system
+ * onto a smaller number of semaphores allocated from the OS; attempting
+ * to acquire more than one at a time could self-deadlock.
+ */
+#ifdef USE_ASSERT_CHECKING
+extern bool any_spinlock_held;
+#define SpinLockAcquire(lock) \
+	(AssertMacro(!any_spinlock_held), any_spinlock_held = true, \
+		S_LOCK(lock))
+#define SpinLockRelease(lock) \
+	(AssertMacro(any_spinlock_held), any_spinlock_held = false, \
+		S_UNLOCK(lock))
+#else
 #define SpinLockAcquire(lock) S_LOCK(lock)
-
 #define SpinLockRelease(lock) S_UNLOCK(lock)
-
-#define SpinLockFree(lock)	S_LOCK_FREE(lock)
-
+#endif
 
 extern int	SpinlockSemas(void);
+extern Size	SpinlockSemaSize(void);
+
+#ifndef HAVE_SPINLOCKS
+extern void	SpinlockSemaInit(PGSemaphore);
+extern PGSemaphore	SpinlockSemaArray;
+#endif
 
 #endif   /* SPIN_H */
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to