On 2014-04-24 23:28:14 +0200, Andres Freund wrote:
> On 2014-04-24 12:43:13 -0400, Tom Lane wrote:
> > Andres Freund <and...@2ndquadrant.com> writes:
> > > On 2014-04-24 11:02:44 -0400, Tom Lane wrote:
> > >> FWIW, I like the LWLockAssignBatch idea a lot better than the currently
> > >> proposed patch.  LWLockAssign is a low-level function that has no 
> > >> business
> > >> making risky assumptions about the context it's invoked in.
> > 
> > > I don't think LWLockAssignBatch() is that easy without introducing
> > > layering violations. It can't just return a pointer out of the main
> > > lwlock array that then can be ++ed clientside because MainLWLockArray's
> > > stride isn't sizeof(LWLock).
> > 
> > Meh.  I knew this business of using pointers instead of indexes would
> > have some downsides.
> > 
> > We could return the array stride ... kinda ugly, but since there's
> > probably only one consumer for this API, it's not *that* bad.  Could
> > wrap the stride-increment in a macro, perhaps.
> 
> I think I am just going to wait for 9.5 where I sure hope we can
> allocate the buffer lwlocks outside the main array...

For reference (and backup), here's my current patch for that.

Greetings,

Andres Freund

-- 
 Andres Freund                     http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services
>From 24cd57b86a5ad4dc625daa61b62663bb796a5e38 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Tue, 28 Jan 2014 17:06:01 +0100
Subject: [PATCH] lwlock-inline

---
 src/backend/storage/buffer/buf_init.c | 26 +++++++++++++++++--
 src/backend/storage/buffer/bufmgr.c   | 48 +++++++++++++++++------------------
 src/backend/storage/lmgr/lwlock.c     | 11 +++++---
 src/include/storage/buf_internals.h   | 10 +++++---
 src/include/storage/lwlock.h          | 10 ++++----
 5 files changed, 66 insertions(+), 39 deletions(-)

diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index e187242..27689d9 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -21,6 +21,7 @@
 BufferDesc *BufferDescriptors;
 char	   *BufferBlocks;
 int32	   *PrivateRefCount;
+LWLock		*BufferIOLocks;
 
 
 /*
@@ -62,6 +63,8 @@ int32	   *PrivateRefCount;
  *		backend.
  */
 
+static int			content_tranche_id, progress_tranche_id;
+static LWLockTranche content_tranche, progress_tranche;
 
 /*
  * Initialize shared buffer pool
@@ -79,10 +82,26 @@ InitBufferPool(void)
 		ShmemInitStruct("Buffer Descriptors",
 						NBuffers * sizeof(BufferDesc), &foundDescs);
 
+	BufferIOLocks = (LWLock *)
+		ShmemInitStruct("Buffer IO Locks",
+						NBuffers * sizeof(LWLock), &foundBufs);
+
 	BufferBlocks = (char *)
 		ShmemInitStruct("Buffer Blocks",
 						NBuffers * (Size) BLCKSZ, &foundBufs);
 
+	content_tranche_id = 1;
+	content_tranche.name = "Buffer Content Locks";
+	content_tranche.array_base = &BufferDescriptors->content_lock;
+	content_tranche.array_stride = sizeof(BufferDesc);
+	LWLockRegisterTranche(content_tranche_id, &content_tranche);
+
+	progress_tranche_id = 2;
+	progress_tranche.name = "Buffer IO Locks";
+	progress_tranche.array_base = BufferIOLocks;
+	progress_tranche.array_stride = sizeof(LWLock);
+	LWLockRegisterTranche(progress_tranche_id, &progress_tranche);
+
 	if (foundDescs || foundBufs)
 	{
 		/* both should be present or neither */
@@ -117,8 +136,8 @@ InitBufferPool(void)
 			 */
 			buf->freeNext = i + 1;
 
-			buf->io_in_progress_lock = LWLockAssign();
-			buf->content_lock = LWLockAssign();
+			LWLockInitialize(&buf->content_lock, content_tranche_id);
+			LWLockInitialize(&BufferIOLocks[i], progress_tranche_id);
 		}
 
 		/* Correct last entry of linked list */
@@ -168,6 +187,9 @@ BufferShmemSize(void)
 	/* size of buffer descriptors */
 	size = add_size(size, mul_size(NBuffers, sizeof(BufferDesc)));
 
+	/* size of io progress locks */
+	size = add_size(size, mul_size(NBuffers, sizeof(LWLock)));
+
 	/* size of data pages */
 	size = add_size(size, mul_size(NBuffers, BLCKSZ));
 
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 4e46ddb..28357c9 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -651,7 +651,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			 * happens to be trying to split the page the first one got from
 			 * StrategyGetBuffer.)
 			 */
-			if (LWLockConditionalAcquire(buf->content_lock, LW_SHARED))
+			if (LWLockConditionalAcquire((LWLock *) &buf->content_lock, LW_SHARED))
 			{
 				/*
 				 * If using a nondefault strategy, and writing the buffer
@@ -673,7 +673,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 						StrategyRejectBuffer(strategy, buf))
 					{
 						/* Drop lock/pin and loop around for another buffer */
-						LWLockRelease(buf->content_lock);
+						LWLockRelease((LWLock *) &buf->content_lock);
 						UnpinBuffer(buf, true);
 						continue;
 					}
@@ -686,7 +686,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 											  smgr->smgr_rnode.node.relNode);
 
 				FlushBuffer(buf, NULL);
-				LWLockRelease(buf->content_lock);
+				LWLockRelease((LWLock *) &buf->content_lock);
 
 				TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(forkNum, blockNum,
 											   smgr->smgr_rnode.node.spcNode,
@@ -1001,7 +1001,7 @@ MarkBufferDirty(Buffer buffer)
 
 	Assert(PrivateRefCount[buffer - 1] > 0);
 	/* unfortunately we can't check if the lock is held exclusively */
-	Assert(LWLockHeldByMe(bufHdr->content_lock));
+	Assert(LWLockHeldByMe((LWLock *) &bufHdr->content_lock));
 
 	LockBufHdr(bufHdr);
 
@@ -1175,8 +1175,8 @@ UnpinBuffer(volatile BufferDesc *buf, bool fixOwner)
 	if (PrivateRefCount[b] == 0)
 	{
 		/* I'd better not still hold any locks on the buffer */
-		Assert(!LWLockHeldByMe(buf->content_lock));
-		Assert(!LWLockHeldByMe(buf->io_in_progress_lock));
+		Assert(!LWLockHeldByMe((LWLock *) &buf->content_lock));
+		Assert(!LWLockHeldByMe((LWLock *) &BufferIOLocks[buf->buf_id]));
 
 		LockBufHdr(buf);
 
@@ -1690,11 +1690,11 @@ SyncOneBuffer(int buf_id, bool skip_recently_used)
 	 * buffer is clean by the time we've locked it.)
 	 */
 	PinBuffer_Locked(bufHdr);
-	LWLockAcquire(bufHdr->content_lock, LW_SHARED);
+	LWLockAcquire((LWLock *) &bufHdr->content_lock, LW_SHARED);
 
 	FlushBuffer(bufHdr, NULL);
 
-	LWLockRelease(bufHdr->content_lock);
+	LWLockRelease((LWLock *) &bufHdr->content_lock);
 	UnpinBuffer(bufHdr, true);
 
 	return result | BUF_WRITTEN;
@@ -2453,9 +2453,9 @@ FlushRelationBuffers(Relation rel)
 			(bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
 		{
 			PinBuffer_Locked(bufHdr);
-			LWLockAcquire(bufHdr->content_lock, LW_SHARED);
+			LWLockAcquire((LWLock *) &bufHdr->content_lock, LW_SHARED);
 			FlushBuffer(bufHdr, rel->rd_smgr);
-			LWLockRelease(bufHdr->content_lock);
+			LWLockRelease((LWLock *) &bufHdr->content_lock);
 			UnpinBuffer(bufHdr, true);
 		}
 		else
@@ -2503,9 +2503,9 @@ FlushDatabaseBuffers(Oid dbid)
 			(bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
 		{
 			PinBuffer_Locked(bufHdr);
-			LWLockAcquire(bufHdr->content_lock, LW_SHARED);
+			LWLockAcquire((LWLock *) &bufHdr->content_lock, LW_SHARED);
 			FlushBuffer(bufHdr, NULL);
-			LWLockRelease(bufHdr->content_lock);
+			LWLockRelease((LWLock *) &bufHdr->content_lock);
 			UnpinBuffer(bufHdr, true);
 		}
 		else
@@ -2608,7 +2608,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
 
 	Assert(PrivateRefCount[buffer - 1] > 0);
 	/* here, either share or exclusive lock is OK */
-	Assert(LWLockHeldByMe(bufHdr->content_lock));
+	Assert(LWLockHeldByMe((LWLock *) &bufHdr->content_lock));
 
 	/*
 	 * This routine might get called many times on the same page, if we are
@@ -2761,11 +2761,11 @@ LockBuffer(Buffer buffer, int mode)
 	buf = &(BufferDescriptors[buffer - 1]);
 
 	if (mode == BUFFER_LOCK_UNLOCK)
-		LWLockRelease(buf->content_lock);
+		LWLockRelease((LWLock *) &buf->content_lock);
 	else if (mode == BUFFER_LOCK_SHARE)
-		LWLockAcquire(buf->content_lock, LW_SHARED);
+		LWLockAcquire((LWLock *) &buf->content_lock, LW_SHARED);
 	else if (mode == BUFFER_LOCK_EXCLUSIVE)
-		LWLockAcquire(buf->content_lock, LW_EXCLUSIVE);
+		LWLockAcquire((LWLock *) &buf->content_lock, LW_EXCLUSIVE);
 	else
 		elog(ERROR, "unrecognized buffer lock mode: %d", mode);
 }
@@ -2786,7 +2786,7 @@ ConditionalLockBuffer(Buffer buffer)
 
 	buf = &(BufferDescriptors[buffer - 1]);
 
-	return LWLockConditionalAcquire(buf->content_lock, LW_EXCLUSIVE);
+	return LWLockConditionalAcquire((LWLock *) &buf->content_lock, LW_EXCLUSIVE);
 }
 
 /*
@@ -2982,8 +2982,8 @@ WaitIO(volatile BufferDesc *buf)
 		UnlockBufHdr(buf);
 		if (!(sv_flags & BM_IO_IN_PROGRESS))
 			break;
-		LWLockAcquire(buf->io_in_progress_lock, LW_SHARED);
-		LWLockRelease(buf->io_in_progress_lock);
+		LWLockAcquire((LWLock *) &BufferIOLocks[buf->buf_id], LW_SHARED);
+		LWLockRelease((LWLock *) &BufferIOLocks[buf->buf_id]);
 	}
 }
 
@@ -3016,7 +3016,7 @@ StartBufferIO(volatile BufferDesc *buf, bool forInput)
 		 * Grab the io_in_progress lock so that other processes can wait for
 		 * me to finish the I/O.
 		 */
-		LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE);
+		LWLockAcquire((LWLock *) &BufferIOLocks[buf->buf_id], LW_EXCLUSIVE);
 
 		LockBufHdr(buf);
 
@@ -3030,7 +3030,7 @@ StartBufferIO(volatile BufferDesc *buf, bool forInput)
 		 * him to get unwedged.
 		 */
 		UnlockBufHdr(buf);
-		LWLockRelease(buf->io_in_progress_lock);
+		LWLockRelease((LWLock *) &BufferIOLocks[buf->buf_id]);
 		WaitIO(buf);
 	}
 
@@ -3040,7 +3040,7 @@ StartBufferIO(volatile BufferDesc *buf, bool forInput)
 	{
 		/* someone else already did the I/O */
 		UnlockBufHdr(buf);
-		LWLockRelease(buf->io_in_progress_lock);
+		LWLockRelease((LWLock *) &BufferIOLocks[buf->buf_id]);
 		return false;
 	}
 
@@ -3089,7 +3089,7 @@ TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
 
 	InProgressBuf = NULL;
 
-	LWLockRelease(buf->io_in_progress_lock);
+	LWLockRelease((LWLock *) &BufferIOLocks[buf->buf_id]);
 }
 
 /*
@@ -3114,7 +3114,7 @@ AbortBufferIO(void)
 		 * we can use TerminateBufferIO. Anyone who's executing WaitIO on the
 		 * buffer will be in a busy spin until we succeed in doing this.
 		 */
-		LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE);
+		LWLockAcquire((LWLock *) &BufferIOLocks[buf->buf_id], LW_EXCLUSIVE);
 
 		LockBufHdr(buf);
 		Assert(buf->flags & BM_IO_IN_PROGRESS);
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 36b4b8b..7a7755f 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -228,9 +228,6 @@ NumLWLocks(void)
 	/* Predefined LWLocks */
 	numLocks = NUM_FIXED_LWLOCKS;
 
-	/* bufmgr.c needs two for each shared buffer */
-	numLocks += 2 * NBuffers;
-
 	/* proc.c needs one for each backend or auxiliary process */
 	numLocks += MaxBackends + NUM_AUXILIARY_PROCS;
 
@@ -344,7 +341,13 @@ CreateLWLocks(void)
 		LWLockCounter = (int *) ((char *) MainLWLockArray - 3 * sizeof(int));
 		LWLockCounter[0] = NUM_FIXED_LWLOCKS;
 		LWLockCounter[1] = numLocks;
-		LWLockCounter[2] = 1;			/* 0 is the main array */
+		/*
+		 * Tranches:
+		 * 0 is the main array
+		 * 1 are buffer content locks
+		 * 2 are buffer io in progress locks
+		 */
+		LWLockCounter[2] = 3;
 	}
 
 	if (LWLockTrancheArray == NULL)
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 93a0030..503a6ac 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -139,16 +139,15 @@ typedef struct sbufdesc
 	BufferTag	tag;			/* ID of page contained in buffer */
 	BufFlags	flags;			/* see bit definitions above */
 	uint16		usage_count;	/* usage counter for clock sweep code */
-	unsigned	refcount;		/* # of backends holding pins on buffer */
-	int			wait_backend_pid;		/* backend PID of pin-count waiter */
+	uint16		refcount;		/* # of backends holding pins on buffer */
 
 	slock_t		buf_hdr_lock;	/* protects the above fields */
 
 	int			buf_id;			/* buffer's index number (from 0) */
 	int			freeNext;		/* link in freelist chain */
+	int			wait_backend_pid;		/* backend PID of pin-count waiter */
 
-	LWLock     *io_in_progress_lock;	/* to wait for I/O to complete */
-	LWLock	   *content_lock;	/* to lock access to buffer contents */
+	LWLock		content_lock;	/* to lock access to buffer contents */
 } BufferDesc;
 
 #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
@@ -179,6 +178,9 @@ extern PGDLLIMPORT BufferDesc *BufferDescriptors;
 /* in localbuf.c */
 extern BufferDesc *LocalBufferDescriptors;
 
+/* in buf_init.c */
+extern PGDLLIMPORT LWLock *BufferIOLocks;
+
 
 /*
  * Internal routines: only called by bufmgr
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 3a19533..7f088ef 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -45,14 +45,14 @@ typedef struct LWLockTranche
  */
 typedef struct LWLock
 {
-	slock_t		mutex;			/* Protects LWLock and queue of PGPROCs */
-	bool		releaseOK;		/* T if ok to release waiters */
-	char		exclusive;		/* # of exclusive holders (0 or 1) */
-	int			shared;			/* # of shared holders (0..MaxBackends) */
-	int			tranche;		/* tranche ID */
 	struct PGPROC *head;			/* head of list of waiting PGPROCs */
 	struct PGPROC *tail;			/* tail of list of waiting PGPROCs */
 	/* tail is undefined when head is NULL */
+	int			shared;			/* # of shared holders (0..MaxBackends) */
+	uint8		tranche;		/* tranche ID */
+	slock_t		mutex;			/* Protects LWLock and queue of PGPROCs */
+	bool		releaseOK;		/* T if ok to release waiters */
+	char		exclusive;		/* # of exclusive holders (0 or 1) */
 } LWLock;
 
 /*
-- 
1.8.5.rc2.dirty

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to