On 2014-04-24 23:28:14 +0200, Andres Freund wrote: > On 2014-04-24 12:43:13 -0400, Tom Lane wrote: > > Andres Freund <and...@2ndquadrant.com> writes: > > > On 2014-04-24 11:02:44 -0400, Tom Lane wrote: > > >> FWIW, I like the LWLockAssignBatch idea a lot better than the currently > > >> proposed patch. LWLockAssign is a low-level function that has no > > >> business > > >> making risky assumptions about the context it's invoked in. > > > > > I don't think LWLockAssignBatch() is that easy without introducing > > > layering violations. It can't just return a pointer out of the main > > > lwlock array that then can be ++ed clientside because MainLWLockArray's > > > stride isn't sizeof(LWLock). > > > > Meh. I knew this business of using pointers instead of indexes would > > have some downsides. > > > > We could return the array stride ... kinda ugly, but since there's > > probably only one consumer for this API, it's not *that* bad. Could > > wrap the stride-increment in a macro, perhaps. > > I think I am just going to wait for 9.5 where I sure hope we can > allocate the buffer lwlocks outside the main array...
For reference (and backup), here's my current patch for that. Greetings, Andres Freund -- Andres Freund http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services
>From 24cd57b86a5ad4dc625daa61b62663bb796a5e38 Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Tue, 28 Jan 2014 17:06:01 +0100 Subject: [PATCH] lwlock-inline --- src/backend/storage/buffer/buf_init.c | 26 +++++++++++++++++-- src/backend/storage/buffer/bufmgr.c | 48 +++++++++++++++++------------------ src/backend/storage/lmgr/lwlock.c | 11 +++++--- src/include/storage/buf_internals.h | 10 +++++--- src/include/storage/lwlock.h | 10 ++++---- 5 files changed, 66 insertions(+), 39 deletions(-) diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c index e187242..27689d9 100644 --- a/src/backend/storage/buffer/buf_init.c +++ b/src/backend/storage/buffer/buf_init.c @@ -21,6 +21,7 @@ BufferDesc *BufferDescriptors; char *BufferBlocks; int32 *PrivateRefCount; +LWLock *BufferIOLocks; /* @@ -62,6 +63,8 @@ int32 *PrivateRefCount; * backend. */ +static int content_tranche_id, progress_tranche_id; +static LWLockTranche content_tranche, progress_tranche; /* * Initialize shared buffer pool @@ -79,10 +82,26 @@ InitBufferPool(void) ShmemInitStruct("Buffer Descriptors", NBuffers * sizeof(BufferDesc), &foundDescs); + BufferIOLocks = (LWLock *) + ShmemInitStruct("Buffer IO Locks", + NBuffers * sizeof(LWLock), &foundBufs); + BufferBlocks = (char *) ShmemInitStruct("Buffer Blocks", NBuffers * (Size) BLCKSZ, &foundBufs); + content_tranche_id = 1; + content_tranche.name = "Buffer Content Locks"; + content_tranche.array_base = &BufferDescriptors->content_lock; + content_tranche.array_stride = sizeof(BufferDesc); + LWLockRegisterTranche(content_tranche_id, &content_tranche); + + progress_tranche_id = 2; + progress_tranche.name = "Buffer IO Locks"; + progress_tranche.array_base = BufferIOLocks; + progress_tranche.array_stride = sizeof(LWLock); + LWLockRegisterTranche(progress_tranche_id, &progress_tranche); + if (foundDescs || foundBufs) { /* both should be present or neither */ @@ -117,8 +136,8 @@ InitBufferPool(void) */ buf->freeNext = i + 1; - buf->io_in_progress_lock = LWLockAssign(); - buf->content_lock = LWLockAssign(); + LWLockInitialize(&buf->content_lock, content_tranche_id); + LWLockInitialize(&BufferIOLocks[i], progress_tranche_id); } /* Correct last entry of linked list */ @@ -168,6 +187,9 @@ BufferShmemSize(void) /* size of buffer descriptors */ size = add_size(size, mul_size(NBuffers, sizeof(BufferDesc))); + /* size of io progress locks */ + size = add_size(size, mul_size(NBuffers, sizeof(LWLock))); + /* size of data pages */ size = add_size(size, mul_size(NBuffers, BLCKSZ)); diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 4e46ddb..28357c9 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -651,7 +651,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, * happens to be trying to split the page the first one got from * StrategyGetBuffer.) */ - if (LWLockConditionalAcquire(buf->content_lock, LW_SHARED)) + if (LWLockConditionalAcquire((LWLock *) &buf->content_lock, LW_SHARED)) { /* * If using a nondefault strategy, and writing the buffer @@ -673,7 +673,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, StrategyRejectBuffer(strategy, buf)) { /* Drop lock/pin and loop around for another buffer */ - LWLockRelease(buf->content_lock); + LWLockRelease((LWLock *) &buf->content_lock); UnpinBuffer(buf, true); continue; } @@ -686,7 +686,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, smgr->smgr_rnode.node.relNode); FlushBuffer(buf, NULL); - LWLockRelease(buf->content_lock); + LWLockRelease((LWLock *) &buf->content_lock); TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(forkNum, blockNum, smgr->smgr_rnode.node.spcNode, @@ -1001,7 +1001,7 @@ MarkBufferDirty(Buffer buffer) Assert(PrivateRefCount[buffer - 1] > 0); /* unfortunately we can't check if the lock is held exclusively */ - Assert(LWLockHeldByMe(bufHdr->content_lock)); + Assert(LWLockHeldByMe((LWLock *) &bufHdr->content_lock)); LockBufHdr(bufHdr); @@ -1175,8 +1175,8 @@ UnpinBuffer(volatile BufferDesc *buf, bool fixOwner) if (PrivateRefCount[b] == 0) { /* I'd better not still hold any locks on the buffer */ - Assert(!LWLockHeldByMe(buf->content_lock)); - Assert(!LWLockHeldByMe(buf->io_in_progress_lock)); + Assert(!LWLockHeldByMe((LWLock *) &buf->content_lock)); + Assert(!LWLockHeldByMe((LWLock *) &BufferIOLocks[buf->buf_id])); LockBufHdr(buf); @@ -1690,11 +1690,11 @@ SyncOneBuffer(int buf_id, bool skip_recently_used) * buffer is clean by the time we've locked it.) */ PinBuffer_Locked(bufHdr); - LWLockAcquire(bufHdr->content_lock, LW_SHARED); + LWLockAcquire((LWLock *) &bufHdr->content_lock, LW_SHARED); FlushBuffer(bufHdr, NULL); - LWLockRelease(bufHdr->content_lock); + LWLockRelease((LWLock *) &bufHdr->content_lock); UnpinBuffer(bufHdr, true); return result | BUF_WRITTEN; @@ -2453,9 +2453,9 @@ FlushRelationBuffers(Relation rel) (bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY)) { PinBuffer_Locked(bufHdr); - LWLockAcquire(bufHdr->content_lock, LW_SHARED); + LWLockAcquire((LWLock *) &bufHdr->content_lock, LW_SHARED); FlushBuffer(bufHdr, rel->rd_smgr); - LWLockRelease(bufHdr->content_lock); + LWLockRelease((LWLock *) &bufHdr->content_lock); UnpinBuffer(bufHdr, true); } else @@ -2503,9 +2503,9 @@ FlushDatabaseBuffers(Oid dbid) (bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY)) { PinBuffer_Locked(bufHdr); - LWLockAcquire(bufHdr->content_lock, LW_SHARED); + LWLockAcquire((LWLock *) &bufHdr->content_lock, LW_SHARED); FlushBuffer(bufHdr, NULL); - LWLockRelease(bufHdr->content_lock); + LWLockRelease((LWLock *) &bufHdr->content_lock); UnpinBuffer(bufHdr, true); } else @@ -2608,7 +2608,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std) Assert(PrivateRefCount[buffer - 1] > 0); /* here, either share or exclusive lock is OK */ - Assert(LWLockHeldByMe(bufHdr->content_lock)); + Assert(LWLockHeldByMe((LWLock *) &bufHdr->content_lock)); /* * This routine might get called many times on the same page, if we are @@ -2761,11 +2761,11 @@ LockBuffer(Buffer buffer, int mode) buf = &(BufferDescriptors[buffer - 1]); if (mode == BUFFER_LOCK_UNLOCK) - LWLockRelease(buf->content_lock); + LWLockRelease((LWLock *) &buf->content_lock); else if (mode == BUFFER_LOCK_SHARE) - LWLockAcquire(buf->content_lock, LW_SHARED); + LWLockAcquire((LWLock *) &buf->content_lock, LW_SHARED); else if (mode == BUFFER_LOCK_EXCLUSIVE) - LWLockAcquire(buf->content_lock, LW_EXCLUSIVE); + LWLockAcquire((LWLock *) &buf->content_lock, LW_EXCLUSIVE); else elog(ERROR, "unrecognized buffer lock mode: %d", mode); } @@ -2786,7 +2786,7 @@ ConditionalLockBuffer(Buffer buffer) buf = &(BufferDescriptors[buffer - 1]); - return LWLockConditionalAcquire(buf->content_lock, LW_EXCLUSIVE); + return LWLockConditionalAcquire((LWLock *) &buf->content_lock, LW_EXCLUSIVE); } /* @@ -2982,8 +2982,8 @@ WaitIO(volatile BufferDesc *buf) UnlockBufHdr(buf); if (!(sv_flags & BM_IO_IN_PROGRESS)) break; - LWLockAcquire(buf->io_in_progress_lock, LW_SHARED); - LWLockRelease(buf->io_in_progress_lock); + LWLockAcquire((LWLock *) &BufferIOLocks[buf->buf_id], LW_SHARED); + LWLockRelease((LWLock *) &BufferIOLocks[buf->buf_id]); } } @@ -3016,7 +3016,7 @@ StartBufferIO(volatile BufferDesc *buf, bool forInput) * Grab the io_in_progress lock so that other processes can wait for * me to finish the I/O. */ - LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE); + LWLockAcquire((LWLock *) &BufferIOLocks[buf->buf_id], LW_EXCLUSIVE); LockBufHdr(buf); @@ -3030,7 +3030,7 @@ StartBufferIO(volatile BufferDesc *buf, bool forInput) * him to get unwedged. */ UnlockBufHdr(buf); - LWLockRelease(buf->io_in_progress_lock); + LWLockRelease((LWLock *) &BufferIOLocks[buf->buf_id]); WaitIO(buf); } @@ -3040,7 +3040,7 @@ StartBufferIO(volatile BufferDesc *buf, bool forInput) { /* someone else already did the I/O */ UnlockBufHdr(buf); - LWLockRelease(buf->io_in_progress_lock); + LWLockRelease((LWLock *) &BufferIOLocks[buf->buf_id]); return false; } @@ -3089,7 +3089,7 @@ TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty, InProgressBuf = NULL; - LWLockRelease(buf->io_in_progress_lock); + LWLockRelease((LWLock *) &BufferIOLocks[buf->buf_id]); } /* @@ -3114,7 +3114,7 @@ AbortBufferIO(void) * we can use TerminateBufferIO. Anyone who's executing WaitIO on the * buffer will be in a busy spin until we succeed in doing this. */ - LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE); + LWLockAcquire((LWLock *) &BufferIOLocks[buf->buf_id], LW_EXCLUSIVE); LockBufHdr(buf); Assert(buf->flags & BM_IO_IN_PROGRESS); diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 36b4b8b..7a7755f 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -228,9 +228,6 @@ NumLWLocks(void) /* Predefined LWLocks */ numLocks = NUM_FIXED_LWLOCKS; - /* bufmgr.c needs two for each shared buffer */ - numLocks += 2 * NBuffers; - /* proc.c needs one for each backend or auxiliary process */ numLocks += MaxBackends + NUM_AUXILIARY_PROCS; @@ -344,7 +341,13 @@ CreateLWLocks(void) LWLockCounter = (int *) ((char *) MainLWLockArray - 3 * sizeof(int)); LWLockCounter[0] = NUM_FIXED_LWLOCKS; LWLockCounter[1] = numLocks; - LWLockCounter[2] = 1; /* 0 is the main array */ + /* + * Tranches: + * 0 is the main array + * 1 are buffer content locks + * 2 are buffer io in progress locks + */ + LWLockCounter[2] = 3; } if (LWLockTrancheArray == NULL) diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 93a0030..503a6ac 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -139,16 +139,15 @@ typedef struct sbufdesc BufferTag tag; /* ID of page contained in buffer */ BufFlags flags; /* see bit definitions above */ uint16 usage_count; /* usage counter for clock sweep code */ - unsigned refcount; /* # of backends holding pins on buffer */ - int wait_backend_pid; /* backend PID of pin-count waiter */ + uint16 refcount; /* # of backends holding pins on buffer */ slock_t buf_hdr_lock; /* protects the above fields */ int buf_id; /* buffer's index number (from 0) */ int freeNext; /* link in freelist chain */ + int wait_backend_pid; /* backend PID of pin-count waiter */ - LWLock *io_in_progress_lock; /* to wait for I/O to complete */ - LWLock *content_lock; /* to lock access to buffer contents */ + LWLock content_lock; /* to lock access to buffer contents */ } BufferDesc; #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1) @@ -179,6 +178,9 @@ extern PGDLLIMPORT BufferDesc *BufferDescriptors; /* in localbuf.c */ extern BufferDesc *LocalBufferDescriptors; +/* in buf_init.c */ +extern PGDLLIMPORT LWLock *BufferIOLocks; + /* * Internal routines: only called by bufmgr diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 3a19533..7f088ef 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -45,14 +45,14 @@ typedef struct LWLockTranche */ typedef struct LWLock { - slock_t mutex; /* Protects LWLock and queue of PGPROCs */ - bool releaseOK; /* T if ok to release waiters */ - char exclusive; /* # of exclusive holders (0 or 1) */ - int shared; /* # of shared holders (0..MaxBackends) */ - int tranche; /* tranche ID */ struct PGPROC *head; /* head of list of waiting PGPROCs */ struct PGPROC *tail; /* tail of list of waiting PGPROCs */ /* tail is undefined when head is NULL */ + int shared; /* # of shared holders (0..MaxBackends) */ + uint8 tranche; /* tranche ID */ + slock_t mutex; /* Protects LWLock and queue of PGPROCs */ + bool releaseOK; /* T if ok to release waiters */ + char exclusive; /* # of exclusive holders (0 or 1) */ } LWLock; /* -- 1.8.5.rc2.dirty
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers