On Tue, Jan 3, 2012 at 11:28 PM, Tom Lane <t...@sss.pgh.pa.us> wrote:
> Jim Nasby <j...@nasby.net> writes:
>> On Jan 3, 2012, at 12:11 PM, Simon Riggs wrote:
>>> This could well be related to the fact that DropRelFileNodeBuffers()
>>> does a scan of shared_buffers, which is an O(N) approach no matter the
>>> size of the index.
>
>> Couldn't we just leave the buffers alone? Once an index is dropped and 
>> that's pushed out through the catalog then nothing should be trying to 
>> access them and they'll eventually just get aged out.
>
> No, we can't, because if they're still dirty then the bgwriter would
> first try to write them to the no-longer-existing storage file.  It's
> important that we kill the buffers immediately during relation drop.
>
> I'm still thinking that it might be sufficient to mark the buffers
> invalid and let the clock sweep find them, thereby eliminating the need
> for a freelist.

My patch puts things on the freelist only when it is free to do so.
Not having a freelist at all is probably a simpler way of avoiding the
lock contention, so I'll happily back that suggestion instead. Patch
attached, previous patch revoked.

> Simon is after a different solution involving getting
> rid of the clock sweep...

err, No, he isn't. Not sure where that came from since I'm advocating
only minor changes there to curb worst case behaviour.

But lets discuss that on the main freelist thread.

-- 
 Simon Riggs                   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index 94cefba..9332a74 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -115,7 +115,7 @@ InitBufferPool(void)
 			 * Initially link all the buffers together as unused. Subsequent
 			 * management of this list is done by freelist.c.
 			 */
-			buf->freeNext = i + 1;
+			StrategyInitFreelistBuffer(buf);
 
 			buf->io_in_progress_lock = LWLockAssign();
 			buf->content_lock = LWLockAssign();
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index 3e62448..6b49cae 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -27,6 +27,7 @@ typedef struct
 	/* Clock sweep hand: index of next buffer to consider grabbing */
 	int			nextVictimBuffer;
 
+#ifdef USE_BUFMGR_FREELIST
 	int			firstFreeBuffer;	/* Head of list of unused buffers */
 	int			lastFreeBuffer; /* Tail of list of unused buffers */
 
@@ -34,7 +35,7 @@ typedef struct
 	 * NOTE: lastFreeBuffer is undefined when firstFreeBuffer is -1 (that is,
 	 * when the list is empty)
 	 */
-
+#endif
 	/*
 	 * Statistics.	These counters should be wide enough that they can't
 	 * overflow during a single bgwriter cycle.
@@ -134,6 +135,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 	 */
 	StrategyControl->numBufferAllocs++;
 
+#ifdef USE_BUFMGR_FREELIST
 	/*
 	 * Try to get a buffer from the freelist.  Note that the freeNext fields
 	 * are considered to be protected by the BufFreelistLock not the
@@ -165,8 +167,9 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 		}
 		UnlockBufHdr(buf);
 	}
+#endif
 
-	/* Nothing on the freelist, so run the "clock sweep" algorithm */
+	/* Run the "clock sweep" algorithm */
 	trycounter = NBuffers;
 	for (;;)
 	{
@@ -182,20 +185,25 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 		 * If the buffer is pinned or has a nonzero usage_count, we cannot use
 		 * it; decrement the usage_count (unless pinned) and keep scanning.
 		 */
-		LockBufHdr(buf);
 		if (buf->refcount == 0)
 		{
-			if (buf->usage_count > 0)
+			if (buf->usage_count > StrategyControl->completePasses)
 			{
 				buf->usage_count--;
 				trycounter = NBuffers;
 			}
 			else
 			{
-				/* Found a usable buffer */
-				if (strategy != NULL)
-					AddBufferToRing(strategy, buf);
-				return buf;
+				LockBufHdr(buf);
+				if (buf->refcount == 0)
+				{
+					UnlockBufHdr(buf);
+					/* Found a usable buffer */
+					if (strategy != NULL)
+						AddBufferToRing(strategy, buf);
+					return buf;
+				}
+				UnlockBufHdr(buf);
 			}
 		}
 		else if (--trycounter == 0)
@@ -207,10 +215,8 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 			 * probably better to fail than to risk getting stuck in an
 			 * infinite loop.
 			 */
-			UnlockBufHdr(buf);
 			elog(ERROR, "no unpinned buffers available");
 		}
-		UnlockBufHdr(buf);
 	}
 
 	/* not reached */
@@ -223,6 +229,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 void
 StrategyFreeBuffer(volatile BufferDesc *buf)
 {
+#ifdef USE_BUFMGR_FREELIST
 	LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
 
 	/*
@@ -238,6 +245,24 @@ StrategyFreeBuffer(volatile BufferDesc *buf)
 	}
 
 	LWLockRelease(BufFreelistLock);
+#endif
+}
+
+/*
+ * StrategyInitFreelist: put a buffer on the freelist during InitBufferPool
+ */
+void
+StrategyInitFreelistBuffer(volatile BufferDesc *buf)
+{
+#ifdef USE_BUFMGR_FREELIST
+	/*
+	 * Initially link all the buffers together as unused. Subsequent
+	 * management of this list is done by freelist.c.
+	 */
+	buf->freeNext = i + 1;
+#else
+	buf->freeNext = FREENEXT_NOT_IN_LIST;
+#endif
 }
 
 /*
@@ -331,12 +356,14 @@ StrategyInitialize(bool init)
 		 */
 		Assert(init);
 
+#ifdef USE_BUFMGR_FREELIST
 		/*
 		 * Grab the whole linked list of free buffers for our strategy. We
 		 * assume it was previously set up by InitBufferPool().
 		 */
 		StrategyControl->firstFreeBuffer = 0;
 		StrategyControl->lastFreeBuffer = NBuffers - 1;
+#endif
 
 		/* Initialize the clock sweep pointer */
 		StrategyControl->nextVictimBuffer = 0;
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index e43719e..6a03d5d 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -190,6 +190,8 @@ extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
 extern int	StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc);
 extern Size StrategyShmemSize(void);
 extern void StrategyInitialize(bool init);
+extern void StrategyInitFreelistBuffer(volatile BufferDesc *buf);
+
 
 /* buf_table.c */
 extern Size BufTableShmemSize(int size);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to