Patch to implement buffer cache recycling for scans, as being discussed
on pgsql-hackers.

Applies cleanly to CVS tip and passes make installcheck when used by default
for all SeqScans. Tested with scan_recycle_buffers = 1, 4, 8, 16.

Should be regarded as WIP. Presumably there are some failure conditions
that require the buffer to be reset; these have not yet been considered.

No docs.

SET scan_recycle_buffers = N
default = 0

8 <= N <= 64 would yield benefits according to earlier results

-- 
  Simon Riggs             
  EnterpriseDB   http://www.enterprisedb.com

Index: src/backend/executor/nodeSeqscan.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeSeqscan.c,v
retrieving revision 1.63
diff -c -r1.63 nodeSeqscan.c
*** src/backend/executor/nodeSeqscan.c	5 Jan 2007 22:19:28 -0000	1.63
--- src/backend/executor/nodeSeqscan.c	9 Mar 2007 17:36:22 -0000
***************
*** 24,29 ****
--- 24,30 ----
   */
  #include "postgres.h"
  
+ #include "miscadmin.h"
  #include "access/heapam.h"
  #include "executor/execdebug.h"
  #include "executor/nodeSeqscan.h"
***************
*** 150,155 ****
--- 151,159 ----
  	currentRelation = ExecOpenScanRelation(estate,
  									 ((SeqScan *) node->ps.plan)->scanrelid);
  
+ 	if (NScanRecycleBuffers > 0 && RelationGetNumberOfBlocks(currentRelation) > NBuffers)
+ 		StrategyHintRecycleBuffers(NScanRecycleBuffers);
+ 
  	currentScanDesc = heap_beginscan(currentRelation,
  									 estate->es_snapshot,
  									 0,
***************
*** 272,277 ****
--- 276,283 ----
  	 * close the heap relation.
  	 */
  	ExecCloseScanRelation(relation);
+ 
+ 	StrategyHintRecycleBuffers(0);
  }
  
  /* ----------------------------------------------------------------
Index: src/backend/storage/buffer/bufmgr.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v
retrieving revision 1.215
diff -c -r1.215 bufmgr.c
*** src/backend/storage/buffer/bufmgr.c	1 Feb 2007 19:10:27 -0000	1.215
--- src/backend/storage/buffer/bufmgr.c	9 Mar 2007 17:36:22 -0000
***************
*** 320,325 ****
--- 320,326 ----
  	int			buf_id;
  	volatile BufferDesc *buf;
  	bool		valid;
+ 	bool		lock_held = false;
  
  	/* create a tag so we can lookup the buffer */
  	INIT_BUFFERTAG(newTag, reln, blockNum);
***************
*** 384,390 ****
  		 * it would be bad to hold the spinlock while possibly waking up other
  		 * processes.
  		 */
! 		buf = StrategyGetBuffer();
  
  		Assert(buf->refcount == 0);
  
--- 385,391 ----
  		 * it would be bad to hold the spinlock while possibly waking up other
  		 * processes.
  		 */
! 		buf = StrategyGetBuffer(&lock_held);
  
  		Assert(buf->refcount == 0);
  
***************
*** 395,401 ****
  		PinBuffer_Locked(buf);
  
  		/* Now it's safe to release the freelist lock */
! 		LWLockRelease(BufFreelistLock);
  
  		/*
  		 * If the buffer was dirty, try to write it out.  There is a race
--- 396,403 ----
  		PinBuffer_Locked(buf);
  
  		/* Now it's safe to release the freelist lock */
! 		if (lock_held)
! 			LWLockRelease(BufFreelistLock);
  
  		/*
  		 * If the buffer was dirty, try to write it out.  There is a race
***************
*** 884,891 ****
  	PrivateRefCount[b]--;
  	if (PrivateRefCount[b] == 0)
  	{
- 		bool		immed_free_buffer = false;
- 
  		/* I'd better not still hold any locks on the buffer */
  		Assert(!LWLockHeldByMe(buf->content_lock));
  		Assert(!LWLockHeldByMe(buf->io_in_progress_lock));
--- 886,891 ----
***************
*** 899,915 ****
  		/* Update buffer usage info, unless this is an internal access */
  		if (normalAccess)
  		{
  			if (!strategy_hint_vacuum)
  			{
  				if (buf->usage_count < BM_MAX_USAGE_COUNT)
  					buf->usage_count++;
  			}
- 			else
- 			{
- 				/* VACUUM accesses don't bump usage count, instead... */
- 				if (buf->refcount == 0 && buf->usage_count == 0)
- 					immed_free_buffer = true;
- 			}
  		}
  
  		if ((buf->flags & BM_PIN_COUNT_WAITER) &&
--- 899,910 ----
  		/* Update buffer usage info, unless this is an internal access */
  		if (normalAccess)
  		{
+ 			/* VACUUM accesses don't bump the usage count */
  			if (!strategy_hint_vacuum)
  			{
  				if (buf->usage_count < BM_MAX_USAGE_COUNT)
  					buf->usage_count++;
  			}
  		}
  
  		if ((buf->flags & BM_PIN_COUNT_WAITER) &&
***************
*** 924,937 ****
  		}
  		else
  			UnlockBufHdr(buf);
- 
- 		/*
- 		 * If VACUUM is releasing an otherwise-unused buffer, send it to the
- 		 * freelist for near-term reuse.  We put it at the tail so that it
- 		 * won't be used before any invalid buffers that may exist.
- 		 */
- 		if (immed_free_buffer)
- 			StrategyFreeBuffer(buf, false);
  	}
  }
  
--- 919,924 ----
Index: src/backend/storage/buffer/freelist.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v
retrieving revision 1.58
diff -c -r1.58 freelist.c
*** src/backend/storage/buffer/freelist.c	5 Jan 2007 22:19:37 -0000	1.58
--- src/backend/storage/buffer/freelist.c	9 Mar 2007 17:36:22 -0000
***************
*** 39,47 ****
  /* Pointers to shared state */
  static BufferStrategyControl *StrategyControl = NULL;
  
  /* Backend-local state about whether currently vacuuming */
  bool		strategy_hint_vacuum = false;
! 
  
  /*
   * StrategyGetBuffer
--- 39,57 ----
  /* Pointers to shared state */
  static BufferStrategyControl *StrategyControl = NULL;
  
+ /* Buffer Recycling */
+ #define MAX_RECYCLE_BUF_IDS		128
+ static volatile int LocalRecycleBufIds[MAX_RECYCLE_BUF_IDS];
+ 
+ #define BUF_ID_NOT_SET 	-1
+ int nextVictimRecycleBufId = BUF_ID_NOT_SET;
+ 
+ int NScanRecycleBuffers = 0;
+ int ThisScanRecycleBuffers = 0;
+ 
  /* Backend-local state about whether currently vacuuming */
  bool		strategy_hint_vacuum = false;
! bool		strategy_hint_recycle = false;
  
  /*
   * StrategyGetBuffer
***************
*** 56,66 ****
   *	the caller must release that lock once the spinlock is dropped.
   */
  volatile BufferDesc *
! StrategyGetBuffer(void)
  {
  	volatile BufferDesc *buf;
  	int			trycounter;
  
  	LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
  
  	/*
--- 66,118 ----
   *	the caller must release that lock once the spinlock is dropped.
   */
  volatile BufferDesc *
! StrategyGetBuffer(bool *lock_held)
  {
  	volatile BufferDesc *buf;
  	int			trycounter;
  
+ 	if (strategy_hint_recycle)
+ 	{
+ 		/*
+ 		 * Get the next buffer from our local cyclic cache.
+ 		 * Note that we don't need to hold the BufFreelistLock
+ 		 * to get this buffer, because we aren't accessing any
+ 		 * shared memory.
+ 		 *
+ 		 * Run private "clock cycle"
+ 		 */
+ 		if (++nextVictimRecycleBufId >= ThisScanRecycleBuffers)
+ 			nextVictimRecycleBufId = 0;
+ 
+ 		/*
+ 		 * If that slot has already been filled, try to reuse its buffer;
+ 		 * otherwise fall through to the main shared buffer allocation strategy
+ 		 */
+ 		if (LocalRecycleBufIds[nextVictimRecycleBufId] != BUF_ID_NOT_SET)
+ 		{
+ 			buf = &BufferDescriptors[LocalRecycleBufIds[nextVictimRecycleBufId]];
+ 			/*
+ 			 * If the buffer is pinned we cannot use it in any circumstance.
+ 			 * If usage_count == 0 then the buffer is fair game. 
+ 			 *
+ 			 * We also choose this buffer if usage_count == 1. Strictly, this
+ 			 * might sometimes be the wrong thing to do, but we rely on the
+ 			 * high probability that it was this process that last touched 
+ 			 * the buffer. We do have to pick a victim, so it may as well be
+ 			 * this one as any of the seldom touched blocks in the buffer pool.
+ 			 */
+ 			*lock_held = false;
+ 			LockBufHdr(buf);
+ 			if (buf->refcount == 0 && buf->usage_count <= 1)
+ 				return buf;
+ 			UnlockBufHdr(buf);
+ 		}
+ 	}
+ 
+ 	/*
+ 	 * If our selected buffer wasn't available, pick another...
+ 	 */
+ 	*lock_held = true;
  	LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
  
  	/*
***************
*** 86,96 ****
  		 */
  		LockBufHdr(buf);
  		if (buf->refcount == 0 && buf->usage_count == 0)
  			return buf;
  		UnlockBufHdr(buf);
  	}
  
! 	/* Nothing on the freelist, so run the "clock sweep" algorithm */
  	trycounter = NBuffers;
  	for (;;)
  	{
--- 138,152 ----
  		 */
  		LockBufHdr(buf);
  		if (buf->refcount == 0 && buf->usage_count == 0)
+ 		{
+ 			if (strategy_hint_recycle)
+ 				LocalRecycleBufIds[nextVictimRecycleBufId] = buf->buf_id;
  			return buf;
+ 		}
  		UnlockBufHdr(buf);
  	}
  
! 	/* Nothing on the freelist, so run the shared "clock sweep" algorithm */
  	trycounter = NBuffers;
  	for (;;)
  	{
***************
*** 105,111 ****
--- 161,171 ----
  		 */
  		LockBufHdr(buf);
  		if (buf->refcount == 0 && buf->usage_count == 0)
+ 		{
+ 			if (strategy_hint_recycle)
+ 				LocalRecycleBufIds[nextVictimRecycleBufId] = buf->buf_id;
  			return buf;
+ 		}
  		if (buf->usage_count > 0)
  		{
  			buf->usage_count--;
***************
*** 197,204 ****
--- 257,309 ----
  StrategyHintVacuum(bool vacuum_active)
  {
  	strategy_hint_vacuum = vacuum_active;
+ 	if (vacuum_active)
+ 		StrategyHintRecycleBuffers(NScanRecycleBuffers);
+ 	else
+ 		StrategyHintRecycleBuffers(0);
  }
  
+ /*
+  * StrategyHintRecycleBuffers -- tell us whether to recycle buffers
+  * originally filled by this process. This is intended for use by
+  * callers who access blocks in a sequential pattern. Non-sequential
+  * access patterns could be disrupted severely by using this hint.
+  *
+  * Initial data suggests	
+  * 		nRecycleBuffers = 16 for read-only scans (Mark Kirkwood)
+  *		nRecycleBuffers = 32 for VACUUMs		 (Itagaki Takahiro)
+  * probably
+  *		nRecycleBuffers >=128 for normal write-intensive tasks
+  *								to allow for bgwriter activity
+  */
+ void
+ StrategyHintRecycleBuffers(int nRecycleBuffers)
+ {
+ 	int	i;
+ 
+ 	if (nRecycleBuffers < 0 || nRecycleBuffers > NBuffers 
+ 							|| nRecycleBuffers > NScanRecycleBuffers)
+ 		nRecycleBuffers = 0;
+ 
+ 	if (nRecycleBuffers > NScanRecycleBuffers)
+ 		ThisScanRecycleBuffers = NScanRecycleBuffers;
+ 	else
+ 		ThisScanRecycleBuffers = nRecycleBuffers;
+ 
+ 	if (ThisScanRecycleBuffers > 0)
+ 	{
+ 		strategy_hint_recycle = true;
+ 
+ 		/* just before 1st element, to allow for preincrement */
+ 		nextVictimRecycleBufId = -1;
+ 
+ 		/* prepare the cyclic buffer */
+ 		for (i = 0; i < MAX_RECYCLE_BUF_IDS; i++)
+ 			LocalRecycleBufIds[i] = BUF_ID_NOT_SET;
+ 	}
+ 	else
+ 		strategy_hint_recycle = false;
+ }
  
  /*
   * StrategyShmemSize
Index: src/backend/utils/misc/guc.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/misc/guc.c,v
retrieving revision 1.379
diff -c -r1.379 guc.c
*** src/backend/utils/misc/guc.c	6 Mar 2007 02:06:14 -0000	1.379
--- src/backend/utils/misc/guc.c	9 Mar 2007 17:36:24 -0000
***************
*** 1184,1189 ****
--- 1184,1199 ----
  	},
  
  	{
+ 		{"scan_recycle_buffers", PGC_USERSET, RESOURCES_MEM,
+ 			gettext_noop("Sets the number of buffers to recycle during scans"),
+ 			NULL,
+ 			GUC_UNIT_BLOCKS
+ 		},
+ 		&NScanRecycleBuffers,
+ 		0, 0, 128, NULL, NULL
+ 	},
+ 
+ 	{
  		{"port", PGC_POSTMASTER, CONN_AUTH_SETTINGS,
  			gettext_noop("Sets the TCP port the server listens on."),
  			NULL
Index: src/include/miscadmin.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/miscadmin.h,v
retrieving revision 1.193
diff -c -r1.193 miscadmin.h
*** src/include/miscadmin.h	1 Mar 2007 14:52:04 -0000	1.193
--- src/include/miscadmin.h	9 Mar 2007 17:36:25 -0000
***************
*** 128,133 ****
--- 128,134 ----
  extern DLLIMPORT char *DataDir;
  
  extern DLLIMPORT int NBuffers;
+ extern DLLIMPORT int NScanRecycleBuffers;
  extern int	MaxBackends;
  
  extern DLLIMPORT int MyProcPid;
Index: src/include/storage/buf_internals.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/storage/buf_internals.h,v
retrieving revision 1.89
diff -c -r1.89 buf_internals.h
*** src/include/storage/buf_internals.h	5 Jan 2007 22:19:57 -0000	1.89
--- src/include/storage/buf_internals.h	9 Mar 2007 17:36:28 -0000
***************
*** 184,190 ****
   */
  
  /* freelist.c */
! extern volatile BufferDesc *StrategyGetBuffer(void);
  extern void StrategyFreeBuffer(volatile BufferDesc *buf, bool at_head);
  extern int	StrategySyncStart(void);
  extern Size StrategyShmemSize(void);
--- 184,190 ----
   */
  
  /* freelist.c */
! extern volatile BufferDesc *StrategyGetBuffer(bool *lock_held);
  extern void StrategyFreeBuffer(volatile BufferDesc *buf, bool at_head);
  extern int	StrategySyncStart(void);
  extern Size StrategyShmemSize(void);
Index: src/include/storage/bufmgr.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/storage/bufmgr.h,v
retrieving revision 1.102
diff -c -r1.102 bufmgr.h
*** src/include/storage/bufmgr.h	5 Jan 2007 22:19:57 -0000	1.102
--- src/include/storage/bufmgr.h	9 Mar 2007 17:36:28 -0000
***************
*** 133,138 ****
--- 133,140 ----
  extern void DropRelFileNodeBuffers(RelFileNode rnode, bool istemp,
  					   BlockNumber firstDelBlock);
  extern void DropDatabaseBuffers(Oid dbid);
+ extern void DropAllSharedBuffers(void);
+ 
  
  #ifdef NOT_USED
  extern void PrintPinnedBufs(void);
***************
*** 157,161 ****
--- 159,164 ----
  
  /* in freelist.c */
  extern void StrategyHintVacuum(bool vacuum_active);
+ extern void StrategyHintRecycleBuffers(int nRecycleBuffers);
  
  #endif
---------------------------(end of broadcast)---------------------------
TIP 6: explain analyze is your friend

Reply via email to