Patch to implement buffer cache recycling for scans, as being discussed
on pgsql-hackers.
Applies cleanly to cvstip, passes make installcheck when used by default
for all SeqScans. Tested with scan_recycle_buffers = 1,4,8,16
Should be regarded as WIP. Presumably there are some failure conditions
that require the buffer to be reset; these have not yet been considered.
No docs.
SET scan_recycle_buffers = N
default = 0
8 <= N <= 64 would yield benefits according to earlier results
--
Simon Riggs
EnterpriseDB http://www.enterprisedb.com
Index: src/backend/executor/nodeSeqscan.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeSeqscan.c,v
retrieving revision 1.63
diff -c -r1.63 nodeSeqscan.c
*** src/backend/executor/nodeSeqscan.c 5 Jan 2007 22:19:28 -0000 1.63
--- src/backend/executor/nodeSeqscan.c 9 Mar 2007 17:36:22 -0000
***************
*** 24,29 ****
--- 24,30 ----
*/
#include "postgres.h"
+ #include "miscadmin.h"
#include "access/heapam.h"
#include "executor/execdebug.h"
#include "executor/nodeSeqscan.h"
***************
*** 150,155 ****
--- 151,159 ----
currentRelation = ExecOpenScanRelation(estate,
((SeqScan *) node->ps.plan)->scanrelid);
+ if (NScanRecycleBuffers > 0 && RelationGetNumberOfBlocks(currentRelation) > NBuffers)
+ StrategyHintRecycleBuffers(NScanRecycleBuffers);
+
currentScanDesc = heap_beginscan(currentRelation,
estate->es_snapshot,
0,
***************
*** 272,277 ****
--- 276,283 ----
* close the heap relation.
*/
ExecCloseScanRelation(relation);
+
+ StrategyHintRecycleBuffers(0);
}
/* ----------------------------------------------------------------
Index: src/backend/storage/buffer/bufmgr.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v
retrieving revision 1.215
diff -c -r1.215 bufmgr.c
*** src/backend/storage/buffer/bufmgr.c 1 Feb 2007 19:10:27 -0000 1.215
--- src/backend/storage/buffer/bufmgr.c 9 Mar 2007 17:36:22 -0000
***************
*** 320,325 ****
--- 320,326 ----
int buf_id;
volatile BufferDesc *buf;
bool valid;
+ bool lock_held = false;
/* create a tag so we can lookup the buffer */
INIT_BUFFERTAG(newTag, reln, blockNum);
***************
*** 384,390 ****
* it would be bad to hold the spinlock while possibly waking up other
* processes.
*/
! buf = StrategyGetBuffer();
Assert(buf->refcount == 0);
--- 385,391 ----
* it would be bad to hold the spinlock while possibly waking up other
* processes.
*/
! buf = StrategyGetBuffer(&lock_held);
Assert(buf->refcount == 0);
***************
*** 395,401 ****
PinBuffer_Locked(buf);
/* Now it's safe to release the freelist lock */
! LWLockRelease(BufFreelistLock);
/*
* If the buffer was dirty, try to write it out. There is a race
--- 396,403 ----
PinBuffer_Locked(buf);
/* Now it's safe to release the freelist lock */
! if (lock_held)
! LWLockRelease(BufFreelistLock);
/*
* If the buffer was dirty, try to write it out. There is a race
***************
*** 884,891 ****
PrivateRefCount[b]--;
if (PrivateRefCount[b] == 0)
{
- bool immed_free_buffer = false;
-
/* I'd better not still hold any locks on the buffer */
Assert(!LWLockHeldByMe(buf->content_lock));
Assert(!LWLockHeldByMe(buf->io_in_progress_lock));
--- 886,891 ----
***************
*** 899,915 ****
/* Update buffer usage info, unless this is an internal access */
if (normalAccess)
{
if (!strategy_hint_vacuum)
{
if (buf->usage_count < BM_MAX_USAGE_COUNT)
buf->usage_count++;
}
- else
- {
- /* VACUUM accesses don't bump usage count, instead... */
- if (buf->refcount == 0 && buf->usage_count == 0)
- immed_free_buffer = true;
- }
}
if ((buf->flags & BM_PIN_COUNT_WAITER) &&
--- 899,910 ----
/* Update buffer usage info, unless this is an internal access */
if (normalAccess)
{
+ /* VACUUM accesses don't bump usage count */
if (!strategy_hint_vacuum)
{
if (buf->usage_count < BM_MAX_USAGE_COUNT)
buf->usage_count++;
}
}
if ((buf->flags & BM_PIN_COUNT_WAITER) &&
***************
*** 924,937 ****
}
else
UnlockBufHdr(buf);
-
- /*
- * If VACUUM is releasing an otherwise-unused buffer, send it to the
- * freelist for near-term reuse. We put it at the tail so that it
- * won't be used before any invalid buffers that may exist.
- */
- if (immed_free_buffer)
- StrategyFreeBuffer(buf, false);
}
}
--- 919,924 ----
Index: src/backend/storage/buffer/freelist.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v
retrieving revision 1.58
diff -c -r1.58 freelist.c
*** src/backend/storage/buffer/freelist.c 5 Jan 2007 22:19:37 -0000 1.58
--- src/backend/storage/buffer/freelist.c 9 Mar 2007 17:36:22 -0000
***************
*** 39,47 ****
/* Pointers to shared state */
static BufferStrategyControl *StrategyControl = NULL;
/* Backend-local state about whether currently vacuuming */
bool strategy_hint_vacuum = false;
!
/*
* StrategyGetBuffer
--- 39,57 ----
/* Pointers to shared state */
static BufferStrategyControl *StrategyControl = NULL;
+ /* Buffer Recycling */
+ #define MAX_RECYCLE_BUF_IDS 128
+ static volatile int LocalRecycleBufIds[MAX_RECYCLE_BUF_IDS];
+
+ #define BUF_ID_NOT_SET -1
+ int nextVictimRecycleBufId = BUF_ID_NOT_SET;
+
+ int NScanRecycleBuffers = 0;
+ int ThisScanRecycleBuffers = 0;
+
/* Backend-local state about whether currently vacuuming */
bool strategy_hint_vacuum = false;
! bool strategy_hint_recycle = false;
/*
* StrategyGetBuffer
***************
*** 56,66 ****
* the caller must release that lock once the spinlock is dropped.
*/
volatile BufferDesc *
! StrategyGetBuffer(void)
{
volatile BufferDesc *buf;
int trycounter;
LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
/*
--- 66,118 ----
* the caller must release that lock once the spinlock is dropped.
*/
volatile BufferDesc *
! StrategyGetBuffer(bool *lock_held)
{
volatile BufferDesc *buf;
int trycounter;
+ if (strategy_hint_recycle)
+ {
+ /*
+ * Get the next buffer from our local cyclic cache.
+ * Note that we don't need to hold the BufFreelistLock
+ * to get this buffer, because we aren't accessing any
+ * shared memory.
+ *
+ * Run private "clock cycle"
+ */
+ if (++nextVictimRecycleBufId >= ThisScanRecycleBuffers)
+ nextVictimRecycleBufId = 0;
+
+ /*
+ * If that slot hasn't been filled yet, use a new buffer
+ * allocated via the main shared buffer allocation strategy
+ */
+ if (LocalRecycleBufIds[nextVictimRecycleBufId] != BUF_ID_NOT_SET)
+ {
+ buf = &BufferDescriptors[LocalRecycleBufIds[nextVictimRecycleBufId]];
+ /*
+ * If the buffer is pinned we cannot use it in any circumstance.
+ * If usage_count == 0 then the buffer is fair game.
+ *
+ * We also choose this buffer if usage_count == 1. Strictly, this
+ * might sometimes be the wrong thing to do, but we rely on the
+ * high probability that it was this process that last touched
+ * the buffer. We do have to pick a victim, so it may as well be
+ * this one as any of the seldom touched blocks in the buffer pool.
+ */
+ *lock_held = false;
+ LockBufHdr(buf);
+ if (buf->refcount == 0 && buf->usage_count <= 1)
+ return buf;
+ UnlockBufHdr(buf);
+ }
+ }
+
+ /*
+ * If our selected buffer wasn't available, pick another...
+ */
+ *lock_held = true;
LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
/*
***************
*** 86,96 ****
*/
LockBufHdr(buf);
if (buf->refcount == 0 && buf->usage_count == 0)
return buf;
UnlockBufHdr(buf);
}
! /* Nothing on the freelist, so run the "clock sweep" algorithm */
trycounter = NBuffers;
for (;;)
{
--- 138,152 ----
*/
LockBufHdr(buf);
if (buf->refcount == 0 && buf->usage_count == 0)
+ {
+ if (strategy_hint_recycle)
+ LocalRecycleBufIds[nextVictimRecycleBufId] = buf->buf_id;
return buf;
+ }
UnlockBufHdr(buf);
}
! /* Nothing on the freelist, so run the shared "clock sweep" algorithm */
trycounter = NBuffers;
for (;;)
{
***************
*** 105,111 ****
--- 161,171 ----
*/
LockBufHdr(buf);
if (buf->refcount == 0 && buf->usage_count == 0)
+ {
+ if (strategy_hint_recycle)
+ LocalRecycleBufIds[nextVictimRecycleBufId] = buf->buf_id;
return buf;
+ }
if (buf->usage_count > 0)
{
buf->usage_count--;
***************
*** 197,204 ****
--- 257,309 ----
StrategyHintVacuum(bool vacuum_active)
{
strategy_hint_vacuum = vacuum_active;
+ if (vacuum_active)
+ StrategyHintRecycleBuffers(NScanRecycleBuffers);
+ else
+ StrategyHintRecycleBuffers(0);
}
+ /*
+ * StrategyHintRecycleBuffers -- tell us whether to recycle buffers
+ * originally filled by this process. This is intended for use by
+ * callers who access blocks in a sequential pattern. Non-sequential
+ * access patterns could be disrupted severely by using this hint.
+ *
+ * Initial data suggests
+ * nRecycleBuffers = 16 for read-only scans (Mark Kirkwood)
+ * nRecycleBuffers = 32 for VACUUMs (Itagaki Takahiro)
+ * probably
+ * nRecycleBuffers >=128 for normal write-intensive tasks
+ * to allow for bgwriter activity
+ */
+ void
+ StrategyHintRecycleBuffers(int nRecycleBuffers)
+ {
+ int i;
+
+ if (nRecycleBuffers < 0 || nRecycleBuffers > NBuffers
+ || nRecycleBuffers > NScanRecycleBuffers)
+ nRecycleBuffers = 0;
+
+ if (nRecycleBuffers > NScanRecycleBuffers)
+ ThisScanRecycleBuffers = NScanRecycleBuffers;
+ else
+ ThisScanRecycleBuffers = nRecycleBuffers;
+
+ if (ThisScanRecycleBuffers > 0)
+ {
+ strategy_hint_recycle = true;
+
+ /* just before 1st element, to allow for preincrement */
+ nextVictimRecycleBufId = -1;
+
+ /* prepare the cyclic buffer */
+ for (i = 0; i < MAX_RECYCLE_BUF_IDS; i++)
+ LocalRecycleBufIds[i] = BUF_ID_NOT_SET;
+ }
+ else
+ strategy_hint_recycle = false;
+ }
/*
* StrategyShmemSize
Index: src/backend/utils/misc/guc.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/misc/guc.c,v
retrieving revision 1.379
diff -c -r1.379 guc.c
*** src/backend/utils/misc/guc.c 6 Mar 2007 02:06:14 -0000 1.379
--- src/backend/utils/misc/guc.c 9 Mar 2007 17:36:24 -0000
***************
*** 1184,1189 ****
--- 1184,1199 ----
},
{
+ {"scan_recycle_buffers", PGC_USERSET, RESOURCES_MEM,
+ gettext_noop("Sets the number of buffers to recycle during scans"),
+ NULL,
+ GUC_UNIT_BLOCKS
+ },
+ &NScanRecycleBuffers,
+ 0, 0, 128, NULL, NULL
+ },
+
+ {
{"port", PGC_POSTMASTER, CONN_AUTH_SETTINGS,
gettext_noop("Sets the TCP port the server listens on."),
NULL
Index: src/include/miscadmin.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/miscadmin.h,v
retrieving revision 1.193
diff -c -r1.193 miscadmin.h
*** src/include/miscadmin.h 1 Mar 2007 14:52:04 -0000 1.193
--- src/include/miscadmin.h 9 Mar 2007 17:36:25 -0000
***************
*** 128,133 ****
--- 128,134 ----
extern DLLIMPORT char *DataDir;
extern DLLIMPORT int NBuffers;
+ extern DLLIMPORT int NScanRecycleBuffers;
extern int MaxBackends;
extern DLLIMPORT int MyProcPid;
Index: src/include/storage/buf_internals.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/storage/buf_internals.h,v
retrieving revision 1.89
diff -c -r1.89 buf_internals.h
*** src/include/storage/buf_internals.h 5 Jan 2007 22:19:57 -0000 1.89
--- src/include/storage/buf_internals.h 9 Mar 2007 17:36:28 -0000
***************
*** 184,190 ****
*/
/* freelist.c */
! extern volatile BufferDesc *StrategyGetBuffer(void);
extern void StrategyFreeBuffer(volatile BufferDesc *buf, bool at_head);
extern int StrategySyncStart(void);
extern Size StrategyShmemSize(void);
--- 184,190 ----
*/
/* freelist.c */
! extern volatile BufferDesc *StrategyGetBuffer(bool *lock_held);
extern void StrategyFreeBuffer(volatile BufferDesc *buf, bool at_head);
extern int StrategySyncStart(void);
extern Size StrategyShmemSize(void);
Index: src/include/storage/bufmgr.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/storage/bufmgr.h,v
retrieving revision 1.102
diff -c -r1.102 bufmgr.h
*** src/include/storage/bufmgr.h 5 Jan 2007 22:19:57 -0000 1.102
--- src/include/storage/bufmgr.h 9 Mar 2007 17:36:28 -0000
***************
*** 133,138 ****
--- 133,140 ----
extern void DropRelFileNodeBuffers(RelFileNode rnode, bool istemp,
BlockNumber firstDelBlock);
extern void DropDatabaseBuffers(Oid dbid);
+ extern void DropAllSharedBuffers(void);
+
#ifdef NOT_USED
extern void PrintPinnedBufs(void);
***************
*** 157,161 ****
--- 159,164 ----
/* in freelist.c */
extern void StrategyHintVacuum(bool vacuum_active);
+ extern void StrategyHintRecycleBuffers(int nRecycleBuffers);
#endif
---------------------------(end of broadcast)---------------------------
TIP 6: explain analyze is your friend