Patch to implement buffer cache recycling for scans, as currently being discussed on pgsql-hackers.
Applies cleanly to CVS tip and passes make installcheck when used by default for all SeqScans. Tested with scan_recycle_buffers = 1, 4, 8, 16. Should be regarded as WIP. Presumably there are some failure conditions that require the buffer to be reset; these have not yet been considered. No docs. Usage: SET scan_recycle_buffers = N (default = 0). Values of 8 <= N <= 64 would yield benefits according to earlier results. -- Simon Riggs EnterpriseDB http://www.enterprisedb.com
Index: src/backend/executor/nodeSeqscan.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeSeqscan.c,v retrieving revision 1.63 diff -c -r1.63 nodeSeqscan.c *** src/backend/executor/nodeSeqscan.c 5 Jan 2007 22:19:28 -0000 1.63 --- src/backend/executor/nodeSeqscan.c 9 Mar 2007 17:36:22 -0000 *************** *** 24,29 **** --- 24,30 ---- */ #include "postgres.h" + #include "miscadmin.h" #include "access/heapam.h" #include "executor/execdebug.h" #include "executor/nodeSeqscan.h" *************** *** 150,155 **** --- 151,159 ---- currentRelation = ExecOpenScanRelation(estate, ((SeqScan *) node->ps.plan)->scanrelid); + if (NScanRecycleBuffers > 0 && RelationGetNumberOfBlocks(currentRelation) > NBuffers) + StrategyHintRecycleBuffers(NScanRecycleBuffers); + currentScanDesc = heap_beginscan(currentRelation, estate->es_snapshot, 0, *************** *** 272,277 **** --- 276,283 ---- * close the heap relation. */ ExecCloseScanRelation(relation); + + StrategyHintRecycleBuffers(0); } /* ---------------------------------------------------------------- Index: src/backend/storage/buffer/bufmgr.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v retrieving revision 1.215 diff -c -r1.215 bufmgr.c *** src/backend/storage/buffer/bufmgr.c 1 Feb 2007 19:10:27 -0000 1.215 --- src/backend/storage/buffer/bufmgr.c 9 Mar 2007 17:36:22 -0000 *************** *** 320,325 **** --- 320,326 ---- int buf_id; volatile BufferDesc *buf; bool valid; + bool lock_held = false; /* create a tag so we can lookup the buffer */ INIT_BUFFERTAG(newTag, reln, blockNum); *************** *** 384,390 **** * it would be bad to hold the spinlock while possibly waking up other * processes. */ ! buf = StrategyGetBuffer(); Assert(buf->refcount == 0); --- 385,391 ---- * it would be bad to hold the spinlock while possibly waking up other * processes. */ ! 
buf = StrategyGetBuffer(&lock_held); Assert(buf->refcount == 0); *************** *** 395,401 **** PinBuffer_Locked(buf); /* Now it's safe to release the freelist lock */ ! LWLockRelease(BufFreelistLock); /* * If the buffer was dirty, try to write it out. There is a race --- 396,403 ---- PinBuffer_Locked(buf); /* Now it's safe to release the freelist lock */ ! if (lock_held) ! LWLockRelease(BufFreelistLock); /* * If the buffer was dirty, try to write it out. There is a race *************** *** 884,891 **** PrivateRefCount[b]--; if (PrivateRefCount[b] == 0) { - bool immed_free_buffer = false; - /* I'd better not still hold any locks on the buffer */ Assert(!LWLockHeldByMe(buf->content_lock)); Assert(!LWLockHeldByMe(buf->io_in_progress_lock)); --- 886,891 ---- *************** *** 899,915 **** /* Update buffer usage info, unless this is an internal access */ if (normalAccess) { if (!strategy_hint_vacuum) { if (buf->usage_count < BM_MAX_USAGE_COUNT) buf->usage_count++; } - else - { - /* VACUUM accesses don't bump usage count, instead... */ - if (buf->refcount == 0 && buf->usage_count == 0) - immed_free_buffer = true; - } } if ((buf->flags & BM_PIN_COUNT_WAITER) && --- 899,910 ---- /* Update buffer usage info, unless this is an internal access */ if (normalAccess) { + /* VACUUM accesses don't bump usage count, instead... */ if (!strategy_hint_vacuum) { if (buf->usage_count < BM_MAX_USAGE_COUNT) buf->usage_count++; } } if ((buf->flags & BM_PIN_COUNT_WAITER) && *************** *** 924,937 **** } else UnlockBufHdr(buf); - - /* - * If VACUUM is releasing an otherwise-unused buffer, send it to the - * freelist for near-term reuse. We put it at the tail so that it - * won't be used before any invalid buffers that may exist. 
- */ - if (immed_free_buffer) - StrategyFreeBuffer(buf, false); } } --- 919,924 ---- Index: src/backend/storage/buffer/freelist.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v retrieving revision 1.58 diff -c -r1.58 freelist.c *** src/backend/storage/buffer/freelist.c 5 Jan 2007 22:19:37 -0000 1.58 --- src/backend/storage/buffer/freelist.c 9 Mar 2007 17:36:22 -0000 *************** *** 39,47 **** /* Pointers to shared state */ static BufferStrategyControl *StrategyControl = NULL; /* Backend-local state about whether currently vacuuming */ bool strategy_hint_vacuum = false; ! /* * StrategyGetBuffer --- 39,57 ---- /* Pointers to shared state */ static BufferStrategyControl *StrategyControl = NULL; + /* Buffer Recycling */ + #define MAX_RECYCLE_BUF_IDS 128 + static volatile int LocalRecycleBufIds[MAX_RECYCLE_BUF_IDS]; + + #define BUF_ID_NOT_SET -1 + int nextVictimRecycleBufId = BUF_ID_NOT_SET; + + int NScanRecycleBuffers = 0; + int ThisScanRecycleBuffers = 0; + /* Backend-local state about whether currently vacuuming */ bool strategy_hint_vacuum = false; ! bool strategy_hint_recycle = false; /* * StrategyGetBuffer *************** *** 56,66 **** * the caller must release that lock once the spinlock is dropped. */ volatile BufferDesc * ! StrategyGetBuffer(void) { volatile BufferDesc *buf; int trycounter; LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE); /* --- 66,118 ---- * the caller must release that lock once the spinlock is dropped. */ volatile BufferDesc * ! StrategyGetBuffer(bool *lock_held) { volatile BufferDesc *buf; int trycounter; + if (strategy_hint_recycle) + { + /* + * Get the next buffer from our local cyclic cache. + * Note that we don't need to hold the BufFreelistLock + * to get this buffer, because we aren't accessing any + * shared memory. 
+ * + * Run private "clock cycle" + */ + if (++nextVictimRecycleBufId >= ThisScanRecycleBuffers) + nextVictimRecycleBufId = 0; + + /* + * If that slot hasn't been filled yet, use a new buffer + * allocated via the main shared buffer allocation strategy + */ + if (LocalRecycleBufIds[nextVictimRecycleBufId] != BUF_ID_NOT_SET) + { + buf = &BufferDescriptors[LocalRecycleBufIds[nextVictimRecycleBufId]]; + /* + * If the buffer is pinned we cannot use it in any circumstance. + * If usage_count == 0 then the buffer is fair game. + * + * We also choose this buffer if usage_count == 1. Strictly, this + * might sometimes be the wrong thing to do, but we rely on the + * high probability that it was this process that last touched + * the buffer. We do have to pick a victim, so it may as well be + * this one as any of the seldom touched blocks in the buffer pool. + */ + *lock_held = false; + LockBufHdr(buf); + if (buf->refcount == 0 && buf->usage_count <= 1) + return buf; + UnlockBufHdr(buf); + } + } + + /* + * If our selected buffer wasn't available, pick another... + */ + *lock_held = true; LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE); /* *************** *** 86,96 **** */ LockBufHdr(buf); if (buf->refcount == 0 && buf->usage_count == 0) return buf; UnlockBufHdr(buf); } ! /* Nothing on the freelist, so run the "clock sweep" algorithm */ trycounter = NBuffers; for (;;) { --- 138,152 ---- */ LockBufHdr(buf); if (buf->refcount == 0 && buf->usage_count == 0) + { + if (strategy_hint_recycle) + LocalRecycleBufIds[nextVictimRecycleBufId] = buf->buf_id; return buf; + } UnlockBufHdr(buf); } ! 
/* Nothing on the freelist, so run the shared "clock sweep" algorithm */ trycounter = NBuffers; for (;;) { *************** *** 105,111 **** --- 161,171 ---- */ LockBufHdr(buf); if (buf->refcount == 0 && buf->usage_count == 0) + { + if (strategy_hint_recycle) + LocalRecycleBufIds[nextVictimRecycleBufId] = buf->buf_id; return buf; + } if (buf->usage_count > 0) { buf->usage_count--; *************** *** 197,204 **** --- 257,309 ---- StrategyHintVacuum(bool vacuum_active) { strategy_hint_vacuum = vacuum_active; + if (vacuum_active) + StrategyHintRecycleBuffers(NScanRecycleBuffers); + else + StrategyHintRecycleBuffers(0); } + /* + * StrategyHintRecycleOwnBuffers -- tell us whether to recycle buffers + * originally filled by this process. This is intended for use by + * callers who access blocks in a sequential pattern. Non-sequential + * access patterns could be disrupted severely by using this hint. + * + * Initial data suggests + * nRecycleBuffers = 16 for read-only scans (Mark Kirkwood) + * nRecycleBuffers = 32 for VACUUMs (Itagaki Takahiro) + * probably + * nRecycleBuffers >=128 for normal write-intensive tasks + * to allow for bgwriter activity + */ + void + StrategyHintRecycleBuffers(int nRecycleBuffers) + { + int i; + + if (nRecycleBuffers < 0 || nRecycleBuffers > NBuffers + || nRecycleBuffers > NScanRecycleBuffers) + nRecycleBuffers = 0; + + if (nRecycleBuffers > NScanRecycleBuffers) + ThisScanRecycleBuffers = NScanRecycleBuffers; + else + ThisScanRecycleBuffers = nRecycleBuffers; + + if (ThisScanRecycleBuffers > 0) + { + strategy_hint_recycle = true; + + /* just before 1st element, to allow for preincrement */ + nextVictimRecycleBufId = -1; + + /* prepare the cyclic buffer */ + for (i = 0; i < MAX_RECYCLE_BUF_IDS; i++) + LocalRecycleBufIds[i] = BUF_ID_NOT_SET; + } + else + strategy_hint_recycle = false; + } /* * StrategyShmemSize Index: src/backend/utils/misc/guc.c =================================================================== RCS file: 
/projects/cvsroot/pgsql/src/backend/utils/misc/guc.c,v retrieving revision 1.379 diff -c -r1.379 guc.c *** src/backend/utils/misc/guc.c 6 Mar 2007 02:06:14 -0000 1.379 --- src/backend/utils/misc/guc.c 9 Mar 2007 17:36:24 -0000 *************** *** 1184,1189 **** --- 1184,1199 ---- }, { + {"scan_recycle_buffers", PGC_USERSET, RESOURCES_MEM, + gettext_noop("Sets the number of buffers to recycle during scans"), + NULL, + GUC_UNIT_BLOCKS + }, + &NScanRecycleBuffers, + 0, 0, 128, NULL, NULL + }, + + { {"port", PGC_POSTMASTER, CONN_AUTH_SETTINGS, gettext_noop("Sets the TCP port the server listens on."), NULL Index: src/include/miscadmin.h =================================================================== RCS file: /projects/cvsroot/pgsql/src/include/miscadmin.h,v retrieving revision 1.193 diff -c -r1.193 miscadmin.h *** src/include/miscadmin.h 1 Mar 2007 14:52:04 -0000 1.193 --- src/include/miscadmin.h 9 Mar 2007 17:36:25 -0000 *************** *** 128,133 **** --- 128,134 ---- extern DLLIMPORT char *DataDir; extern DLLIMPORT int NBuffers; + extern DLLIMPORT int NScanRecycleBuffers; extern int MaxBackends; extern DLLIMPORT int MyProcPid; Index: src/include/storage/buf_internals.h =================================================================== RCS file: /projects/cvsroot/pgsql/src/include/storage/buf_internals.h,v retrieving revision 1.89 diff -c -r1.89 buf_internals.h *** src/include/storage/buf_internals.h 5 Jan 2007 22:19:57 -0000 1.89 --- src/include/storage/buf_internals.h 9 Mar 2007 17:36:28 -0000 *************** *** 184,190 **** */ /* freelist.c */ ! extern volatile BufferDesc *StrategyGetBuffer(void); extern void StrategyFreeBuffer(volatile BufferDesc *buf, bool at_head); extern int StrategySyncStart(void); extern Size StrategyShmemSize(void); --- 184,190 ---- */ /* freelist.c */ ! 
extern volatile BufferDesc *StrategyGetBuffer(bool *lock_held); extern void StrategyFreeBuffer(volatile BufferDesc *buf, bool at_head); extern int StrategySyncStart(void); extern Size StrategyShmemSize(void); Index: src/include/storage/bufmgr.h =================================================================== RCS file: /projects/cvsroot/pgsql/src/include/storage/bufmgr.h,v retrieving revision 1.102 diff -c -r1.102 bufmgr.h *** src/include/storage/bufmgr.h 5 Jan 2007 22:19:57 -0000 1.102 --- src/include/storage/bufmgr.h 9 Mar 2007 17:36:28 -0000 *************** *** 133,138 **** --- 133,140 ---- extern void DropRelFileNodeBuffers(RelFileNode rnode, bool istemp, BlockNumber firstDelBlock); extern void DropDatabaseBuffers(Oid dbid); + extern void DropAllSharedBuffers(void); + #ifdef NOT_USED extern void PrintPinnedBufs(void); *************** *** 157,161 **** --- 159,164 ---- /* in freelist.c */ extern void StrategyHintVacuum(bool vacuum_active); + extern void StrategyHintRecycleBuffers(int nRecycleBuffers); #endif
---------------------------(end of broadcast)--------------------------- TIP 6: explain analyze is your friend