Hi hackers,

I'd like to propose some new hooks for the buffer manager.  My primary goal
is to allow users to create an additional caching mechanism between the
shared buffers and disk for evicted buffers.  For example, some EC2
instance classes have ephemeral disks that are physically attached to the
host machine that might be useful for such a cache.  Presumably there are
other uses (e.g., gathering more information about the buffer cache), but
this is the main use-case I have in mind.  I am proposing the following new
hooks:

 * bufmgr_read_hook: called in place of smgrread() in ReadBuffer_common().
   It is expected that such hooks would call smgrread() as necessary.

 * bufmgr_write_hook: called before smgrwrite() in FlushBuffer().  The hook
   indicates whether the buffer is being evicted.  Hook functions must
   gracefully handle concurrent hint bit updates to the page.

 * bufmgr_invalidate_hook: called within InvalidateBuffer().

The attached patch is a first attempt at introducing these hooks with
acceptable names, placements, arguments, etc.

Thoughts?

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com
From d1683d73df8930927a464555f256c8c91c7cf24e Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathandboss...@gmail.com>
Date: Thu, 11 Aug 2022 16:24:26 -0700
Subject: [PATCH v1 1/1] Introduce bufmgr hooks.

These hooks can be used for maintaining a secondary buffer cache
outside of the regular shared buffers.  In theory, there are many
other potential uses.
---
 src/backend/storage/buffer/bufmgr.c | 54 ++++++++++++++++++++---------
 src/include/storage/buf_internals.h | 14 ++++++++
 2 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 9c1bd508d3..365272d139 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -166,6 +166,15 @@ static bool IsForInput;
 /* local state for LockBufferForCleanup */
 static BufferDesc *PinCountWaitBuf = NULL;
 
+/* hook for plugins to get control when reading in a page */
+bufmgr_read_hook_type bufmgr_read_hook = NULL;
+
+/* hook for plugins to get control when writing a page */
+bufmgr_write_hook_type bufmgr_write_hook = NULL;
+
+/* hook for plugins to get control when invalidating a page */
+bufmgr_invalidate_hook_type bufmgr_invalidate_hook = NULL;
+
 /*
  * Backend-Private refcount management:
  *
@@ -482,7 +491,7 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr,
 							   BlockNumber blockNum,
 							   BufferAccessStrategy strategy,
 							   bool *foundPtr);
-static void FlushBuffer(BufferDesc *buf, SMgrRelation reln);
+static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, bool for_eviction);
 static void FindAndDropRelationBuffers(RelFileLocator rlocator,
 									   ForkNumber forkNum,
 									   BlockNumber nForkBlock,
@@ -1015,17 +1024,22 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			instr_time	io_start,
 						io_time;
 
-			if (track_io_timing)
-				INSTR_TIME_SET_CURRENT(io_start);
+			if (bufmgr_read_hook)
+				(*bufmgr_read_hook) (smgr, forkNum, blockNum, (char *) bufBlock);
+			else
+			{
+				if (track_io_timing)
+					INSTR_TIME_SET_CURRENT(io_start);
 
-			smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
+				smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
 
-			if (track_io_timing)
-			{
-				INSTR_TIME_SET_CURRENT(io_time);
-				INSTR_TIME_SUBTRACT(io_time, io_start);
-				pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
-				INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time);
+				if (track_io_timing)
+				{
+					INSTR_TIME_SET_CURRENT(io_time);
+					INSTR_TIME_SUBTRACT(io_time, io_start);
+					pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
+					INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time);
+				}
 			}
 
 			/* check for garbage data */
@@ -1269,7 +1283,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 														  smgr->smgr_rlocator.locator.dbOid,
 														  smgr->smgr_rlocator.locator.relNumber);
 
-				FlushBuffer(buf, NULL);
+				FlushBuffer(buf, NULL, true);
 				LWLockRelease(BufferDescriptorGetContentLock(buf));
 
 				ScheduleBufferTagForWriteback(&BackendWritebackContext,
@@ -1544,6 +1558,9 @@ retry:
 		goto retry;
 	}
 
+	if (bufmgr_invalidate_hook)
+		(*bufmgr_invalidate_hook) (buf);
+
 	/*
 	 * Clear out the buffer's tag and flags.  We must do this to ensure that
 	 * linear scans of the buffer array don't think the buffer is valid.
@@ -2573,7 +2590,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
 	PinBuffer_Locked(bufHdr);
 	LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
 
-	FlushBuffer(bufHdr, NULL);
+	FlushBuffer(bufHdr, NULL, false);
 
 	LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
 
@@ -2822,7 +2839,7 @@ BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum,
  * as the second parameter.  If not, pass NULL.
  */
 static void
-FlushBuffer(BufferDesc *buf, SMgrRelation reln)
+FlushBuffer(BufferDesc *buf, SMgrRelation reln, bool for_eviction)
 {
 	XLogRecPtr	recptr;
 	ErrorContextCallback errcallback;
@@ -2902,6 +2919,9 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
 	 */
 	bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
 
+	if (bufmgr_write_hook)
+		(*bufmgr_write_hook) (reln, buf, bufToWrite, for_eviction);
+
 	if (track_io_timing)
 		INSTR_TIME_SET_CURRENT(io_start);
 
@@ -3584,7 +3604,7 @@ FlushRelationBuffers(Relation rel)
 		{
 			PinBuffer_Locked(bufHdr);
 			LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
-			FlushBuffer(bufHdr, RelationGetSmgr(rel));
+			FlushBuffer(bufHdr, RelationGetSmgr(rel), false);
 			LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
 			UnpinBuffer(bufHdr, true);
 		}
@@ -3679,7 +3699,7 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
 		{
 			PinBuffer_Locked(bufHdr);
 			LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
-			FlushBuffer(bufHdr, srelent->srel);
+			FlushBuffer(bufHdr, srelent->srel, false);
 			LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
 			UnpinBuffer(bufHdr, true);
 		}
@@ -3880,7 +3900,7 @@ FlushDatabaseBuffers(Oid dbid)
 		{
 			PinBuffer_Locked(bufHdr);
 			LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
-			FlushBuffer(bufHdr, NULL);
+			FlushBuffer(bufHdr, NULL, false);
 			LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
 			UnpinBuffer(bufHdr, true);
 		}
@@ -3907,7 +3927,7 @@ FlushOneBuffer(Buffer buffer)
 
 	Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
 
-	FlushBuffer(bufHdr, NULL);
+	FlushBuffer(bufHdr, NULL, false);
 }
 
 /*
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 72466551d7..aa0eb13ed7 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -336,6 +336,20 @@ typedef struct CkptSortItem
 
 extern PGDLLIMPORT CkptSortItem *CkptBufferIds;
 
+/* hook for plugins to get control when reading in a page */
+typedef void (*bufmgr_read_hook_type) (SMgrRelation smgr, ForkNumber forknum,
+									   BlockNumber blocknum, char *buffer);
+extern PGDLLIMPORT bufmgr_read_hook_type bufmgr_read_hook;
+
+/* hook for plugins to get control when writing a page */
+typedef void (*bufmgr_write_hook_type) (SMgrRelation smgr, BufferDesc *buf,
+										char *buffer, bool for_eviction);
+extern PGDLLIMPORT bufmgr_write_hook_type bufmgr_write_hook;
+
+/* hook for plugins to get control when invalidating a page */
+typedef void (*bufmgr_invalidate_hook_type) (BufferDesc *buf);
+extern PGDLLIMPORT bufmgr_invalidate_hook_type bufmgr_invalidate_hook;
+
 /*
  * Internal buffer management routines
  */
-- 
2.25.1

Reply via email to