Hi Hackers,

I noticed several additional code paths in contrib modules, beyond those
covered in [1], that look suitable for streamification:

1) pgstattuple — pgstatapprox.c and parts of pgstattuple_approx_internal
2) Bloom — scan paths in blgetbitmap() and maintenance paths in blbulkdelete()

The attached patches streamify those code paths. I have not run any
benchmarks yet; I plan to share numbers in a follow-up.

[1] 
https://www.postgresql.org/message-id/flat/CABPTF7UeN2o-trr9r7K76rZExnO2M4SLfvTfbUY2CwQjCekgnQ%40mail.gmail.com

Feedback welcome.

--
Best,
Xuneng
From 8444107113a3b9a237520b41d322f7202e8c1502 Mon Sep 17 00:00:00 2001
From: alterego655 <[email protected]>
Date: Thu, 25 Dec 2025 13:40:13 +0800
Subject: [PATCH v1 3/3] Streamify heap bloat estimation scan.

Introduce a read-stream callback to skip all-visible pages via VM/FSM lookup and stream-read the rest, reducing page reads and improving pgstattuple_approx execution time on large relations.
---
 contrib/pgstattuple/pgstatapprox.c | 125 ++++++++++++++++++++++-------
 1 file changed, 94 insertions(+), 31 deletions(-)

diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c
index a59ff4e9d4f..eb5c26ffc10 100644
--- a/contrib/pgstattuple/pgstatapprox.c
+++ b/contrib/pgstattuple/pgstatapprox.c
@@ -23,6 +23,7 @@
 #include "storage/bufmgr.h"
 #include "storage/freespace.h"
 #include "storage/procarray.h"
+#include "storage/read_stream.h"
 
 PG_FUNCTION_INFO_V1(pgstattuple_approx);
 PG_FUNCTION_INFO_V1(pgstattuple_approx_v1_5);
@@ -45,6 +46,61 @@ typedef struct output_type
 
 #define NUM_OUTPUT_COLUMNS 10
 
+/*
+ * Struct for statapprox_heap read stream callback.
+ */
+typedef struct StatApproxReadStreamPrivate
+{
+	Relation	rel;
+	output_type *stat;
+	BlockNumber current_blocknum;
+	BlockNumber nblocks;
+	BlockNumber scanned;		/* count of pages actually read */
+	Buffer		vmbuffer;		/* for VM lookups */
+}			StatApproxReadStreamPrivate;
+
+/*
+ * Read stream callback for statapprox_heap.
+ *
+ * This callback checks the visibility map for each block. If the block is
+ * all-visible, we can get the free space from the FSM without reading the
+ * actual page, and skip to the next block. Only blocks that are not
+ * all-visible are returned for actual reading.
+ */
+static BlockNumber
+statapprox_heap_read_stream_next(ReadStream *stream,
+								 void *callback_private_data,
+								 void *per_buffer_data)
+{
+	StatApproxReadStreamPrivate *p = callback_private_data;
+
+	while (p->current_blocknum < p->nblocks)
+	{
+		BlockNumber blkno = p->current_blocknum++;
+		Size		freespace;
+
+		CHECK_FOR_INTERRUPTS();
+
+		/*
+		 * If the page has only visible tuples, then we can find out the free
+		 * space from the FSM and move on without reading the page.
+		 */
+		if (VM_ALL_VISIBLE(p->rel, blkno, &p->vmbuffer))
+		{
+			freespace = GetRecordedFreeSpace(p->rel, blkno);
+			p->stat->tuple_len += BLCKSZ - freespace;
+			p->stat->free_space += freespace;
+			continue;
+		}
+
+		/* This block needs to be read */
+		p->scanned++;
+		return blkno;
+	}
+
+	return InvalidBlockNumber;
+}
+
 /*
  * This function takes an already open relation and scans its pages,
  * skipping those that have the corresponding visibility map bit set.
@@ -58,53 +114,58 @@ typedef struct output_type
 static void
 statapprox_heap(Relation rel, output_type *stat)
 {
-	BlockNumber scanned,
-				nblocks,
-				blkno;
-	Buffer		vmbuffer = InvalidBuffer;
+	BlockNumber nblocks;
 	BufferAccessStrategy bstrategy;
 	TransactionId OldestXmin;
+	StatApproxReadStreamPrivate p;
+	ReadStream *stream;
 
 	OldestXmin = GetOldestNonRemovableTransactionId(rel);
 	bstrategy = GetAccessStrategy(BAS_BULKREAD);
 
 	nblocks = RelationGetNumberOfBlocks(rel);
-	scanned = 0;
 
-	for (blkno = 0; blkno < nblocks; blkno++)
+	/* Initialize read stream private data */
+	p.rel = rel;
+	p.stat = stat;
+	p.current_blocknum = 0;
+	p.nblocks = nblocks;
+	p.scanned = 0;
+	p.vmbuffer = InvalidBuffer;
+
+	/*
+	 * Create the read stream. We don't use READ_STREAM_USE_BATCHING because
+	 * the callback accesses the visibility map which may need to read VM
+	 * pages. While this shouldn't cause deadlocks, we err on the side of
+	 * caution.
+	 */
+	stream = read_stream_begin_relation(READ_STREAM_FULL,
+										bstrategy,
+										rel,
+										MAIN_FORKNUM,
+										statapprox_heap_read_stream_next,
+										&p,
+										0);
+
+	for (;;)
 	{
 		Buffer		buf;
 		Page		page;
 		OffsetNumber offnum,
 					maxoff;
-		Size		freespace;
-
-		CHECK_FOR_INTERRUPTS();
-
-		/*
-		 * If the page has only visible tuples, then we can find out the free
-		 * space from the FSM and move on.
-		 */
-		if (VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
-		{
-			freespace = GetRecordedFreeSpace(rel, blkno);
-			stat->tuple_len += BLCKSZ - freespace;
-			stat->free_space += freespace;
-			continue;
-		}
+		BlockNumber blkno;
 
-		buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
-								 RBM_NORMAL, bstrategy);
+		buf = read_stream_next_buffer(stream, NULL);
+		if (buf == InvalidBuffer)
+			break;
 
 		LockBuffer(buf, BUFFER_LOCK_SHARE);
 
 		page = BufferGetPage(buf);
+		blkno = BufferGetBlockNumber(buf);
 
 		stat->free_space += PageGetExactFreeSpace(page);
 
-		/* We may count the page as scanned even if it's new/empty */
-		scanned++;
-
 		if (PageIsNew(page) || PageIsEmpty(page))
 		{
 			UnlockReleaseBuffer(buf);
@@ -169,6 +230,8 @@ statapprox_heap(Relation rel, output_type *stat)
 		UnlockReleaseBuffer(buf);
 	}
 
+	read_stream_end(stream);
+
 	stat->table_len = (uint64) nblocks * BLCKSZ;
 
 	/*
@@ -179,7 +242,7 @@ statapprox_heap(Relation rel, output_type *stat)
 	 * tuples in all-visible pages, so no correction is needed for that, and
 	 * we already accounted for the space in those pages, too.
 	 */
-	stat->tuple_count = vac_estimate_reltuples(rel, nblocks, scanned,
+	stat->tuple_count = vac_estimate_reltuples(rel, nblocks, p.scanned,
 											   stat->tuple_count);
 
 	/* It's not clear if we could get -1 here, but be safe. */
@@ -190,16 +253,16 @@ statapprox_heap(Relation rel, output_type *stat)
 	 */
 	if (nblocks != 0)
 	{
-		stat->scanned_percent = 100.0 * scanned / nblocks;
+		stat->scanned_percent = 100.0 * p.scanned / nblocks;
 		stat->tuple_percent = 100.0 * stat->tuple_len / stat->table_len;
 		stat->dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len;
 		stat->free_percent = 100.0 * stat->free_space / stat->table_len;
 	}
 
-	if (BufferIsValid(vmbuffer))
+	if (BufferIsValid(p.vmbuffer))
 	{
-		ReleaseBuffer(vmbuffer);
-		vmbuffer = InvalidBuffer;
+		ReleaseBuffer(p.vmbuffer);
+		p.vmbuffer = InvalidBuffer;
 	}
 }
 
-- 
2.51.0

From d3b15792ee09d7aba4df76273d8883a739d215ce Mon Sep 17 00:00:00 2001
From: alterego655 <[email protected]>
Date: Thu, 25 Dec 2025 13:40:02 +0800
Subject: [PATCH v1 2/3] Streamify Bloom VACUUM paths.

Use streaming reads in blbulkdelete() and blvacuumcleanup() to iterate index pages without repeated ReadBuffer calls, improving VACUUM performance and reducing buffer manager overhead during maintenance operations.
---
 contrib/bloom/blvacuum.c | 55 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 51 insertions(+), 4 deletions(-)

diff --git a/contrib/bloom/blvacuum.c b/contrib/bloom/blvacuum.c
index e68a9008f56..7452302f022 100644
--- a/contrib/bloom/blvacuum.c
+++ b/contrib/bloom/blvacuum.c
@@ -17,6 +17,7 @@
 #include "commands/vacuum.h"
 #include "storage/bufmgr.h"
 #include "storage/indexfsm.h"
+#include "storage/read_stream.h"
 
 
 /*
@@ -40,6 +41,8 @@ blbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 	Page		page;
 	BloomMetaPageData *metaData;
 	GenericXLogState *gxlogState;
+	BlockRangeReadStreamPrivate p;
+	ReadStream *stream;
 
 	if (stats == NULL)
 		stats = palloc0_object(IndexBulkDeleteResult);
@@ -51,6 +54,25 @@ blbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 	 * they can't contain tuples to delete.
 	 */
 	npages = RelationGetNumberOfBlocks(index);
+
+	/* Scan all blocks except the metapage using streaming reads */
+	p.current_blocknum = BLOOM_HEAD_BLKNO;
+	p.last_exclusive = npages;
+
+	/*
+	 * It is safe to use batchmode as block_range_read_stream_cb takes no
+	 * locks.
+	 */
+	stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
+										READ_STREAM_FULL |
+										READ_STREAM_USE_BATCHING,
+										info->strategy,
+										index,
+										MAIN_FORKNUM,
+										block_range_read_stream_cb,
+										&p,
+										0);
+
 	for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
 	{
 		BloomTuple *itup,
@@ -59,8 +81,7 @@ blbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 
 		vacuum_delay_point(false);
 
-		buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
-									RBM_NORMAL, info->strategy);
+		buffer = read_stream_next_buffer(stream, NULL);
 
 		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 		gxlogState = GenericXLogStart(index);
@@ -133,6 +154,9 @@ blbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 		UnlockReleaseBuffer(buffer);
 	}
 
+	Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
+	read_stream_end(stream);
+
 	/*
 	 * Update the metapage's notFullPage list with whatever we found.  Our
 	 * info could already be out of date at this point, but blinsert() will
@@ -166,6 +190,8 @@ blvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 	Relation	index = info->index;
 	BlockNumber npages,
 				blkno;
+	BlockRangeReadStreamPrivate p;
+	ReadStream *stream;
 
 	if (info->analyze_only)
 		return stats;
@@ -181,6 +207,25 @@ blvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 	stats->num_pages = npages;
 	stats->pages_free = 0;
 	stats->num_index_tuples = 0;
+
+	/* Scan all blocks except the metapage using streaming reads */
+	p.current_blocknum = BLOOM_HEAD_BLKNO;
+	p.last_exclusive = npages;
+
+	/*
+	 * It is safe to use batchmode as block_range_read_stream_cb takes no
+	 * locks.
+	 */
+	stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
+										READ_STREAM_FULL |
+										READ_STREAM_USE_BATCHING,
+										info->strategy,
+										index,
+										MAIN_FORKNUM,
+										block_range_read_stream_cb,
+										&p,
+										0);
+
 	for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
 	{
 		Buffer		buffer;
@@ -188,8 +233,7 @@ blvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 
 		vacuum_delay_point(false);
 
-		buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
-									RBM_NORMAL, info->strategy);
+		buffer = read_stream_next_buffer(stream, NULL);
 		LockBuffer(buffer, BUFFER_LOCK_SHARE);
 		page = BufferGetPage(buffer);
 
@@ -206,6 +250,9 @@ blvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 		UnlockReleaseBuffer(buffer);
 	}
 
+	Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
+	read_stream_end(stream);
+
 	IndexFreeSpaceMapVacuum(info->index);
 
 	return stats;
-- 
2.51.0

From 0a211e788d964aebd876dc4472440e8f234ce38a Mon Sep 17 00:00:00 2001
From: alterego655 <[email protected]>
Date: Thu, 25 Dec 2025 13:39:42 +0800
Subject: [PATCH v1 1/3] Switch Bloom scan paths to streaming read.

Replace per-page ReadBuffer loops in blgetbitmap() with read_stream_begin_relation() and sequential buffer iteration, reducing buffer churn and improving scan efficiency on large Bloom indexes.
---
 contrib/bloom/blscan.c | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/contrib/bloom/blscan.c b/contrib/bloom/blscan.c
index 0d71edbe91c..b1fdabaab74 100644
--- a/contrib/bloom/blscan.c
+++ b/contrib/bloom/blscan.c
@@ -17,6 +17,7 @@
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "storage/bufmgr.h"
+#include "storage/read_stream.h"
 
 /*
  * Begin scan of bloom index.
@@ -75,11 +76,13 @@ int64
 blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
 {
 	int64		ntids = 0;
-	BlockNumber blkno = BLOOM_HEAD_BLKNO,
+	BlockNumber blkno,
 				npages;
 	int			i;
 	BufferAccessStrategy bas;
 	BloomScanOpaque so = (BloomScanOpaque) scan->opaque;
+	BlockRangeReadStreamPrivate p;
+	ReadStream *stream;
 
 	if (so->sign == NULL)
 	{
@@ -119,14 +122,29 @@ blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
 	if (scan->instrument)
 		scan->instrument->nsearches++;
 
+	/* Scan all blocks except the metapage using streaming reads */
+	p.current_blocknum = BLOOM_HEAD_BLKNO;
+	p.last_exclusive = npages;
+
+	/*
+	 * It is safe to use batchmode as block_range_read_stream_cb takes no
+	 * locks.
+	 */
+	stream = read_stream_begin_relation(READ_STREAM_FULL |
+										READ_STREAM_USE_BATCHING,
+										bas,
+										scan->indexRelation,
+										MAIN_FORKNUM,
+										block_range_read_stream_cb,
+										&p,
+										0);
+
 	for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
 	{
 		Buffer		buffer;
 		Page		page;
 
-		buffer = ReadBufferExtended(scan->indexRelation, MAIN_FORKNUM,
-									blkno, RBM_NORMAL, bas);
-
+		buffer = read_stream_next_buffer(stream, NULL);
 		LockBuffer(buffer, BUFFER_LOCK_SHARE);
 		page = BufferGetPage(buffer);
 
@@ -162,6 +180,9 @@ blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
 		UnlockReleaseBuffer(buffer);
 		CHECK_FOR_INTERRUPTS();
 	}
+
+	Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
+	read_stream_end(stream);
 	FreeAccessStrategy(bas);
 
 	return ntids;
-- 
2.51.0

Reply via email to