On Wed, Feb 5, 2025 at 5:26 PM Melanie Plageman
<melanieplage...@gmail.com> wrote:
>
> Yes, looking at these results, I also feel good about it. I've updated
> the commit metadata in attached v14, but I could use a round of review
> before pushing it.

I've done a bit of self-review and updated these patches.

- Melanie
From 0483495b5a3a35cf27672ad08901509b8c32d49d Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplage...@gmail.com>
Date: Wed, 5 Feb 2025 17:23:05 -0500
Subject: [PATCH v15 2/2] Use streaming read I/O in VACUUM's third phase

Make vacuum's third phase (its second pass over the heap), which reaps
dead items collected in the first phase and marks them as reusable, use
the read stream API. This commit adds a new read stream callback,
vacuum_reap_lp_read_stream_next(), that looks ahead in the TidStore and
returns the next block number to read for vacuum.

Author: Melanie Plageman <melanieplage...@gmail.com>
Co-authored-by: Thomas Munro <thomas.mu...@gmail.com>
Discussion: https://postgr.es/m/CA%2BhUKGKN3oy0bN_3yv8hd78a4%2BM1tJC9z7mD8%2Bf%2ByA%2BGeoFUwQ%40mail.gmail.com
---
 src/backend/access/heap/vacuumlazy.c | 49 +++++++++++++++++++++++++---
 1 file changed, 44 insertions(+), 5 deletions(-)

diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index b941158c645..740565b8379 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -2261,6 +2261,29 @@ lazy_vacuum_all_indexes(LVRelState *vacrel)
 	return allindexes;
 }
 
+/*
+ * Read stream callback for vacuum's third phase (second pass over the heap).
+ * Gets the next block from the TID store and returns it or InvalidBlockNumber
+ * if there are no further blocks to vacuum.
+ */
+static BlockNumber
+vacuum_reap_lp_read_stream_next(ReadStream *stream,
+								void *callback_private_data,
+								void *per_buffer_data)
+{
+	TidStoreIter *iter = callback_private_data;
+	TidStoreIterResult *iter_result;
+
+	iter_result = TidStoreIterateNext(iter);
+	if (iter_result == NULL)
+		return InvalidBlockNumber;
+
+	/* Save the TidStoreIterResult for later, so we can extract the offsets. */
+	memcpy(per_buffer_data, iter_result, sizeof(*iter_result));
+
+	return iter_result->blkno;
+}
+
 /*
  *	lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
  *
@@ -2281,6 +2304,7 @@ lazy_vacuum_all_indexes(LVRelState *vacrel)
 static void
 lazy_vacuum_heap_rel(LVRelState *vacrel)
 {
+	ReadStream *stream;
 	BlockNumber vacuumed_pages = 0;
 	Buffer		vmbuffer = InvalidBuffer;
 	LVSavedErrInfo saved_err_info;
@@ -2301,7 +2325,17 @@ lazy_vacuum_heap_rel(LVRelState *vacrel)
 							 InvalidBlockNumber, InvalidOffsetNumber);
 
 	iter = TidStoreBeginIterate(vacrel->dead_items);
-	while ((iter_result = TidStoreIterateNext(iter)) != NULL)
+
+	/* Set up the read stream */
+	stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
+										vacrel->bstrategy,
+										vacrel->rel,
+										MAIN_FORKNUM,
+										vacuum_reap_lp_read_stream_next,
+										iter,
+										sizeof(TidStoreIterResult));
+
+	while (true)
 	{
 		BlockNumber blkno;
 		Buffer		buf;
@@ -2312,9 +2346,14 @@ lazy_vacuum_heap_rel(LVRelState *vacrel)
 
 		vacuum_delay_point();
 
-		blkno = iter_result->blkno;
-		vacrel->blkno = blkno;
+		buf = read_stream_next_buffer(stream, (void **) &iter_result);
 
+		if (!BufferIsValid(buf))
+			break;
+
+		vacrel->blkno = blkno = BufferGetBlockNumber(buf);
+
+		Assert(iter_result);
 		num_offsets = TidStoreGetBlockOffsets(iter_result, offsets, lengthof(offsets));
 		Assert(num_offsets <= lengthof(offsets));
 
@@ -2326,8 +2365,6 @@ lazy_vacuum_heap_rel(LVRelState *vacrel)
 		visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
 
 		/* We need a non-cleanup exclusive lock to mark dead_items unused */
-		buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
-								 vacrel->bstrategy);
 		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 		lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
 							  num_offsets, vmbuffer);
@@ -2340,6 +2377,8 @@ lazy_vacuum_heap_rel(LVRelState *vacrel)
 		RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
 		vacuumed_pages++;
 	}
+
+	read_stream_end(stream);
 	TidStoreEndIterate(iter);
 
 	vacrel->blkno = InvalidBlockNumber;
-- 
2.34.1

From bc33c168c2799dd397b7b67f22e3d8fdf40bc043 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplage...@gmail.com>
Date: Wed, 5 Feb 2025 17:21:41 -0500
Subject: [PATCH v15 1/2] Use streaming read I/O in VACUUM's first phase

Make vacuum's first phase, which prunes and freezes tuples and records
dead TIDs, use the read stream API by by converting
heap_vac_scan_next_block() to a read stream callback.

Discussion: https://postgr.es/m/CA%2BhUKGKN3oy0bN_3yv8hd78a4%2BM1tJC9z7mD8%2Bf%2ByA%2BGeoFUwQ%40mail.gmail.com
---
 src/backend/access/heap/vacuumlazy.c | 131 +++++++++++++++++----------
 1 file changed, 83 insertions(+), 48 deletions(-)

diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 075af385cd1..b941158c645 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -108,6 +108,7 @@
 #include "storage/bufmgr.h"
 #include "storage/freespace.h"
 #include "storage/lmgr.h"
+#include "storage/read_stream.h"
 #include "utils/lsyscache.h"
 #include "utils/pg_rusage.h"
 #include "utils/timestamp.h"
@@ -296,8 +297,9 @@ typedef struct LVSavedErrInfo
 
 /* non-export function prototypes */
 static void lazy_scan_heap(LVRelState *vacrel);
-static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
-									 bool *all_visible_according_to_vm);
+static BlockNumber heap_vac_scan_next_block(ReadStream *stream,
+											void *callback_private_data,
+											void *per_buffer_data);
 static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
 static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
 								   BlockNumber blkno, Page page,
@@ -907,10 +909,11 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 static void
 lazy_scan_heap(LVRelState *vacrel)
 {
+	ReadStream *stream;
 	BlockNumber rel_pages = vacrel->rel_pages,
-				blkno,
+				blkno = 0,
 				next_fsm_block_to_vacuum = 0;
-	bool		all_visible_according_to_vm;
+	bool	   *all_visible_according_to_vm = NULL;
 
 	Buffer		vmbuffer = InvalidBuffer;
 	const int	initprog_index[] = {
@@ -932,20 +935,22 @@ lazy_scan_heap(LVRelState *vacrel)
 	vacrel->next_unskippable_allvis = false;
 	vacrel->next_unskippable_vmbuffer = InvalidBuffer;
 
-	while (heap_vac_scan_next_block(vacrel, &blkno, &all_visible_according_to_vm))
+	/* Set up the read stream for vacuum's first pass through the heap */
+	stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
+										vacrel->bstrategy,
+										vacrel->rel,
+										MAIN_FORKNUM,
+										heap_vac_scan_next_block,
+										vacrel,
+										sizeof(bool));
+
+	while (true)
 	{
 		Buffer		buf;
 		Page		page;
 		bool		has_lpdead_items;
 		bool		got_cleanup_lock = false;
 
-		vacrel->scanned_pages++;
-
-		/* Report as block scanned, update error traceback information */
-		pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
-		update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
-								 blkno, InvalidOffsetNumber);
-
 		vacuum_delay_point();
 
 		/*
@@ -986,7 +991,8 @@ lazy_scan_heap(LVRelState *vacrel)
 
 			/*
 			 * Vacuum the Free Space Map to make newly-freed space visible on
-			 * upper-level FSM pages.  Note we have not yet processed blkno.
+			 * upper-level FSM pages.  Note that blkno is the previously
+			 * processed block.
 			 */
 			FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
 									blkno);
@@ -997,6 +1003,23 @@ lazy_scan_heap(LVRelState *vacrel)
 										 PROGRESS_VACUUM_PHASE_SCAN_HEAP);
 		}
 
+		buf = read_stream_next_buffer(stream, (void **) &all_visible_according_to_vm);
+
+		if (!BufferIsValid(buf))
+			break;
+
+		Assert(all_visible_according_to_vm);
+		CheckBufferIsPinnedOnce(buf);
+		page = BufferGetPage(buf);
+		blkno = BufferGetBlockNumber(buf);
+
+		vacrel->scanned_pages++;
+
+		/* Report as block scanned, update error traceback information */
+		pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
+		update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
+								 blkno, InvalidOffsetNumber);
+
 		/*
 		 * Pin the visibility map page in case we need to mark the page
 		 * all-visible.  In most cases this will be very cheap, because we'll
@@ -1004,10 +1027,6 @@ lazy_scan_heap(LVRelState *vacrel)
 		 */
 		visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
 
-		buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
-								 vacrel->bstrategy);
-		page = BufferGetPage(buf);
-
 		/*
 		 * We need a buffer cleanup lock to prune HOT chains and defragment
 		 * the page in lazy_scan_prune.  But when it's not possible to acquire
@@ -1063,7 +1082,7 @@ lazy_scan_heap(LVRelState *vacrel)
 		 */
 		if (got_cleanup_lock)
 			lazy_scan_prune(vacrel, buf, blkno, page,
-							vmbuffer, all_visible_according_to_vm,
+							vmbuffer, *all_visible_according_to_vm,
 							&has_lpdead_items);
 
 		/*
@@ -1116,8 +1135,12 @@ lazy_scan_heap(LVRelState *vacrel)
 	if (BufferIsValid(vmbuffer))
 		ReleaseBuffer(vmbuffer);
 
-	/* report that everything is now scanned */
-	pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
+	/*
+	 * Report that everything is now scanned. We never skip scanning the last
+	 * block in the relation, so we can pass rel_pages here.
+	 */
+	pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED,
+								 rel_pages);
 
 	/* now we can compute the new value for pg_class.reltuples */
 	vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
@@ -1132,6 +1155,8 @@ lazy_scan_heap(LVRelState *vacrel)
 		Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
 		vacrel->missed_dead_tuples;
 
+	read_stream_end(stream);
+
 	/*
 	 * Do index vacuuming (call each index's ambulkdelete routine), then do
 	 * related heap vacuuming
@@ -1142,12 +1167,14 @@ lazy_scan_heap(LVRelState *vacrel)
 	/*
 	 * Vacuum the remainder of the Free Space Map.  We must do this whether or
 	 * not there were indexes, and whether or not we bypassed index vacuuming.
+	 * We can pass rel_pages here because we never skip scanning the last
+	 * block of the relation.
 	 */
-	if (blkno > next_fsm_block_to_vacuum)
-		FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno);
+	if (rel_pages > next_fsm_block_to_vacuum)
+		FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, rel_pages);
 
 	/* report all blocks vacuumed */
-	pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
+	pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages);
 
 	/* Do final index cleanup (call each index's amvacuumcleanup routine) */
 	if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
@@ -1155,28 +1182,37 @@ lazy_scan_heap(LVRelState *vacrel)
 }
 
 /*
- *	heap_vac_scan_next_block() -- get next block for vacuum to process
- *
- * lazy_scan_heap() calls here every time it needs to get the next block to
- * prune and vacuum.  The function uses the visibility map, vacuum options,
- * and various thresholds to skip blocks which do not need to be processed and
- * sets blkno to the next block to process.
- *
- * The block number and visibility status of the next block to process are set
- * in *blkno and *all_visible_according_to_vm.  The return value is false if
- * there are no further blocks to process.
- *
- * vacrel is an in/out parameter here.  Vacuum options and information about
- * the relation are read.  vacrel->skippedallvis is set if we skip a block
- * that's all-visible but not all-frozen, to ensure that we don't update
- * relfrozenxid in that case.  vacrel also holds information about the next
- * unskippable block, as bookkeeping for this function.
+ *	heap_vac_scan_next_block() -- read stream callback to get the next block
+ *	for vacuum to process
+ *
+ * Every time lazy_scan_heap() needs a new block to process during its first
+ * phase, it invokes read_stream_next_buffer() with a stream set up to call
+ * heap_vac_scan_next_block() to get the next block.
+ *
+ * heap_vac_scan_next_block() uses the visibility map, vacuum options, and
+ * various thresholds to skip blocks which do not need to be processed and
+ * returns the next block to process or InvalidBlockNumber if there are no
+ * remaining blocks.
+ *
+ * The visibility status of the next block to process is set in the
+ * per_buffer_data.
+ *
+ * callback_private_data contains a reference to the LVRelState, passed to the
+ * read stream API during stream setup. The LVRelState is an in/out parameter
+ * here (locally named `vacrel`). Vacuum options and information about the
+ * relation are read from it. vacrel->skippedallvis is set if we skip a block
+ * that's all-visible but not all-frozen (to ensure that we don't update
+ * relfrozenxid in that case). vacrel also holds information about the next
+ * unskippable block -- as bookkeeping for this function.
  */
-static bool
-heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
-						 bool *all_visible_according_to_vm)
+static BlockNumber
+heap_vac_scan_next_block(ReadStream *stream,
+						 void *callback_private_data,
+						 void *per_buffer_data)
 {
 	BlockNumber next_block;
+	LVRelState *vacrel = callback_private_data;
+	bool	   *all_visible_according_to_vm = per_buffer_data;
 
 	/* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
 	next_block = vacrel->current_block + 1;
@@ -1189,8 +1225,7 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
 			ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
 			vacrel->next_unskippable_vmbuffer = InvalidBuffer;
 		}
-		*blkno = vacrel->rel_pages;
-		return false;
+		return InvalidBlockNumber;
 	}
 
 	/*
@@ -1239,9 +1274,9 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
 		 * but chose not to.  We know that they are all-visible in the VM,
 		 * otherwise they would've been unskippable.
 		 */
-		*blkno = vacrel->current_block = next_block;
+		vacrel->current_block = next_block;
 		*all_visible_according_to_vm = true;
-		return true;
+		return vacrel->current_block;
 	}
 	else
 	{
@@ -1251,9 +1286,9 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
 		 */
 		Assert(next_block == vacrel->next_unskippable_block);
 
-		*blkno = vacrel->current_block = next_block;
+		vacrel->current_block = next_block;
 		*all_visible_according_to_vm = vacrel->next_unskippable_allvis;
-		return true;
+		return vacrel->current_block;
 	}
 }
 
-- 
2.34.1

Reply via email to