On Thu, Feb 6, 2025 at 1:06 PM Melanie Plageman <melanieplage...@gmail.com> wrote: > > On Wed, Feb 5, 2025 at 5:26 PM Melanie Plageman > <melanieplage...@gmail.com> wrote: > > > > Yes, looking at these results, I also feel good about it. I've updated > > the commit metadata in attached v14, but I could use a round of review > > before pushing it. > > I've done a bit of self-review and updated these patches.
This needed a rebase in light of 052026c9b90. v16 attached has an additional commit which converts the block information parameters to heap_vac_scan_next_block() into flags because we can only get one piece of information per block from the read stream API. This seemed nicer than a cumbersome struct. - Melanie
From 920b567a0afa77f1478f804dc934936a9e9e9ae8 Mon Sep 17 00:00:00 2001 From: Melanie Plageman <melanieplage...@gmail.com> Date: Wed, 5 Feb 2025 17:23:05 -0500 Subject: [PATCH v16 3/3] Use streaming read I/O in VACUUM's third phase Make vacuum's third phase (its second pass over the heap), which reaps dead items collected in the first phase and marks them as reusable, use the read stream API. This commit adds a new read stream callback, vacuum_reap_lp_read_stream_next(), that looks ahead in the TidStore and returns the next block number to read for vacuum. Author: Melanie Plageman <melanieplage...@gmail.com> Co-authored-by: Thomas Munro <thomas.mu...@gmail.com> Discussion: https://postgr.es/m/CA%2BhUKGKN3oy0bN_3yv8hd78a4%2BM1tJC9z7mD8%2Bf%2ByA%2BGeoFUwQ%40mail.gmail.com --- src/backend/access/heap/vacuumlazy.c | 49 +++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index f705f31f5e6..5b2d9c535c1 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -2637,6 +2637,29 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) return allindexes; } +/* + * Read stream callback for vacuum's third phase (second pass over the heap). + * Gets the next block from the TID store and returns it or InvalidBlockNumber + * if there are no further blocks to vacuum. + */ +static BlockNumber +vacuum_reap_lp_read_stream_next(ReadStream *stream, + void *callback_private_data, + void *per_buffer_data) +{ + TidStoreIter *iter = callback_private_data; + TidStoreIterResult *iter_result; + + iter_result = TidStoreIterateNext(iter); + if (iter_result == NULL) + return InvalidBlockNumber; + + /* Save the TidStoreIterResult for later, so we can extract the offsets. 
*/ + memcpy(per_buffer_data, iter_result, sizeof(*iter_result)); + + return iter_result->blkno; +} + /* * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy * @@ -2657,6 +2680,7 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) static void lazy_vacuum_heap_rel(LVRelState *vacrel) { + ReadStream *stream; BlockNumber vacuumed_pages = 0; Buffer vmbuffer = InvalidBuffer; LVSavedErrInfo saved_err_info; @@ -2677,7 +2701,17 @@ lazy_vacuum_heap_rel(LVRelState *vacrel) InvalidBlockNumber, InvalidOffsetNumber); iter = TidStoreBeginIterate(vacrel->dead_items); - while ((iter_result = TidStoreIterateNext(iter)) != NULL) + + /* Set up the read stream */ + stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE, + vacrel->bstrategy, + vacrel->rel, + MAIN_FORKNUM, + vacuum_reap_lp_read_stream_next, + iter, + sizeof(TidStoreIterResult)); + + while (true) { BlockNumber blkno; Buffer buf; @@ -2688,9 +2722,14 @@ lazy_vacuum_heap_rel(LVRelState *vacrel) vacuum_delay_point(); - blkno = iter_result->blkno; - vacrel->blkno = blkno; + buf = read_stream_next_buffer(stream, (void **) &iter_result); + if (!BufferIsValid(buf)) + break; + + vacrel->blkno = blkno = BufferGetBlockNumber(buf); + + Assert(iter_result); num_offsets = TidStoreGetBlockOffsets(iter_result, offsets, lengthof(offsets)); Assert(num_offsets <= lengthof(offsets)); @@ -2702,8 +2741,6 @@ lazy_vacuum_heap_rel(LVRelState *vacrel) visibilitymap_pin(vacrel->rel, blkno, &vmbuffer); /* We need a non-cleanup exclusive lock to mark dead_items unused */ - buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL, - vacrel->bstrategy); LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); lazy_vacuum_heap_page(vacrel, blkno, buf, offsets, num_offsets, vmbuffer); @@ -2716,6 +2753,8 @@ lazy_vacuum_heap_rel(LVRelState *vacrel) RecordPageWithFreeSpace(vacrel->rel, blkno, freespace); vacuumed_pages++; } + + read_stream_end(stream); TidStoreEndIterate(iter); vacrel->blkno = InvalidBlockNumber; -- 2.34.1
From 4d6e90bd73dc185415d0278a19b22c691b6fa598 Mon Sep 17 00:00:00 2001 From: Melanie Plageman <melanieplage...@gmail.com> Date: Tue, 11 Feb 2025 19:02:13 -0500 Subject: [PATCH v16 1/3] Convert heap_vac_scan_next_block() boolean parameters to flags The read stream API only allows one piece of extra per block state to be passed back to the API user. Heap vacuum needs to know whether or not a given block was all-visible in the visibility map and whether or not it was eagerly scanned. Convert these two pieces of information to flags so that they can be passed as a single argument to heap_vac_scan_next_block() (which will become the read stream API callback for heap phase I vacuuming). --- src/backend/access/heap/vacuumlazy.c | 47 ++++++++++++++++------------ 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 8c387ae557e..25aeb23aa30 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -248,6 +248,13 @@ typedef enum */ #define EAGER_SCAN_REGION_SIZE 4096 +/* + * heap_vac_scan_next_block() sets these statuses to communicate per-block + * information to the caller. 
+ */ +#define VAC_BLK_WAS_EAGER_SCANNED (1 << 0) +#define VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM (1 << 1) + typedef struct LVRelState { /* Target heap relation and its indexes */ @@ -417,8 +424,7 @@ static void lazy_scan_heap(LVRelState *vacrel); static void heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params); static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, - bool *all_visible_according_to_vm, - bool *was_eager_scanned); + uint8 *blk_flags); static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis); static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, @@ -1171,8 +1177,7 @@ lazy_scan_heap(LVRelState *vacrel) BlockNumber rel_pages = vacrel->rel_pages, blkno, next_fsm_block_to_vacuum = 0; - bool all_visible_according_to_vm, - was_eager_scanned = false; + uint8 blk_flags = 0; BlockNumber orig_eager_scan_success_limit = vacrel->eager_scan_remaining_successes; /* for logging */ Buffer vmbuffer = InvalidBuffer; @@ -1196,8 +1201,7 @@ lazy_scan_heap(LVRelState *vacrel) vacrel->next_unskippable_eager_scanned = false; vacrel->next_unskippable_vmbuffer = InvalidBuffer; - while (heap_vac_scan_next_block(vacrel, &blkno, &all_visible_according_to_vm, - &was_eager_scanned)) + while (heap_vac_scan_next_block(vacrel, &blkno, &blk_flags)) { Buffer buf; Page page; @@ -1206,7 +1210,7 @@ lazy_scan_heap(LVRelState *vacrel) bool got_cleanup_lock = false; vacrel->scanned_pages++; - if (was_eager_scanned) + if (blk_flags & VAC_BLK_WAS_EAGER_SCANNED) vacrel->eager_scanned_pages++; /* Report as block scanned, update error traceback information */ @@ -1331,7 +1335,8 @@ lazy_scan_heap(LVRelState *vacrel) */ if (got_cleanup_lock) lazy_scan_prune(vacrel, buf, blkno, page, - vmbuffer, all_visible_according_to_vm, + vmbuffer, + blk_flags & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM, &has_lpdead_items, &vm_page_frozen); /* @@ -1348,7 +1353,8 @@ lazy_scan_heap(LVRelState *vacrel) * exclude pages 
skipped due to cleanup lock contention from eager * freeze algorithm caps. */ - if (got_cleanup_lock && was_eager_scanned) + if (got_cleanup_lock && + blk_flags & VAC_BLK_WAS_EAGER_SCANNED) { /* Aggressive vacuums do not eager scan. */ Assert(!vacrel->aggressive); @@ -1479,11 +1485,11 @@ lazy_scan_heap(LVRelState *vacrel) * and various thresholds to skip blocks which do not need to be processed and * sets blkno to the next block to process. * - * The block number and visibility status of the next block to process are set - * in *blkno and *all_visible_according_to_vm. The return value is false if - * there are no further blocks to process. If the block is being eagerly - * scanned, was_eager_scanned is set so that the caller can count whether or - * not an eagerly scanned page is successfully frozen. + * The block number of the next block to process is set in *blkno and its + * visibility status and whether or not it was eager scanned is set in + * *blk_flags. + * + * The return value is false if there are no further blocks to process. * * vacrel is an in/out parameter here. Vacuum options and information about * the relation are read. vacrel->skippedallvis is set if we skip a block @@ -1493,15 +1499,14 @@ lazy_scan_heap(LVRelState *vacrel) */ static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, - bool *all_visible_according_to_vm, - bool *was_eager_scanned) + uint8 *blk_flags) { BlockNumber next_block; /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */ next_block = vacrel->current_block + 1; - *was_eager_scanned = false; + *blk_flags = 0; /* Have we reached the end of the relation? */ if (next_block >= vacrel->rel_pages) @@ -1562,7 +1567,7 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, * otherwise they would've been unskippable. 
*/ *blkno = vacrel->current_block = next_block; - *all_visible_according_to_vm = true; + *blk_flags |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM; return true; } else @@ -1574,8 +1579,10 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, Assert(next_block == vacrel->next_unskippable_block); *blkno = vacrel->current_block = next_block; - *all_visible_according_to_vm = vacrel->next_unskippable_allvis; - *was_eager_scanned = vacrel->next_unskippable_eager_scanned; + if (vacrel->next_unskippable_allvis) + *blk_flags |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM; + if (vacrel->next_unskippable_eager_scanned) + *blk_flags |= VAC_BLK_WAS_EAGER_SCANNED; return true; } } -- 2.34.1
From e321b5caacfc231b0cb64d99881d60a487fa3a8a Mon Sep 17 00:00:00 2001 From: Melanie Plageman <melanieplage...@gmail.com> Date: Wed, 5 Feb 2025 17:21:41 -0500 Subject: [PATCH v16 2/3] Use streaming read I/O in VACUUM's first phase Make vacuum's first phase, which prunes and freezes tuples and records dead TIDs, use the read stream API by converting heap_vac_scan_next_block() to a read stream callback. Discussion: https://postgr.es/m/CA%2BhUKGKN3oy0bN_3yv8hd78a4%2BM1tJC9z7mD8%2Bf%2ByA%2BGeoFUwQ%40mail.gmail.com --- src/backend/access/heap/vacuumlazy.c | 139 +++++++++++++++++---------- 1 file changed, 86 insertions(+), 53 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 25aeb23aa30..f705f31f5e6 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -153,6 +153,7 @@ #include "storage/bufmgr.h" #include "storage/freespace.h" #include "storage/lmgr.h" +#include "storage/read_stream.h" #include "utils/lsyscache.h" #include "utils/pg_rusage.h" #include "utils/timestamp.h" @@ -423,8 +424,9 @@ typedef struct LVSavedErrInfo static void lazy_scan_heap(LVRelState *vacrel); static void heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params); -static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, - uint8 *blk_flags); +static BlockNumber heap_vac_scan_next_block(ReadStream *stream, + void *callback_private_data, + void *per_buffer_data); static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis); static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, @@ -1174,10 +1176,11 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, static void lazy_scan_heap(LVRelState *vacrel) { + ReadStream *stream; BlockNumber rel_pages = vacrel->rel_pages, - blkno, + blkno = 0, next_fsm_block_to_vacuum = 0; - uint8 blk_flags = 0; + uint8 *blk_flags = NULL; BlockNumber orig_eager_scan_success_limit = 
vacrel->eager_scan_remaining_successes; /* for logging */ Buffer vmbuffer = InvalidBuffer; @@ -1201,7 +1204,16 @@ lazy_scan_heap(LVRelState *vacrel) vacrel->next_unskippable_eager_scanned = false; vacrel->next_unskippable_vmbuffer = InvalidBuffer; - while (heap_vac_scan_next_block(vacrel, &blkno, &blk_flags)) + /* Set up the read stream for vacuum's first pass through the heap */ + stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE, + vacrel->bstrategy, + vacrel->rel, + MAIN_FORKNUM, + heap_vac_scan_next_block, + vacrel, + sizeof(bool)); + + while (true) { Buffer buf; Page page; @@ -1209,15 +1221,6 @@ lazy_scan_heap(LVRelState *vacrel) bool vm_page_frozen = false; bool got_cleanup_lock = false; - vacrel->scanned_pages++; - if (blk_flags & VAC_BLK_WAS_EAGER_SCANNED) - vacrel->eager_scanned_pages++; - - /* Report as block scanned, update error traceback information */ - pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); - update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP, - blkno, InvalidOffsetNumber); - vacuum_delay_point(); /* @@ -1258,7 +1261,8 @@ lazy_scan_heap(LVRelState *vacrel) /* * Vacuum the Free Space Map to make newly-freed space visible on - * upper-level FSM pages. Note we have not yet processed blkno. + * upper-level FSM pages. Note that blkno is the previously + * processed block. 
*/ FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno); @@ -1269,6 +1273,25 @@ lazy_scan_heap(LVRelState *vacrel) PROGRESS_VACUUM_PHASE_SCAN_HEAP); } + buf = read_stream_next_buffer(stream, (void **) &blk_flags); + + if (!BufferIsValid(buf)) + break; + + Assert(blk_flags); + CheckBufferIsPinnedOnce(buf); + page = BufferGetPage(buf); + blkno = BufferGetBlockNumber(buf); + + vacrel->scanned_pages++; + if (*blk_flags & VAC_BLK_WAS_EAGER_SCANNED) + vacrel->eager_scanned_pages++; + + /* Report as block scanned, update error traceback information */ + pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); + update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP, + blkno, InvalidOffsetNumber); + /* * Pin the visibility map page in case we need to mark the page * all-visible. In most cases this will be very cheap, because we'll @@ -1276,10 +1299,6 @@ lazy_scan_heap(LVRelState *vacrel) */ visibilitymap_pin(vacrel->rel, blkno, &vmbuffer); - buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL, - vacrel->bstrategy); - page = BufferGetPage(buf); - /* * We need a buffer cleanup lock to prune HOT chains and defragment * the page in lazy_scan_prune. But when it's not possible to acquire @@ -1336,7 +1355,7 @@ lazy_scan_heap(LVRelState *vacrel) if (got_cleanup_lock) lazy_scan_prune(vacrel, buf, blkno, page, vmbuffer, - blk_flags & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM, + *blk_flags & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM, &has_lpdead_items, &vm_page_frozen); /* @@ -1354,7 +1373,7 @@ lazy_scan_heap(LVRelState *vacrel) * freeze algorithm caps. */ if (got_cleanup_lock && - blk_flags & VAC_BLK_WAS_EAGER_SCANNED) + *blk_flags & VAC_BLK_WAS_EAGER_SCANNED) { /* Aggressive vacuums do not eager scan. 
*/ Assert(!vacrel->aggressive); @@ -1439,8 +1458,12 @@ lazy_scan_heap(LVRelState *vacrel) if (BufferIsValid(vmbuffer)) ReleaseBuffer(vmbuffer); - /* report that everything is now scanned */ - pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); + /* + * Report that everything is now scanned. We never skip scanning the last + * block in the relation, so we can pass rel_pages here. + */ + pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, + rel_pages); /* now we can compute the new value for pg_class.reltuples */ vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages, @@ -1455,6 +1478,8 @@ lazy_scan_heap(LVRelState *vacrel) Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples + vacrel->missed_dead_tuples; + read_stream_end(stream); + /* * Do index vacuuming (call each index's ambulkdelete routine), then do * related heap vacuuming @@ -1465,12 +1490,14 @@ lazy_scan_heap(LVRelState *vacrel) /* * Vacuum the remainder of the Free Space Map. We must do this whether or * not there were indexes, and whether or not we bypassed index vacuuming. + * We can pass rel_pages here because we never skip scanning the last + * block of the relation. */ - if (blkno > next_fsm_block_to_vacuum) - FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno); + if (rel_pages > next_fsm_block_to_vacuum) + FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, rel_pages); /* report all blocks vacuumed */ - pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno); + pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages); /* Do final index cleanup (call each index's amvacuumcleanup routine) */ if (vacrel->nindexes > 0 && vacrel->do_index_cleanup) @@ -1478,30 +1505,37 @@ lazy_scan_heap(LVRelState *vacrel) } /* - * heap_vac_scan_next_block() -- get next block for vacuum to process - * - * lazy_scan_heap() calls here every time it needs to get the next block to - * prune and vacuum. 
The function uses the visibility map, vacuum options, - * and various thresholds to skip blocks which do not need to be processed and - * sets blkno to the next block to process. - * - * The block number of the next block to process is set in *blkno and its - * visibility status and whether or not it was eager scanned is set in - * *blk_flags. - * - * The return value is false if there are no further blocks to process. - * - * vacrel is an in/out parameter here. Vacuum options and information about - * the relation are read. vacrel->skippedallvis is set if we skip a block - * that's all-visible but not all-frozen, to ensure that we don't update - * relfrozenxid in that case. vacrel also holds information about the next - * unskippable block, as bookkeeping for this function. + * heap_vac_scan_next_block() -- read stream callback to get the next block + * for vacuum to process + * + * Every time lazy_scan_heap() needs a new block to process during its first + * phase, it invokes read_stream_next_buffer() with a stream set up to call + * heap_vac_scan_next_block() to get the next block. + * + * heap_vac_scan_next_block() uses the visibility map, vacuum options, and + * various thresholds to skip blocks which do not need to be processed and + * returns the next block to process or InvalidBlockNumber if there are no + * remaining blocks. + * + * The visibility status of the next block to process and whether or not it + * was eager scanned is set in the per_buffer_data. + * + * callback_private_data contains a reference to the LVRelState, passed to the + * read stream API during stream setup. The LVRelState is an in/out parameter + * here (locally named `vacrel`). Vacuum options and information about the + * relation are read from it. vacrel->skippedallvis is set if we skip a block + * that's all-visible but not all-frozen (to ensure that we don't update + * relfrozenxid in that case). 
vacrel also holds information about the next + * unskippable block -- as bookkeeping for this function. */ -static bool -heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, - uint8 *blk_flags) +static BlockNumber +heap_vac_scan_next_block(ReadStream *stream, + void *callback_private_data, + void *per_buffer_data) { BlockNumber next_block; + LVRelState *vacrel = callback_private_data; + uint8 *blk_flags = per_buffer_data; /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */ next_block = vacrel->current_block + 1; @@ -1516,8 +1550,7 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, ReleaseBuffer(vacrel->next_unskippable_vmbuffer); vacrel->next_unskippable_vmbuffer = InvalidBuffer; } - *blkno = vacrel->rel_pages; - return false; + return InvalidBlockNumber; } /* @@ -1566,9 +1599,9 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, * but chose not to. We know that they are all-visible in the VM, * otherwise they would've been unskippable. */ - *blkno = vacrel->current_block = next_block; + vacrel->current_block = next_block; *blk_flags |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM; - return true; + return vacrel->current_block; } else { @@ -1578,12 +1611,12 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, */ Assert(next_block == vacrel->next_unskippable_block); - *blkno = vacrel->current_block = next_block; + vacrel->current_block = next_block; if (vacrel->next_unskippable_allvis) *blk_flags |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM; if (vacrel->next_unskippable_eager_scanned) *blk_flags |= VAC_BLK_WAS_EAGER_SCANNED; - return true; + return vacrel->current_block; } } -- 2.34.1