From 6f5a52a3bd7c018b42cbd7db1f9cad47d378c816 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Thu, 12 Jan 2023 22:04:20 +0900
Subject: [PATCH v17 12/12] Use TIDStore for storing dead tuple TID during lazy
 vacuum.

Previously, we used an array of ItemPointerData to store dead tuple
TIDs, which is not space efficient and slow to lookup. Also, we had
the 1GB limit on its size.

This changes to use TIDStore for this purpose. Since the TIDStore,
backed by the radix tree, incrementally allocates the memory, we get
rid of the 1GB limit.

Also, since we are no longer able to exactly estimate the maximum
number of TIDs that can be stored based on the amount of memory, this
commit renames the columns max_dead_tuples and num_dead_tuples and
shows the progress information in bytes.

Furthermore, since the TIDStore uses the radix tree internally, the
minimum amount of memory required by TIDStore is 1MB, which is the
initial DSA segment size. Due to that, this change increases the minimum
maintenance_work_mem from 1MB to 2MB.
---
 doc/src/sgml/monitoring.sgml               |   8 +-
 src/backend/access/heap/vacuumlazy.c       | 169 +++++++--------------
 src/backend/catalog/system_views.sql       |   2 +-
 src/backend/commands/vacuum.c              |  76 +--------
 src/backend/commands/vacuumparallel.c      |  64 +++++---
 src/backend/storage/lmgr/lwlock.c          |   2 +
 src/backend/utils/misc/guc_tables.c        |   2 +-
 src/include/commands/progress.h            |   4 +-
 src/include/commands/vacuum.h              |  25 +--
 src/include/storage/lwlock.h               |   1 +
 src/test/regress/expected/cluster.out      |   2 +-
 src/test/regress/expected/create_index.out |   2 +-
 src/test/regress/expected/rules.out        |   4 +-
 src/test/regress/sql/cluster.sql           |   2 +-
 src/test/regress/sql/create_index.sql      |   2 +-
 15 files changed, 122 insertions(+), 243 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 358d2ff90f..6ce7ea9e35 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -6840,10 +6840,10 @@ FROM pg_stat_get_backend_idset() AS backendid;
 
      <row>
       <entry role="catalog_table_entry"><para role="column_definition">
-       <structfield>max_dead_tuples</structfield> <type>bigint</type>
+       <structfield>max_dead_tuple_bytes</structfield> <type>bigint</type>
       </para>
       <para>
-       Number of dead tuples that we can store before needing to perform
+       Amount of dead tuple data that we can store before needing to perform
        an index vacuum cycle, based on
        <xref linkend="guc-maintenance-work-mem"/>.
       </para></entry>
@@ -6851,10 +6851,10 @@ FROM pg_stat_get_backend_idset() AS backendid;
 
      <row>
       <entry role="catalog_table_entry"><para role="column_definition">
-       <structfield>num_dead_tuples</structfield> <type>bigint</type>
+       <structfield>dead_tuple_bytes</structfield> <type>bigint</type>
       </para>
       <para>
-       Number of dead tuples collected since the last index vacuum cycle.
+       Amount of dead tuple data collected since the last index vacuum cycle.
       </para></entry>
      </row>
     </tbody>
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index a42e881da3..1041e6640f 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -40,6 +40,7 @@
 #include "access/heapam_xlog.h"
 #include "access/htup_details.h"
 #include "access/multixact.h"
+#include "access/tidstore.h"
 #include "access/transam.h"
 #include "access/visibilitymap.h"
 #include "access/xact.h"
@@ -188,7 +189,7 @@ typedef struct LVRelState
 	 * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
 	 * LP_UNUSED during second heap pass.
 	 */
-	VacDeadItems *dead_items;	/* TIDs whose index tuples we'll delete */
+	TidStore *dead_items;	/* TIDs whose index tuples we'll delete */
 	BlockNumber rel_pages;		/* total number of pages */
 	BlockNumber scanned_pages;	/* # pages examined (not skipped via VM) */
 	BlockNumber removed_pages;	/* # pages removed by relation truncation */
@@ -259,8 +260,9 @@ static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
 static void lazy_vacuum(LVRelState *vacrel);
 static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
 static void lazy_vacuum_heap_rel(LVRelState *vacrel);
-static int	lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
-								  Buffer buffer, int index, Buffer *vmbuffer);
+static void	lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
+								  OffsetNumber *offsets, int num_offsets,
+								  Buffer buffer, Buffer *vmbuffer);
 static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
 static void lazy_cleanup_all_indexes(LVRelState *vacrel);
 static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
@@ -825,21 +827,21 @@ lazy_scan_heap(LVRelState *vacrel)
 				blkno,
 				next_unskippable_block,
 				next_fsm_block_to_vacuum = 0;
-	VacDeadItems *dead_items = vacrel->dead_items;
+	TidStore *dead_items = vacrel->dead_items;
 	Buffer		vmbuffer = InvalidBuffer;
 	bool		next_unskippable_allvis,
 				skipping_current_range;
 	const int	initprog_index[] = {
 		PROGRESS_VACUUM_PHASE,
 		PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
-		PROGRESS_VACUUM_MAX_DEAD_TUPLES
+		PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES
 	};
 	int64		initprog_val[3];
 
 	/* Report that we're scanning the heap, advertising total # of blocks */
 	initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
 	initprog_val[1] = rel_pages;
-	initprog_val[2] = dead_items->max_items;
+	initprog_val[2] = tidstore_max_memory(vacrel->dead_items);
 	pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
 
 	/* Set up an initial range of skippable blocks using the visibility map */
@@ -906,8 +908,7 @@ lazy_scan_heap(LVRelState *vacrel)
 		 * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
 		 * this page.
 		 */
-		Assert(dead_items->max_items >= MaxHeapTuplesPerPage);
-		if (dead_items->max_items - dead_items->num_items < MaxHeapTuplesPerPage)
+		if (tidstore_is_full(vacrel->dead_items))
 		{
 			/*
 			 * Before beginning index vacuuming, we release any pin we may
@@ -1039,11 +1040,18 @@ lazy_scan_heap(LVRelState *vacrel)
 			if (prunestate.has_lpdead_items)
 			{
 				Size		freespace;
+				TidStoreIter *iter;
+				TidStoreIterResult *result;
 
-				lazy_vacuum_heap_page(vacrel, blkno, buf, 0, &vmbuffer);
+				iter = tidstore_begin_iterate(vacrel->dead_items);
+				result = tidstore_iterate_next(iter);
+				lazy_vacuum_heap_page(vacrel, blkno, result->offsets, result->num_offsets,
+									  buf, &vmbuffer);
+				Assert(!tidstore_iterate_next(iter));
+				tidstore_end_iterate(iter);
 
 				/* Forget the LP_DEAD items that we just vacuumed */
-				dead_items->num_items = 0;
+				tidstore_reset(dead_items);
 
 				/*
 				 * Periodically perform FSM vacuuming to make newly-freed
@@ -1080,7 +1088,7 @@ lazy_scan_heap(LVRelState *vacrel)
 			 * with prunestate-driven visibility map and FSM steps (just like
 			 * the two-pass strategy).
 			 */
-			Assert(dead_items->num_items == 0);
+			Assert(tidstore_num_tids(dead_items) == 0);
 		}
 
 		/*
@@ -1233,7 +1241,7 @@ lazy_scan_heap(LVRelState *vacrel)
 	 * Do index vacuuming (call each index's ambulkdelete routine), then do
 	 * related heap vacuuming
 	 */
-	if (dead_items->num_items > 0)
+	if (tidstore_num_tids(dead_items) > 0)
 		lazy_vacuum(vacrel);
 
 	/*
@@ -1871,23 +1879,15 @@ retry:
 	 */
 	if (lpdead_items > 0)
 	{
-		VacDeadItems *dead_items = vacrel->dead_items;
-		ItemPointerData tmp;
+		TidStore *dead_items = vacrel->dead_items;
 
 		vacrel->lpdead_item_pages++;
 		prunestate->has_lpdead_items = true;
 
-		ItemPointerSetBlockNumber(&tmp, blkno);
+		tidstore_add_tids(dead_items, blkno, deadoffsets, lpdead_items);
 
-		for (int i = 0; i < lpdead_items; i++)
-		{
-			ItemPointerSetOffsetNumber(&tmp, deadoffsets[i]);
-			dead_items->items[dead_items->num_items++] = tmp;
-		}
-
-		Assert(dead_items->num_items <= dead_items->max_items);
-		pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES,
-									 dead_items->num_items);
+		pgstat_progress_update_param(PROGRESS_VACUUM_DEAD_TUPLE_BYTES,
+									 tidstore_memory_usage(dead_items));
 
 		/*
 		 * It was convenient to ignore LP_DEAD items in all_visible earlier on
@@ -2107,8 +2107,7 @@ lazy_scan_noprune(LVRelState *vacrel,
 	}
 	else
 	{
-		VacDeadItems *dead_items = vacrel->dead_items;
-		ItemPointerData tmp;
+		TidStore *dead_items = vacrel->dead_items;
 
 		/*
 		 * Page has LP_DEAD items, and so any references/TIDs that remain in
@@ -2117,17 +2116,10 @@ lazy_scan_noprune(LVRelState *vacrel,
 		 */
 		vacrel->lpdead_item_pages++;
 
-		ItemPointerSetBlockNumber(&tmp, blkno);
+		tidstore_add_tids(dead_items, blkno, deadoffsets, lpdead_items);
 
-		for (int i = 0; i < lpdead_items; i++)
-		{
-			ItemPointerSetOffsetNumber(&tmp, deadoffsets[i]);
-			dead_items->items[dead_items->num_items++] = tmp;
-		}
-
-		Assert(dead_items->num_items <= dead_items->max_items);
-		pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES,
-									 dead_items->num_items);
+		pgstat_progress_update_param(PROGRESS_VACUUM_DEAD_TUPLE_BYTES,
+									 tidstore_memory_usage(dead_items));
 
 		vacrel->lpdead_items += lpdead_items;
 
@@ -2176,7 +2168,7 @@ lazy_vacuum(LVRelState *vacrel)
 	if (!vacrel->do_index_vacuuming)
 	{
 		Assert(!vacrel->do_index_cleanup);
-		vacrel->dead_items->num_items = 0;
+		tidstore_reset(vacrel->dead_items);
 		return;
 	}
 
@@ -2205,7 +2197,7 @@ lazy_vacuum(LVRelState *vacrel)
 		BlockNumber threshold;
 
 		Assert(vacrel->num_index_scans == 0);
-		Assert(vacrel->lpdead_items == vacrel->dead_items->num_items);
+		Assert(vacrel->lpdead_items == tidstore_num_tids(vacrel->dead_items));
 		Assert(vacrel->do_index_vacuuming);
 		Assert(vacrel->do_index_cleanup);
 
@@ -2232,8 +2224,8 @@ lazy_vacuum(LVRelState *vacrel)
 		 * cases then this may need to be reconsidered.
 		 */
 		threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
-		bypass = (vacrel->lpdead_item_pages < threshold &&
-				  vacrel->lpdead_items < MAXDEADITEMS(32L * 1024L * 1024L));
+		bypass = (vacrel->lpdead_item_pages < threshold) &&
+			tidstore_memory_usage(vacrel->dead_items) < (32L * 1024L * 1024L);
 	}
 
 	if (bypass)
@@ -2278,7 +2270,7 @@ lazy_vacuum(LVRelState *vacrel)
 	 * Forget the LP_DEAD items that we just vacuumed (or just decided to not
 	 * vacuum)
 	 */
-	vacrel->dead_items->num_items = 0;
+	tidstore_reset(vacrel->dead_items);
 }
 
 /*
@@ -2351,7 +2343,7 @@ lazy_vacuum_all_indexes(LVRelState *vacrel)
 	 * place).
 	 */
 	Assert(vacrel->num_index_scans > 0 ||
-		   vacrel->dead_items->num_items == vacrel->lpdead_items);
+		   tidstore_num_tids(vacrel->dead_items) == vacrel->lpdead_items);
 	Assert(allindexes || vacrel->failsafe_active);
 
 	/*
@@ -2388,10 +2380,11 @@ lazy_vacuum_all_indexes(LVRelState *vacrel)
 static void
 lazy_vacuum_heap_rel(LVRelState *vacrel)
 {
-	int			index;
 	BlockNumber vacuumed_pages;
 	Buffer		vmbuffer = InvalidBuffer;
 	LVSavedErrInfo saved_err_info;
+	TidStoreIter *iter;
+	TidStoreIterResult *result;
 
 	Assert(vacrel->do_index_vacuuming);
 	Assert(vacrel->do_index_cleanup);
@@ -2408,8 +2401,8 @@ lazy_vacuum_heap_rel(LVRelState *vacrel)
 
 	vacuumed_pages = 0;
 
-	index = 0;
-	while (index < vacrel->dead_items->num_items)
+	iter = tidstore_begin_iterate(vacrel->dead_items);
+	while ((result = tidstore_iterate_next(iter)) != NULL)
 	{
 		BlockNumber tblk;
 		Buffer		buf;
@@ -2418,12 +2411,13 @@ lazy_vacuum_heap_rel(LVRelState *vacrel)
 
 		vacuum_delay_point();
 
-		tblk = ItemPointerGetBlockNumber(&vacrel->dead_items->items[index]);
+		tblk = result->blkno;
 		vacrel->blkno = tblk;
 		buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, tblk, RBM_NORMAL,
 								 vacrel->bstrategy);
 		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
-		index = lazy_vacuum_heap_page(vacrel, tblk, buf, index, &vmbuffer);
+		lazy_vacuum_heap_page(vacrel, tblk, result->offsets, result->num_offsets,
+							  buf, &vmbuffer);
 
 		/* Now that we've vacuumed the page, record its available space */
 		page = BufferGetPage(buf);
@@ -2433,6 +2427,7 @@ lazy_vacuum_heap_rel(LVRelState *vacrel)
 		RecordPageWithFreeSpace(vacrel->rel, tblk, freespace);
 		vacuumed_pages++;
 	}
+	tidstore_end_iterate(iter);
 
 	/* Clear the block number information */
 	vacrel->blkno = InvalidBlockNumber;
@@ -2447,14 +2442,13 @@ lazy_vacuum_heap_rel(LVRelState *vacrel)
 	 * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
 	 * the second heap pass.  No more, no less.
 	 */
-	Assert(index > 0);
 	Assert(vacrel->num_index_scans > 1 ||
-		   (index == vacrel->lpdead_items &&
+		   (tidstore_num_tids(vacrel->dead_items) == vacrel->lpdead_items &&
 			vacuumed_pages == vacrel->lpdead_item_pages));
 
 	ereport(DEBUG2,
-			(errmsg("table \"%s\": removed %lld dead item identifiers in %u pages",
-					vacrel->relname, (long long) index, vacuumed_pages)));
+			(errmsg("table \"%s\": removed " UINT64_FORMAT " dead item identifiers in %u pages",
+					vacrel->relname, tidstore_num_tids(vacrel->dead_items), vacuumed_pages)));
 
 	/* Revert to the previous phase information for error traceback */
 	restore_vacuum_error_info(vacrel, &saved_err_info);
@@ -2471,11 +2465,10 @@ lazy_vacuum_heap_rel(LVRelState *vacrel)
  * LP_DEAD item on the page.  The return value is the first index immediately
  * after all LP_DEAD items for the same page in the array.
  */
-static int
-lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
-					  int index, Buffer *vmbuffer)
+static void
+lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
+					  int num_offsets, Buffer buffer, Buffer *vmbuffer)
 {
-	VacDeadItems *dead_items = vacrel->dead_items;
 	Page		page = BufferGetPage(buffer);
 	OffsetNumber unused[MaxHeapTuplesPerPage];
 	int			uncnt = 0;
@@ -2494,16 +2487,11 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 
 	START_CRIT_SECTION();
 
-	for (; index < dead_items->num_items; index++)
+	for (int i = 0; i < num_offsets; i++)
 	{
-		BlockNumber tblk;
-		OffsetNumber toff;
 		ItemId		itemid;
+		OffsetNumber	toff = offsets[i];
 
-		tblk = ItemPointerGetBlockNumber(&dead_items->items[index]);
-		if (tblk != blkno)
-			break;				/* past end of tuples for this block */
-		toff = ItemPointerGetOffsetNumber(&dead_items->items[index]);
 		itemid = PageGetItemId(page, toff);
 
 		Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
@@ -2583,7 +2571,6 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 
 	/* Revert to the previous phase information for error traceback */
 	restore_vacuum_error_info(vacrel, &saved_err_info);
-	return index;
 }
 
 /*
@@ -3079,46 +3066,6 @@ count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
 	return vacrel->nonempty_pages;
 }
 
-/*
- * Returns the number of dead TIDs that VACUUM should allocate space to
- * store, given a heap rel of size vacrel->rel_pages, and given current
- * maintenance_work_mem setting (or current autovacuum_work_mem setting,
- * when applicable).
- *
- * See the comments at the head of this file for rationale.
- */
-static int
-dead_items_max_items(LVRelState *vacrel)
-{
-	int64		max_items;
-	int			vac_work_mem = IsAutoVacuumWorkerProcess() &&
-	autovacuum_work_mem != -1 ?
-	autovacuum_work_mem : maintenance_work_mem;
-
-	if (vacrel->nindexes > 0)
-	{
-		BlockNumber rel_pages = vacrel->rel_pages;
-
-		max_items = MAXDEADITEMS(vac_work_mem * 1024L);
-		max_items = Min(max_items, INT_MAX);
-		max_items = Min(max_items, MAXDEADITEMS(MaxAllocSize));
-
-		/* curious coding here to ensure the multiplication can't overflow */
-		if ((BlockNumber) (max_items / MaxHeapTuplesPerPage) > rel_pages)
-			max_items = rel_pages * MaxHeapTuplesPerPage;
-
-		/* stay sane if small maintenance_work_mem */
-		max_items = Max(max_items, MaxHeapTuplesPerPage);
-	}
-	else
-	{
-		/* One-pass case only stores a single heap page's TIDs at a time */
-		max_items = MaxHeapTuplesPerPage;
-	}
-
-	return (int) max_items;
-}
-
 /*
  * Allocate dead_items (either using palloc, or in dynamic shared memory).
  * Sets dead_items in vacrel for caller.
@@ -3129,11 +3076,9 @@ dead_items_max_items(LVRelState *vacrel)
 static void
 dead_items_alloc(LVRelState *vacrel, int nworkers)
 {
-	VacDeadItems *dead_items;
-	int			max_items;
-
-	max_items = dead_items_max_items(vacrel);
-	Assert(max_items >= MaxHeapTuplesPerPage);
+	int			vac_work_mem = IsAutoVacuumWorkerProcess() &&
+		autovacuum_work_mem != -1 ?
+		autovacuum_work_mem * 1024L : maintenance_work_mem * 1024L;
 
 	/*
 	 * Initialize state for a parallel vacuum.  As of now, only one worker can
@@ -3160,7 +3105,7 @@ dead_items_alloc(LVRelState *vacrel, int nworkers)
 		else
 			vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
 											   vacrel->nindexes, nworkers,
-											   max_items,
+											   vac_work_mem,
 											   vacrel->verbose ? INFO : DEBUG2,
 											   vacrel->bstrategy);
 
@@ -3173,11 +3118,7 @@ dead_items_alloc(LVRelState *vacrel, int nworkers)
 	}
 
 	/* Serial VACUUM case */
-	dead_items = (VacDeadItems *) palloc(vac_max_items_to_alloc_size(max_items));
-	dead_items->max_items = max_items;
-	dead_items->num_items = 0;
-
-	vacrel->dead_items = dead_items;
+	vacrel->dead_items = tidstore_create(vac_work_mem, NULL);
 }
 
 /*
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 447c9b970f..133e03d728 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1165,7 +1165,7 @@ CREATE VIEW pg_stat_progress_vacuum AS
                       END AS phase,
         S.param2 AS heap_blks_total, S.param3 AS heap_blks_scanned,
         S.param4 AS heap_blks_vacuumed, S.param5 AS index_vacuum_count,
-        S.param6 AS max_dead_tuples, S.param7 AS num_dead_tuples
+        S.param6 AS max_dead_tuple_bytes, S.param7 AS dead_tuple_bytes
     FROM pg_stat_get_progress_info('VACUUM') AS S
         LEFT JOIN pg_database D ON S.datid = D.oid;
 
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index c4ed7efce3..7de4350cde 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -95,7 +95,6 @@ static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params);
 static double compute_parallel_delay(void);
 static VacOptValue get_vacoptval_from_boolean(DefElem *def);
 static bool vac_tid_reaped(ItemPointer itemptr, void *state);
-static int	vac_cmp_itemptr(const void *left, const void *right);
 
 /*
  * Primary entry point for manual VACUUM and ANALYZE commands
@@ -2298,16 +2297,16 @@ get_vacoptval_from_boolean(DefElem *def)
  */
 IndexBulkDeleteResult *
 vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat,
-					  VacDeadItems *dead_items)
+					  TidStore *dead_items)
 {
 	/* Do bulk deletion */
 	istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped,
 							  (void *) dead_items);
 
 	ereport(ivinfo->message_level,
-			(errmsg("scanned index \"%s\" to remove %d row versions",
+			(errmsg("scanned index \"%s\" to remove " UINT64_FORMAT " row versions",
 					RelationGetRelationName(ivinfo->index),
-					dead_items->num_items)));
+					tidstore_num_tids(dead_items))));
 
 	return istat;
 }
@@ -2338,18 +2337,6 @@ vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
 	return istat;
 }
 
-/*
- * Returns the total required space for VACUUM's dead_items array given a
- * max_items value.
- */
-Size
-vac_max_items_to_alloc_size(int max_items)
-{
-	Assert(max_items <= MAXDEADITEMS(MaxAllocSize));
-
-	return offsetof(VacDeadItems, items) + sizeof(ItemPointerData) * max_items;
-}
-
 /*
  *	vac_tid_reaped() -- is a particular tid deletable?
  *
@@ -2360,60 +2347,7 @@ vac_max_items_to_alloc_size(int max_items)
 static bool
 vac_tid_reaped(ItemPointer itemptr, void *state)
 {
-	VacDeadItems *dead_items = (VacDeadItems *) state;
-	int64		litem,
-				ritem,
-				item;
-	ItemPointer res;
-
-	litem = itemptr_encode(&dead_items->items[0]);
-	ritem = itemptr_encode(&dead_items->items[dead_items->num_items - 1]);
-	item = itemptr_encode(itemptr);
-
-	/*
-	 * Doing a simple bound check before bsearch() is useful to avoid the
-	 * extra cost of bsearch(), especially if dead items on the heap are
-	 * concentrated in a certain range.  Since this function is called for
-	 * every index tuple, it pays to be really fast.
-	 */
-	if (item < litem || item > ritem)
-		return false;
-
-	res = (ItemPointer) bsearch((void *) itemptr,
-								(void *) dead_items->items,
-								dead_items->num_items,
-								sizeof(ItemPointerData),
-								vac_cmp_itemptr);
-
-	return (res != NULL);
-}
-
-/*
- * Comparator routines for use with qsort() and bsearch().
- */
-static int
-vac_cmp_itemptr(const void *left, const void *right)
-{
-	BlockNumber lblk,
-				rblk;
-	OffsetNumber loff,
-				roff;
-
-	lblk = ItemPointerGetBlockNumber((ItemPointer) left);
-	rblk = ItemPointerGetBlockNumber((ItemPointer) right);
-
-	if (lblk < rblk)
-		return -1;
-	if (lblk > rblk)
-		return 1;
-
-	loff = ItemPointerGetOffsetNumber((ItemPointer) left);
-	roff = ItemPointerGetOffsetNumber((ItemPointer) right);
-
-	if (loff < roff)
-		return -1;
-	if (loff > roff)
-		return 1;
+	TidStore *dead_items = (TidStore *) state;
 
-	return 0;
+	return tidstore_lookup_tid(dead_items, itemptr);
 }
diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c
index bcd40c80a1..4c0ce4b7e6 100644
--- a/src/backend/commands/vacuumparallel.c
+++ b/src/backend/commands/vacuumparallel.c
@@ -44,7 +44,7 @@
  * use small integers.
  */
 #define PARALLEL_VACUUM_KEY_SHARED			1
-#define PARALLEL_VACUUM_KEY_DEAD_ITEMS		2
+#define PARALLEL_VACUUM_KEY_DSA				2
 #define PARALLEL_VACUUM_KEY_QUERY_TEXT		3
 #define PARALLEL_VACUUM_KEY_BUFFER_USAGE	4
 #define PARALLEL_VACUUM_KEY_WAL_USAGE		5
@@ -103,6 +103,9 @@ typedef struct PVShared
 
 	/* Counter for vacuuming and cleanup */
 	pg_atomic_uint32 idx;
+
+	/* Handle of the shared TidStore */
+	tidstore_handle	dead_items_handle;
 } PVShared;
 
 /* Status used during parallel index vacuum or cleanup */
@@ -166,7 +169,8 @@ struct ParallelVacuumState
 	PVIndStats *indstats;
 
 	/* Shared dead items space among parallel vacuum workers */
-	VacDeadItems *dead_items;
+	TidStore *dead_items;
+	dsa_area *dead_items_area;
 
 	/* Points to buffer usage area in DSM */
 	BufferUsage *buffer_usage;
@@ -222,20 +226,23 @@ static void parallel_vacuum_error_callback(void *arg);
  */
 ParallelVacuumState *
 parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
-					 int nrequested_workers, int max_items,
-					 int elevel, BufferAccessStrategy bstrategy)
+					 int nrequested_workers, int vac_work_mem,
+					 int elevel,
+					 BufferAccessStrategy bstrategy)
 {
 	ParallelVacuumState *pvs;
 	ParallelContext *pcxt;
 	PVShared   *shared;
-	VacDeadItems *dead_items;
+	TidStore	*dead_items;
 	PVIndStats *indstats;
 	BufferUsage *buffer_usage;
 	WalUsage   *wal_usage;
+	void		*area_space;
+	dsa_area	*dead_items_dsa;
 	bool	   *will_parallel_vacuum;
 	Size		est_indstats_len;
 	Size		est_shared_len;
-	Size		est_dead_items_len;
+	Size		dsa_minsize = dsa_minimum_size();
 	int			nindexes_mwm = 0;
 	int			parallel_workers = 0;
 	int			querylen;
@@ -283,9 +290,8 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
 	shm_toc_estimate_chunk(&pcxt->estimator, est_shared_len);
 	shm_toc_estimate_keys(&pcxt->estimator, 1);
 
-	/* Estimate size for dead_items -- PARALLEL_VACUUM_KEY_DEAD_ITEMS */
-	est_dead_items_len = vac_max_items_to_alloc_size(max_items);
-	shm_toc_estimate_chunk(&pcxt->estimator, est_dead_items_len);
+	/* Estimate size for dead tuple DSA -- PARALLEL_VACUUM_KEY_DSA */
+	shm_toc_estimate_chunk(&pcxt->estimator, dsa_minsize);
 	shm_toc_estimate_keys(&pcxt->estimator, 1);
 
 	/*
@@ -351,6 +357,16 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
 	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_INDEX_STATS, indstats);
 	pvs->indstats = indstats;
 
+	/* Prepare DSA space for dead items */
+	area_space = shm_toc_allocate(pcxt->toc, dsa_minsize);
+	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DSA, area_space);
+	dead_items_dsa = dsa_create_in_place(area_space, dsa_minsize,
+										 LWTRANCHE_PARALLEL_VACUUM_DSA,
+										 pcxt->seg);
+	dead_items = tidstore_create(vac_work_mem, dead_items_dsa);
+	pvs->dead_items = dead_items;
+	pvs->dead_items_area = dead_items_dsa;
+
 	/* Prepare shared information */
 	shared = (PVShared *) shm_toc_allocate(pcxt->toc, est_shared_len);
 	MemSet(shared, 0, est_shared_len);
@@ -360,6 +376,7 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
 		(nindexes_mwm > 0) ?
 		maintenance_work_mem / Min(parallel_workers, nindexes_mwm) :
 		maintenance_work_mem;
+	shared->dead_items_handle = tidstore_get_handle(dead_items);
 
 	pg_atomic_init_u32(&(shared->cost_balance), 0);
 	pg_atomic_init_u32(&(shared->active_nworkers), 0);
@@ -368,15 +385,6 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
 	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_SHARED, shared);
 	pvs->shared = shared;
 
-	/* Prepare the dead_items space */
-	dead_items = (VacDeadItems *) shm_toc_allocate(pcxt->toc,
-												   est_dead_items_len);
-	dead_items->max_items = max_items;
-	dead_items->num_items = 0;
-	MemSet(dead_items->items, 0, sizeof(ItemPointerData) * max_items);
-	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DEAD_ITEMS, dead_items);
-	pvs->dead_items = dead_items;
-
 	/*
 	 * Allocate space for each worker's BufferUsage and WalUsage; no need to
 	 * initialize
@@ -434,6 +442,9 @@ parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats)
 			istats[i] = NULL;
 	}
 
+	tidstore_destroy(pvs->dead_items);
+	dsa_detach(pvs->dead_items_area);
+
 	DestroyParallelContext(pvs->pcxt);
 	ExitParallelMode();
 
@@ -442,7 +453,7 @@ parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats)
 }
 
 /* Returns the dead items space */
-VacDeadItems *
+TidStore *
 parallel_vacuum_get_dead_items(ParallelVacuumState *pvs)
 {
 	return pvs->dead_items;
@@ -940,7 +951,9 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 	Relation   *indrels;
 	PVIndStats *indstats;
 	PVShared   *shared;
-	VacDeadItems *dead_items;
+	TidStore	*dead_items;
+	void		*area_space;
+	dsa_area	*dead_items_area;
 	BufferUsage *buffer_usage;
 	WalUsage   *wal_usage;
 	int			nindexes;
@@ -984,10 +997,10 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 											 PARALLEL_VACUUM_KEY_INDEX_STATS,
 											 false);
 
-	/* Set dead_items space */
-	dead_items = (VacDeadItems *) shm_toc_lookup(toc,
-												 PARALLEL_VACUUM_KEY_DEAD_ITEMS,
-												 false);
+	/* Set dead items */
+	area_space = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_DSA, false);
+	dead_items_area = dsa_attach_in_place(area_space, seg);
+	dead_items = tidstore_attach(dead_items_area, shared->dead_items_handle);
 
 	/* Set cost-based vacuum delay */
 	VacuumCostActive = (VacuumCostDelay > 0);
@@ -1033,6 +1046,9 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 	InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber],
 						  &wal_usage[ParallelWorkerNumber]);
 
+	tidstore_detach(pvs.dead_items);
+	dsa_detach(dead_items_area);
+
 	/* Pop the error context stack */
 	error_context_stack = errcallback.previous;
 
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 196bece0a3..ff75fae88a 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -186,6 +186,8 @@ static const char *const BuiltinTrancheNames[] = {
 	"PgStatsHash",
 	/* LWTRANCHE_PGSTATS_DATA: */
 	"PgStatsData",
+	/* LWTRANCHE_PARALLEL_VACUUM_DSA: */
+	"ParallelVacuumDSA",
 };
 
 StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 92545b4958..3f8a5bc582 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -2301,7 +2301,7 @@ struct config_int ConfigureNamesInt[] =
 			GUC_UNIT_KB
 		},
 		&maintenance_work_mem,
-		65536, 1024, MAX_KILOBYTES,
+		65536, 2048, MAX_KILOBYTES,
 		NULL, NULL, NULL
 	},
 
diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h
index e5add41352..b209d3cf84 100644
--- a/src/include/commands/progress.h
+++ b/src/include/commands/progress.h
@@ -23,8 +23,8 @@
 #define PROGRESS_VACUUM_HEAP_BLKS_SCANNED		2
 #define PROGRESS_VACUUM_HEAP_BLKS_VACUUMED		3
 #define PROGRESS_VACUUM_NUM_INDEX_VACUUMS		4
-#define PROGRESS_VACUUM_MAX_DEAD_TUPLES			5
-#define PROGRESS_VACUUM_NUM_DEAD_TUPLES			6
+#define PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES	5
+#define PROGRESS_VACUUM_DEAD_TUPLE_BYTES		6
 
 /* Phases of vacuum (as advertised via PROGRESS_VACUUM_PHASE) */
 #define PROGRESS_VACUUM_PHASE_SCAN_HEAP			1
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 689dbb7702..220d89fff7 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -17,6 +17,7 @@
 #include "access/htup.h"
 #include "access/genam.h"
 #include "access/parallel.h"
+#include "access/tidstore.h"
 #include "catalog/pg_class.h"
 #include "catalog/pg_statistic.h"
 #include "catalog/pg_type.h"
@@ -276,21 +277,6 @@ struct VacuumCutoffs
 	MultiXactId MultiXactCutoff;
 };
 
-/*
- * VacDeadItems stores TIDs whose index tuples are deleted by index vacuuming.
- */
-typedef struct VacDeadItems
-{
-	int			max_items;		/* # slots allocated in array */
-	int			num_items;		/* current # of entries */
-
-	/* Sorted array of TIDs to delete from indexes */
-	ItemPointerData items[FLEXIBLE_ARRAY_MEMBER];
-} VacDeadItems;
-
-#define MAXDEADITEMS(avail_mem) \
-	(((avail_mem) - offsetof(VacDeadItems, items)) / sizeof(ItemPointerData))
-
 /* GUC parameters */
 extern PGDLLIMPORT int default_statistics_target;	/* PGDLLIMPORT for PostGIS */
 extern PGDLLIMPORT int vacuum_freeze_min_age;
@@ -339,18 +325,17 @@ extern Relation vacuum_open_relation(Oid relid, RangeVar *relation,
 									 LOCKMODE lmode);
 extern IndexBulkDeleteResult *vac_bulkdel_one_index(IndexVacuumInfo *ivinfo,
 													IndexBulkDeleteResult *istat,
-													VacDeadItems *dead_items);
+													TidStore *dead_items);
 extern IndexBulkDeleteResult *vac_cleanup_one_index(IndexVacuumInfo *ivinfo,
 													IndexBulkDeleteResult *istat);
-extern Size vac_max_items_to_alloc_size(int max_items);
 
 /* in commands/vacuumparallel.c */
 extern ParallelVacuumState *parallel_vacuum_init(Relation rel, Relation *indrels,
 												 int nindexes, int nrequested_workers,
-												 int max_items, int elevel,
-												 BufferAccessStrategy bstrategy);
+												 int vac_work_mem,
+												 int elevel, BufferAccessStrategy bstrategy);
 extern void parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats);
-extern VacDeadItems *parallel_vacuum_get_dead_items(ParallelVacuumState *pvs);
+extern TidStore *parallel_vacuum_get_dead_items(ParallelVacuumState *pvs);
 extern void parallel_vacuum_bulkdel_all_indexes(ParallelVacuumState *pvs,
 												long num_table_tuples,
 												int num_index_scans);
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index e4162db613..40dda03088 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -204,6 +204,7 @@ typedef enum BuiltinTrancheIds
 	LWTRANCHE_PGSTATS_DSA,
 	LWTRANCHE_PGSTATS_HASH,
 	LWTRANCHE_PGSTATS_DATA,
+	LWTRANCHE_PARALLEL_VACUUM_DSA,
 	LWTRANCHE_FIRST_USER_DEFINED
 }			BuiltinTrancheIds;
 
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index 542c2e098c..e678e6f79e 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -524,7 +524,7 @@ create index cluster_sort on clstr_4 (hundred, thousand, tenthous);
 -- ensure we don't use the index in CLUSTER nor the checking SELECTs
 set enable_indexscan = off;
 -- Use external sort:
-set maintenance_work_mem = '1MB';
+set maintenance_work_mem = '2MB';
 cluster clstr_4 using cluster_sort;
 select * from
 (select hundred, lag(hundred) over () as lhundred,
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 6cd57e3eaa..d1889b9d10 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -1214,7 +1214,7 @@ DROP TABLE unlogged_hash_table;
 -- CREATE INDEX hash_ovfl_index ON hash_ovfl_heap USING hash (x int4_ops);
 -- Test hash index build tuplesorting.  Force hash tuplesort using low
 -- maintenance_work_mem setting and fillfactor:
-SET maintenance_work_mem = '1MB';
+SET maintenance_work_mem = '2MB';
 CREATE INDEX hash_tuplesort_idx ON tenk1 USING hash (stringu1 name_ops) WITH (fillfactor = 10);
 EXPLAIN (COSTS OFF)
 SELECT count(*) FROM tenk1 WHERE stringu1 = 'TVAAAA';
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index fb9f936d43..0c49354f04 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2020,8 +2020,8 @@ pg_stat_progress_vacuum| SELECT s.pid,
     s.param3 AS heap_blks_scanned,
     s.param4 AS heap_blks_vacuumed,
     s.param5 AS index_vacuum_count,
-    s.param6 AS max_dead_tuples,
-    s.param7 AS num_dead_tuples
+    s.param6 AS max_dead_tuple_bytes,
+    s.param7 AS dead_tuple_bytes
    FROM (pg_stat_get_progress_info('VACUUM'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)
      LEFT JOIN pg_database d ON ((s.datid = d.oid)));
 pg_stat_recovery_prefetch| SELECT s.stats_reset,
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index 6cb9c926c0..a795d705d5 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -256,7 +256,7 @@ create index cluster_sort on clstr_4 (hundred, thousand, tenthous);
 set enable_indexscan = off;
 
 -- Use external sort:
-set maintenance_work_mem = '1MB';
+set maintenance_work_mem = '2MB';
 cluster clstr_4 using cluster_sort;
 select * from
 (select hundred, lag(hundred) over () as lhundred,
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index a3738833b2..edb5e4b4f3 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -367,7 +367,7 @@ DROP TABLE unlogged_hash_table;
 
 -- Test hash index build tuplesorting.  Force hash tuplesort using low
 -- maintenance_work_mem setting and fillfactor:
-SET maintenance_work_mem = '1MB';
+SET maintenance_work_mem = '2MB';
 CREATE INDEX hash_tuplesort_idx ON tenk1 USING hash (stringu1 name_ops) WITH (fillfactor = 10);
 EXPLAIN (COSTS OFF)
 SELECT count(*) FROM tenk1 WHERE stringu1 = 'TVAAAA';
-- 
2.31.1

