On Mon, Jun 28, 2021 at 11:22:01AM +0300, Anna Akenteva wrote:
> On 2019-11-29 05:32, Michael Paquier wrote:
> > These comments are unanswered for more than 2 months, so I am marking
> > this entry as returned with feedback.
> 
> I'd like to revive this patch. To make further work easier, attaching a
> rebased version of the latest patch by Alexander.
> 
> As for having yet another copy of logic determining visibility: the
> visibility check was mainly taken from heap_page_is_all_visible(), so I
> refactored the code to make sure that logic is not duplicated. The updated
> patch is attached too.

I agree that it's strange that VACUUM(FREEZE) freezes tuples but not VM bits,
nor page-level PD_ALL_VISIBLE (which is implied by all frozen).  After FULL
runs (taking an exclusive lock on the table), it's necessary to then vacuum the
table again to get what's intended.

Rebased on f10f0ae420ee62400876ab34dca2c09c20dcd030.

And rephrased Anna's two independent/alternate patches as a 2nd patch on top of
the 1st, as that helps me to read it and reduces its total size.

I noticed in testing the patch that autovacuum is still hitting the relation
shortly after vacuum full.  This is due to n_ins_since_autovacuum, which is new
in pg13.  I don't know how to address that (or even if it should be addressed
at all).

Also, pg_class.relallvisible is not set unless vacuum/analyze is run again
(which is mitigated by the n_ins behavior above).  It seems like this might be
important: a plan which uses an index-only scan might be missed in favour of
something else until autovacuum runs (it will use cost-based delay, and might
not be scheduled immediately, could be interrupted, or even disabled).

I'm testing like this:
CREATE EXTENSION IF NOT EXISTS pg_visibility ; DROP TABLE t; CREATE TABLE t AS 
SELECT generate_series(1,99999); VACUUM FULL t; ANALYZE t; SELECT 
n_ins_since_vacuum, n_tup_upd, n_tup_del, n_tup_hot_upd FROM pg_stat_all_tables 
WHERE relname='t'; SELECT relpages, relallvisible FROM pg_class WHERE 
oid='t'::regclass; SELECT COUNT(1), COUNT(1)FILTER(WHERE all_visible='t') 
allvis, COUNT(1)FILTER(WHERE all_frozen='t') allfrz, COUNT(1)FILTER(WHERE 
pd_all_visible='t') allvispd FROM pg_visibility('t');

-- 
Justin
>From 4ec10accb276d9ecde461c6cde0be5a610ed3dc6 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sun, 29 Aug 2021 15:57:09 -0500
Subject: [PATCH 1/3] VACUUM FULL/CLUSTER: set VM and page-level visibility
 bits

From: 	Alexander Korotkov
write-vm-during-cluster-3-rebase
---
 src/backend/access/heap/rewriteheap.c   | 131 +++++++++++++++++++++++-
 src/backend/access/heap/visibilitymap.c |  18 ----
 src/include/access/visibilitymap.h      |  18 ++++
 3 files changed, 147 insertions(+), 20 deletions(-)

diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index 986a776bbd..af0d1b2d0c 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -110,6 +110,7 @@
 #include "access/heaptoast.h"
 #include "access/rewriteheap.h"
 #include "access/transam.h"
+#include "access/visibilitymap.h"
 #include "access/xact.h"
 #include "access/xloginsert.h"
 #include "catalog/catalog.h"
@@ -152,6 +153,11 @@ typedef struct RewriteStateData
 	HTAB	   *rs_old_new_tid_map; /* unmatched B tuples */
 	HTAB	   *rs_logical_mappings;	/* logical remapping files */
 	uint32		rs_num_rewrite_mappings;	/* # in memory mappings */
+	Page		rs_vm_buffer;	/* visibility map page */
+	BlockNumber rs_vm_blockno;	/* block number of visibility page */
+	bool		rs_vm_buffer_valid;	/* T if any bits are set */
+	bool		rs_all_visible;	/* all visible flag for rs_buffer */
+	bool		rs_all_frozen;	/* all frozen flag for rs_buffer */
 }			RewriteStateData;
 
 /*
@@ -213,6 +219,9 @@ typedef struct RewriteMappingDataEntry
 
 
 /* prototypes for internal functions */
+static void rewrite_flush_vm_page(RewriteState state);
+static void rewrite_set_vm_flags(RewriteState state);
+static void rewrite_update_vm_flags(RewriteState state, HeapTuple tuple);
 static void raw_heap_insert(RewriteState state, HeapTuple tup);
 
 /* internal logical remapping prototypes */
@@ -264,6 +273,11 @@ begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xm
 	state->rs_freeze_xid = freeze_xid;
 	state->rs_cutoff_multi = cutoff_multi;
 	state->rs_cxt = rw_cxt;
+	state->rs_vm_buffer = (Page) palloc(BLCKSZ);
+	state->rs_vm_blockno = HEAPBLK_TO_MAPBLOCK(state->rs_blockno);
+	state->rs_vm_buffer_valid = false;
+	state->rs_all_visible = true;
+	state->rs_all_frozen = true;
 
 	/* Initialize hash tables used to track update chains */
 	hash_ctl.keysize = sizeof(TidHashKey);
@@ -284,6 +298,9 @@ begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xm
 					&hash_ctl,
 					HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
 
+	if (!smgrexists(RelationGetSmgr(state->rs_new_rel), VISIBILITYMAP_FORKNUM))
+		smgrcreate(RelationGetSmgr(state->rs_new_rel), VISIBILITYMAP_FORKNUM, false);
+
 	MemoryContextSwitchTo(old_cxt);
 
 	logical_begin_heap_rewrite(state);
@@ -317,6 +334,9 @@ end_heap_rewrite(RewriteState state)
 	/* Write the last page, if any */
 	if (state->rs_buffer_valid)
 	{
+		if (state->rs_all_visible)
+			PageSetAllVisible(state->rs_buffer);
+
 		if (RelationNeedsWAL(state->rs_new_rel))
 			log_newpage(&state->rs_new_rel->rd_node,
 						MAIN_FORKNUM,
@@ -326,10 +346,16 @@ end_heap_rewrite(RewriteState state)
 
 		PageSetChecksumInplace(state->rs_buffer, state->rs_blockno);
 
-		smgrextend(RelationGetSmgr(state->rs_new_rel), MAIN_FORKNUM,
-				   state->rs_blockno, (char *) state->rs_buffer, true);
+		smgrextend(RelationGetSmgr(state->rs_new_rel), MAIN_FORKNUM, state->rs_blockno,
+				   (char *) state->rs_buffer, true);
+
+		rewrite_set_vm_flags(state);
 	}
 
+	/* Write the last VM page too */
+	if (state->rs_vm_buffer_valid)
+		rewrite_flush_vm_page(state);
+
 	/*
 	 * When we WAL-logged rel pages, we must nonetheless fsync them.  The
 	 * reason is the same as in storage.c's RelationCopyStorage(): we're
@@ -338,7 +364,11 @@ end_heap_rewrite(RewriteState state)
 	 * wrote before the checkpoint.
 	 */
 	if (RelationNeedsWAL(state->rs_new_rel))
+	{
+		/* for an empty table, this could be first smgr access */
 		smgrimmedsync(RelationGetSmgr(state->rs_new_rel), MAIN_FORKNUM);
+		smgrimmedsync(RelationGetSmgr(state->rs_new_rel), VISIBILITYMAP_FORKNUM);
+	}
 
 	logical_end_heap_rewrite(state);
 
@@ -346,6 +376,97 @@ end_heap_rewrite(RewriteState state)
 	MemoryContextDelete(state->rs_cxt);
 }
 
+/* Write contents of VM page */
+static void
+rewrite_flush_vm_page(RewriteState state)
+{
+	Assert(state->rs_vm_buffer_valid);
+
+	if (RelationNeedsWAL(state->rs_new_rel))
+		log_newpage(&state->rs_new_rel->rd_node,
+					VISIBILITYMAP_FORKNUM,
+					state->rs_vm_blockno,
+					state->rs_vm_buffer,
+					true);
+
+	PageSetChecksumInplace(state->rs_vm_buffer, state->rs_vm_blockno);
+
+	smgrextend(RelationGetSmgr(state->rs_new_rel), VISIBILITYMAP_FORKNUM,
+			   state->rs_vm_blockno, (char *) state->rs_vm_buffer, true);
+
+	state->rs_vm_buffer_valid = false;
+}
+
+/* Set VM flags to the VM page */
+static void
+rewrite_set_vm_flags(RewriteState state)
+{
+	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(state->rs_blockno);
+	uint32		mapByte = HEAPBLK_TO_MAPBYTE(state->rs_blockno);
+	uint8		mapOffset = HEAPBLK_TO_OFFSET(state->rs_blockno);
+	char	   *map;
+	uint8		flags;
+
+	if (mapBlock != state->rs_vm_blockno && state->rs_vm_buffer_valid)
+		rewrite_flush_vm_page(state);
+
+	if (!state->rs_vm_buffer_valid)
+	{
+		PageInit(state->rs_vm_buffer, BLCKSZ, 0);
+		state->rs_vm_blockno = mapBlock;
+		state->rs_vm_buffer_valid = true;
+	}
+
+	flags = (state->rs_all_visible ? VISIBILITYMAP_ALL_VISIBLE : 0) |
+			(state->rs_all_frozen ? VISIBILITYMAP_ALL_FROZEN : 0);
+
+	map = PageGetContents(state->rs_vm_buffer);
+	map[mapByte] |= (flags << mapOffset);
+}
+
+/*
+ * Update rs_all_visible and rs_all_frozen flags according to the tuple.  We
+ * use simplified check assuming that HeapTupleSatisfiesVacuum() should already
+ * set tuple hint bits.
+ */
+static void
+rewrite_update_vm_flags(RewriteState state, HeapTuple tuple)
+{
+	TransactionId	xmin;
+
+	if (!state->rs_all_visible)
+		return;
+
+	if (!HeapTupleHeaderXminCommitted(tuple->t_data))
+	{
+		state->rs_all_visible = false;
+		state->rs_all_frozen = false;
+		return;
+	}
+
+	xmin = HeapTupleHeaderGetXmin(tuple->t_data);
+	if (!TransactionIdPrecedes(xmin, state->rs_oldest_xmin))
+	{
+		state->rs_all_visible = false;
+		state->rs_all_frozen = false;
+		return;
+	}
+
+	if (!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID) &&
+		!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask))
+	{
+		state->rs_all_visible = false;
+		state->rs_all_frozen = false;
+		return;
+	}
+
+	if (!state->rs_all_frozen)
+		return;
+
+	if (heap_tuple_needs_eventual_freeze(tuple->t_data))
+		state->rs_all_frozen = false;
+}
+
 /*
  * Add a tuple to the new heap.
  *
@@ -472,6 +593,7 @@ rewrite_heap_tuple(RewriteState state,
 
 		/* Insert the tuple and find out where it's put in new_heap */
 		raw_heap_insert(state, new_tuple);
+		rewrite_update_vm_flags(state, new_tuple);
 		new_tid = new_tuple->t_self;
 
 		logical_rewrite_heap_tuple(state, old_tid, new_tuple);
@@ -677,6 +799,9 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
 			 * enforce saveFreeSpace unconditionally.
 			 */
 
+			if (state->rs_all_visible)
+				PageSetAllVisible(page);
+
 			/* XLOG stuff */
 			if (RelationNeedsWAL(state->rs_new_rel))
 				log_newpage(&state->rs_new_rel->rd_node,
@@ -695,6 +820,8 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
 			smgrextend(RelationGetSmgr(state->rs_new_rel), MAIN_FORKNUM,
 					   state->rs_blockno, (char *) page, true);
 
+			rewrite_set_vm_flags(state);
+
 			state->rs_blockno++;
 			state->rs_buffer_valid = false;
 		}
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 114fbbdd30..dd0e686a9f 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -99,24 +99,6 @@
 
 /*#define TRACE_VISIBILITYMAP */
 
-/*
- * Size of the bitmap on each visibility map page, in bytes. There's no
- * extra headers, so the whole page minus the standard page header is
- * used for the bitmap.
- */
-#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
-
-/* Number of heap blocks we can represent in one byte */
-#define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)
-
-/* Number of heap blocks we can represent in one visibility map page. */
-#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
-
-/* Mapping from heap block number to the right bit in the visibility map */
-#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
-#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
-#define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
-
 /* Masks for counting subsets of bits in the visibility map. */
 #define VISIBLE_MASK64	UINT64CONST(0x5555555555555555) /* The lower bit of each
 														 * bit pair */
diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h
index 57362c3687..39f3d59156 100644
--- a/src/include/access/visibilitymap.h
+++ b/src/include/access/visibilitymap.h
@@ -34,6 +34,24 @@
 #define VM_ALL_FROZEN(r, b, v) \
 	((visibilitymap_get_status((r), (b), (v)) & VISIBILITYMAP_ALL_FROZEN) != 0)
 
+/*
+ * Size of the bitmap on each visibility map page, in bytes. There's no
+ * extra headers, so the whole page minus the standard page header is
+ * used for the bitmap.
+ */
+#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
+
+/* Number of heap blocks we can represent in one byte */
+#define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)
+
+/* Number of heap blocks we can represent in one visibility map page. */
+#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
+
+/* Mapping from heap block number to the right bit in the visibility map */
+#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
+#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
+#define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
+
 extern bool visibilitymap_clear(Relation rel, BlockNumber heapBlk,
 								Buffer vmbuf, uint8 flags);
 extern void visibilitymap_pin(Relation rel, BlockNumber heapBlk,
-- 
2.17.0

>From 3454aceee4c1806664b3932e0217fc50d71be52d Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sun, 29 Aug 2021 16:01:13 -0500
Subject: [PATCH 2/3] refactor the code to make sure that logic is not
 duplicated.

0001-write-vm-during-cluster-4.patch
Date: Mon, 28 Jun 2021 11:22:01 +0300
From: Anna Akenteva <a.akent...@postgrespro.ru>
---
 src/backend/access/heap/heapam_handler.c | 19 +++---
 src/backend/access/heap/rewriteheap.c    | 12 +++-
 src/backend/access/heap/vacuumlazy.c     | 77 ++++++++++++++++--------
 src/backend/utils/sort/tuplesort.c       |  9 ++-
 src/include/access/heapam.h              |  3 +
 src/include/access/rewriteheap.h         |  4 +-
 src/include/utils/tuplesort.h            |  4 +-
 7 files changed, 88 insertions(+), 40 deletions(-)

diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index beb8f20708..c5d44c10e8 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -47,7 +47,8 @@
 
 static void reform_and_rewrite_tuple(HeapTuple tuple,
 									 Relation OldHeap, Relation NewHeap,
-									 Datum *values, bool *isnull, RewriteState rwstate);
+									 Datum *values, bool *isnull, bool islive,
+									 RewriteState rwstate);
 
 static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
 								   HeapTuple tuple,
@@ -778,6 +779,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 		HeapTuple	tuple;
 		Buffer		buf;
 		bool		isdead;
+		bool		islive = false;
 
 		CHECK_FOR_INTERRUPTS();
 
@@ -844,6 +846,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 			case HEAPTUPLE_LIVE:
 				/* Live or recently dead, must copy it */
 				isdead = false;
+				islive = true;
 				break;
 			case HEAPTUPLE_INSERT_IN_PROGRESS:
 
@@ -899,7 +902,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 		*num_tuples += 1;
 		if (tuplesort != NULL)
 		{
-			tuplesort_putheaptuple(tuplesort, tuple);
+			tuplesort_putheaptuple(tuplesort, tuple, islive);
 
 			/*
 			 * In scan-and-sort mode, report increase in number of tuples
@@ -917,7 +920,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 			int64		ct_val[2];
 
 			reform_and_rewrite_tuple(tuple, OldHeap, NewHeap,
-									 values, isnull, rwstate);
+									 values, isnull, islive, rwstate);
 
 			/*
 			 * In indexscan mode and also VACUUM FULL, report increase in
@@ -957,17 +960,18 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 		for (;;)
 		{
 			HeapTuple	tuple;
+			bool		islive;
 
 			CHECK_FOR_INTERRUPTS();
 
-			tuple = tuplesort_getheaptuple(tuplesort, true);
+			tuple = tuplesort_getheaptuple(tuplesort, true, &islive);
 			if (tuple == NULL)
 				break;
 
 			n_tuples += 1;
 			reform_and_rewrite_tuple(tuple,
 									 OldHeap, NewHeap,
-									 values, isnull,
+									 values, isnull, islive,
 									 rwstate);
 			/* Report n_tuples */
 			pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN,
@@ -2454,7 +2458,8 @@ heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
 static void
 reform_and_rewrite_tuple(HeapTuple tuple,
 						 Relation OldHeap, Relation NewHeap,
-						 Datum *values, bool *isnull, RewriteState rwstate)
+						 Datum *values, bool *isnull, bool islive,
+						 RewriteState rwstate)
 {
 	TupleDesc	oldTupDesc = RelationGetDescr(OldHeap);
 	TupleDesc	newTupDesc = RelationGetDescr(NewHeap);
@@ -2473,7 +2478,7 @@ reform_and_rewrite_tuple(HeapTuple tuple,
 	copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
 
 	/* The heap rewrite module does the rest */
-	rewrite_heap_tuple(rwstate, tuple, copiedTuple);
+	rewrite_heap_tuple(rwstate, islive, tuple, copiedTuple);
 
 	heap_freetuple(copiedTuple);
 }
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index af0d1b2d0c..623149ac26 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -424,6 +424,8 @@ rewrite_set_vm_flags(RewriteState state)
 	map[mapByte] |= (flags << mapOffset);
 }
 
+// rewrite_update_vm_flags(state, new_tuple);
+
 /*
  * Update rs_all_visible and rs_all_frozen flags according to the tuple.  We
  * use simplified check assuming that HeapTupleSatisfiesVacuum() should already
@@ -475,11 +477,12 @@ rewrite_update_vm_flags(RewriteState state, HeapTuple tuple)
  * it had better be temp storage not a pointer to the original tuple.
  *
  * state		opaque state as returned by begin_heap_rewrite
+ * is_live		whether the currently processed tuple is live
  * old_tuple	original tuple in the old heap
  * new_tuple	new, rewritten tuple to be inserted to new heap
  */
 void
-rewrite_heap_tuple(RewriteState state,
+rewrite_heap_tuple(RewriteState state, bool is_live,
 				   HeapTuple old_tuple, HeapTuple new_tuple)
 {
 	MemoryContext old_cxt;
@@ -593,7 +596,12 @@ rewrite_heap_tuple(RewriteState state,
 
 		/* Insert the tuple and find out where it's put in new_heap */
 		raw_heap_insert(state, new_tuple);
-		rewrite_update_vm_flags(state, new_tuple);
+
+		heap_page_update_vm_flags(new_tuple, state->rs_oldest_xmin,
+								 is_live, true,
+								 &(state->rs_all_visible),
+								 &(state->rs_all_frozen));
+
 		new_tid = new_tuple->t_self;
 
 		logical_rewrite_heap_tuple(state, old_tid, new_tuple);
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 2589f80a52..97e1603f27 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -3611,6 +3611,53 @@ vac_cmp_itemptr(const void *left, const void *right)
 	return 0;
 }
 
+/*
+ * Update all_visible and all_frozen flags according to the tuple.  We
+ * use simplified check assuming that HeapTupleSatisfiesVacuum() should already
+ * set tuple hint bits.
+ */
+void
+heap_page_update_vm_flags(HeapTuple tuple, TransactionId OldestXmin,
+						  bool is_live, bool check_infomask,
+						  bool *all_visible, bool *all_frozen)
+{
+	TransactionId xmin;
+
+	/* Check comments in lazy_scan_heap. */
+	if (!HeapTupleHeaderXminCommitted(tuple->t_data) || !is_live)
+	{
+		*all_visible = false;
+		*all_frozen = false;
+		return;
+	}
+
+	/*
+	 * The inserter definitely committed. But is it old enough
+	 * that everyone sees it as committed?
+	 */
+	xmin = HeapTupleHeaderGetXmin(tuple->t_data);
+	if (!TransactionIdPrecedes(xmin, OldestXmin))
+	{
+		*all_visible = false;
+		*all_frozen = false;
+		return;
+	}
+
+	if (check_infomask &&
+		!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID) &&
+		!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask))
+	{
+		*all_visible = false;
+		*all_frozen = false;
+		return;
+	}
+
+	/* Check whether this tuple is already frozen or not */
+	if (*all_visible && *all_frozen &&
+		heap_tuple_needs_eventual_freeze(tuple->t_data))
+		*all_frozen = false;
+}
+
 /*
  * Check if every tuple in the given page is visible to all current and future
  * transactions. Also return the visibility_cutoff_xid which is the highest
@@ -3679,36 +3726,16 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
 				{
 					TransactionId xmin;
 
-					/* Check comments in lazy_scan_heap. */
-					if (!HeapTupleHeaderXminCommitted(tuple.t_data))
-					{
-						all_visible = false;
-						*all_frozen = false;
-						break;
-					}
-
-					/*
-					 * The inserter definitely committed. But is it old enough
-					 * that everyone sees it as committed?
-					 */
-					xmin = HeapTupleHeaderGetXmin(tuple.t_data);
-					if (!TransactionIdPrecedes(xmin, vacrel->OldestXmin))
-					{
-						all_visible = false;
-						*all_frozen = false;
-						break;
-					}
+					heap_page_update_vm_flags(&tuple, vacrel->OldestXmin,
+											  true, false,
+											  &all_visible, all_frozen);
 
 					/* Track newest xmin on page. */
+					xmin = HeapTupleHeaderGetXmin(tuple.t_data);
 					if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
 						*visibility_cutoff_xid = xmin;
-
-					/* Check whether this tuple is already frozen or not */
-					if (all_visible && *all_frozen &&
-						heap_tuple_needs_eventual_freeze(tuple.t_data))
-						*all_frozen = false;
+					break;
 				}
-				break;
 
 			case HEAPTUPLE_DEAD:
 			case HEAPTUPLE_RECENTLY_DEAD:
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index b17347b214..d5a0491bf8 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -180,6 +180,7 @@ typedef struct
 	Datum		datum1;			/* value of first key column */
 	bool		isnull1;		/* is first key column NULL? */
 	int			srctape;		/* source tape number */
+	bool		is_live;		/* is the tuple alive? */
 } SortTuple;
 
 /*
@@ -1703,7 +1704,7 @@ tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
  * Note that the input data is always copied; the caller need not save it.
  */
 void
-tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
+tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup, bool is_live)
 {
 	MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
 	SortTuple	stup;
@@ -1715,6 +1716,7 @@ tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
 	COPYTUP(state, &stup, (void *) tup);
 
+	stup.is_live = is_live;
 	puttuple_common(state, &stup);
 
 	MemoryContextSwitchTo(oldcontext);
 }
@@ -2442,7 +2444,7 @@ tuplesort_gettupleslot(Tuplesortstate *state, bool forward, bool copy,
  * remaining valid after any further manipulation of tuplesort.
  */
 HeapTuple
-tuplesort_getheaptuple(Tuplesortstate *state, bool forward)
+tuplesort_getheaptuple(Tuplesortstate *state, bool forward, bool *is_live)
 {
 	MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
 	SortTuple	stup;
@@ -2452,6 +2454,9 @@ tuplesort_getheaptuple(Tuplesortstate *state, bool forward)
 
 	MemoryContextSwitchTo(oldcontext);
 
+	if (is_live)
+		*is_live = stup.is_live;
+
 	return stup.tuple;
 }
 
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index e63b49fc38..71bda855f2 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -199,6 +199,9 @@ struct VacuumParams;
 extern void heap_vacuum_rel(Relation rel,
 							struct VacuumParams *params, BufferAccessStrategy bstrategy);
 extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc);
+extern void heap_page_update_vm_flags(HeapTuple tuple, TransactionId OldestXmin,
+									  bool is_live, bool check_infomask,
+									  bool *all_visible, bool *all_frozen);
 
 /* in heap/heapam_visibility.c */
 extern bool HeapTupleSatisfiesVisibility(HeapTuple stup, Snapshot snapshot,
diff --git a/src/include/access/rewriteheap.h b/src/include/access/rewriteheap.h
index 121f552405..2cda7320d3 100644
--- a/src/include/access/rewriteheap.h
+++ b/src/include/access/rewriteheap.h
@@ -25,8 +25,8 @@ extern RewriteState begin_heap_rewrite(Relation OldHeap, Relation NewHeap,
 									   TransactionId OldestXmin, TransactionId FreezeXid,
 									   MultiXactId MultiXactCutoff);
 extern void end_heap_rewrite(RewriteState state);
-extern void rewrite_heap_tuple(RewriteState state, HeapTuple oldTuple,
-							   HeapTuple newTuple);
+extern void rewrite_heap_tuple(RewriteState state, bool is_live,
+							   HeapTuple oldTuple, HeapTuple newTuple);
 extern bool rewrite_heap_dead_tuple(RewriteState state, HeapTuple oldTuple);
 
 /*
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h
index f94949370b..45442c4351 100644
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -232,7 +232,7 @@ extern bool tuplesort_used_bound(Tuplesortstate *state);
 
 extern void tuplesort_puttupleslot(Tuplesortstate *state,
 								   TupleTableSlot *slot);
-extern void tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup);
+extern void tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup, bool is_live);
 extern void tuplesort_putindextuplevalues(Tuplesortstate *state,
 										  Relation rel, ItemPointer self,
 										  Datum *values, bool *isnull);
@@ -243,7 +243,7 @@ extern void tuplesort_performsort(Tuplesortstate *state);
 
 extern bool tuplesort_gettupleslot(Tuplesortstate *state, bool forward,
 								   bool copy, TupleTableSlot *slot, Datum *abbrev);
-extern HeapTuple tuplesort_getheaptuple(Tuplesortstate *state, bool forward);
+extern HeapTuple tuplesort_getheaptuple(Tuplesortstate *state, bool forward, bool *is_live);
 extern IndexTuple tuplesort_getindextuple(Tuplesortstate *state, bool forward);
 extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward,
 							   Datum *val, bool *isNull, Datum *abbrev);
-- 
2.17.0

>From c40fae27093f895bb2262577cd8fc5b289682493 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sun, 29 Aug 2021 16:55:38 -0500
Subject: [PATCH 3/3] cluster: set relallvisible=num_pages

---
 src/backend/commands/cluster.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 83e968cfa4..86f772c107 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -1016,6 +1016,7 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
 	relform = (Form_pg_class) GETSTRUCT(reltup);
 
 	relform->relpages = num_pages;
+	relform->relallvisible = num_pages;
 	relform->reltuples = num_tuples;
 
 	/* Don't update the stats for pg_class.  See swap_relation_files. */
-- 
2.17.0

Reply via email to