On Sun, Aug 29, 2021 at 07:26:42PM -0500, Justin Pryzby wrote:
> On Mon, Jun 28, 2021 at 11:22:01AM +0300, Anna Akenteva wrote:
> > On 2019-11-29 05:32, Michael Paquier wrote:
> > > These comments are unanswered for more than 2 months, so I am marking
> > > this entry as returned with feedback.
> > 
> > I'd like to revive this patch. To make further work easier, attaching a
> > rebased version of the latest patch by Alexander.
> > 
> > As for having yet another copy of logic determining visibility: the
> > visibility check was mainly taken from heap_page_is_all_visible(), so I
> > refactored the code to make sure that logic is not duplicated. The updated
> > patch is attached too.
> 
> I agree that it's strange that VACUUM(FREEZE) freezes tuples but sets neither
> the VM bits nor the page-level PD_ALL_VISIBLE flag (which is implied by
> all-frozen).  After FULL runs (taking an exclusive lock on the table), it's
> necessary to then vacuum the table again to get what's intended.
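>
> A sketch of that second pass (assuming the pg_visibility extension and a
> scratch table t):
>
> VACUUM FULL t;
> SELECT count(*) FILTER (WHERE all_visible) FROM pg_visibility('t'); -- expect 0
> VACUUM t;  -- needed again, just to set the VM bits and PD_ALL_VISIBLE
> SELECT count(*) FILTER (WHERE all_visible) FROM pg_visibility('t'); -- now > 0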
> 
> Rebased on f10f0ae420ee62400876ab34dca2c09c20dcd030.

Rebased this patch again.

Alexander, are you planning on working on this patch?

Otherwise, Anna, would you want to "own" the patch?

> And rephrased Anna's two independent/alternate patches as a 2nd patch on top
> of the 1st, as that helps me to read it and reduces its total size.
> 
> I noticed in testing the patch that autovacuum is still hitting the relation
> shortly after vacuum full.  This is due to n_ins_since_vacuum, which is new
> in pg13.  I don't know how to address that (or even if it should be addressed
> at all).
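>
> To see how close a table is to the insert-driven autovacuum trigger, a query
> like this can help (a sketch; it ignores per-table reloptions):
>
> SELECT s.n_ins_since_vacuum,
>        current_setting('autovacuum_vacuum_insert_threshold')::float8
>        + current_setting('autovacuum_vacuum_insert_scale_factor')::float8
>          * c.reltuples AS insert_trigger_point
> FROM pg_stat_all_tables s
> JOIN pg_class c ON c.oid = s.relid
> WHERE s.relname = 't';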
> 
> Also, pg_class.relallvisible is not set unless vacuum/analyze is run again
> (which is mitigated by the n_ins behavior above).  It seems like this might be
> important: a plan which uses an index-only scan might be missed in favour of
> something else until autovacuum runs (it will use cost-based delay, and might
> not be scheduled immediately, could be interrupted, or even disabled).
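>
> That can be sketched with EXPLAIN (hypothetical session: a table t(i int),
> populated and indexed on i, immediately after VACUUM FULL):
>
> CREATE INDEX ON t (i);
> EXPLAIN SELECT i FROM t WHERE i < 100;
> -- With relallvisible=0, an index-only scan is costed as though every heap
> -- page still had to be visited, so a cheaper-looking plan may win instead.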
> 
> I'm testing like this:
>
> CREATE EXTENSION IF NOT EXISTS pg_visibility;
> DROP TABLE t;
> CREATE TABLE t AS SELECT generate_series(1,99999);
> VACUUM FULL t;
> ANALYZE t;
> SELECT n_ins_since_vacuum, n_tup_upd, n_tup_del, n_tup_hot_upd
>   FROM pg_stat_all_tables WHERE relname='t';
> SELECT relpages, relallvisible FROM pg_class WHERE oid='t'::regclass;
> SELECT COUNT(1),
>        COUNT(1) FILTER (WHERE all_visible='t') AS allvis,
>        COUNT(1) FILTER (WHERE all_frozen='t') AS allfrz,
>        COUNT(1) FILTER (WHERE pd_all_visible='t') AS allvispd
>   FROM pg_visibility('t');
From eb04b296c95a20c2f1b8ac670de91fd93e3d7a33 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sun, 29 Aug 2021 15:57:09 -0500
Subject: [PATCH v2 1/3] VACUUM FULL/CLUSTER: set VM and page-level visibility
 bits

From: 	Alexander Korotkov
write-vm-during-cluster-3-rebase
---
 src/backend/access/heap/rewriteheap.c   | 135 +++++++++++++++++++++++-
 src/backend/access/heap/visibilitymap.c |  18 ----
 src/include/access/visibilitymap.h      |  18 ++++
 3 files changed, 151 insertions(+), 20 deletions(-)

diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index 986a776bbd..af0d1b2d0c 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -110,6 +110,7 @@
 #include "access/heaptoast.h"
 #include "access/rewriteheap.h"
 #include "access/transam.h"
+#include "access/visibilitymap.h"
 #include "access/xact.h"
 #include "access/xloginsert.h"
 #include "catalog/catalog.h"
@@ -152,6 +153,11 @@ typedef struct RewriteStateData
 	HTAB	   *rs_old_new_tid_map; /* unmatched B tuples */
 	HTAB	   *rs_logical_mappings;	/* logical remapping files */
 	uint32		rs_num_rewrite_mappings;	/* # in memory mappings */
+	Page		rs_vm_buffer;	/* visibility map page */
+	BlockNumber rs_vm_blockno;	/* block number of visibility page */
+	bool		rs_vm_buffer_valid;	/* T if rs_vm_buffer contains a valid page */
+	bool		rs_all_visible;	/* all visible flag for rs_buffer */
+	bool		rs_all_frozen;	/* all frozen flag for rs_buffer */
 }			RewriteStateData;
 
 /*
@@ -213,6 +219,9 @@ typedef struct RewriteMappingDataEntry
 
 
 /* prototypes for internal functions */
+static void rewrite_flush_vm_page(RewriteState state);
+static void rewrite_set_vm_flags(RewriteState state);
+static void rewrite_update_vm_flags(RewriteState state, HeapTuple tuple);
 static void raw_heap_insert(RewriteState state, HeapTuple tup);
 
 /* internal logical remapping prototypes */
@@ -264,6 +273,11 @@ begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xm
 	state->rs_freeze_xid = freeze_xid;
 	state->rs_cutoff_multi = cutoff_multi;
 	state->rs_cxt = rw_cxt;
+	state->rs_vm_buffer = (Page) palloc(BLCKSZ);
+	state->rs_vm_blockno = HEAPBLK_TO_MAPBLOCK(state->rs_blockno);
+	state->rs_vm_buffer_valid = false;
+	state->rs_all_visible = true;
+	state->rs_all_frozen = true;
 
 	/* Initialize hash tables used to track update chains */
 	hash_ctl.keysize = sizeof(TidHashKey);
@@ -284,6 +298,9 @@ begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xm
 					&hash_ctl,
 					HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
 
+	if (!smgrexists(RelationGetSmgr(state->rs_new_rel), VISIBILITYMAP_FORKNUM))
+		smgrcreate(RelationGetSmgr(state->rs_new_rel), VISIBILITYMAP_FORKNUM, false);
+
 	MemoryContextSwitchTo(old_cxt);
 
 	logical_begin_heap_rewrite(state);
@@ -317,6 +334,9 @@ end_heap_rewrite(RewriteState state)
 	/* Write the last page, if any */
 	if (state->rs_buffer_valid)
 	{
+		if (state->rs_all_visible)
+			PageSetAllVisible(state->rs_buffer);
+
 		if (RelationNeedsWAL(state->rs_new_rel))
 			log_newpage(&state->rs_new_rel->rd_node,
 						MAIN_FORKNUM,
@@ -326,10 +346,16 @@ end_heap_rewrite(RewriteState state)
 
 		PageSetChecksumInplace(state->rs_buffer, state->rs_blockno);
 
-		smgrextend(RelationGetSmgr(state->rs_new_rel), MAIN_FORKNUM,
-				   state->rs_blockno, (char *) state->rs_buffer, true);
+		smgrextend(RelationGetSmgr(state->rs_new_rel), MAIN_FORKNUM, state->rs_blockno,
+				   (char *) state->rs_buffer, true);
+
+		rewrite_set_vm_flags(state);
 	}
 
+	/* Write the last VM page too */
+	if (state->rs_vm_buffer_valid)
+		rewrite_flush_vm_page(state);
+
 	/*
 	 * When we WAL-logged rel pages, we must nonetheless fsync them.  The
 	 * reason is the same as in storage.c's RelationCopyStorage(): we're
@@ -338,7 +364,11 @@ end_heap_rewrite(RewriteState state)
 	 * wrote before the checkpoint.
 	 */
 	if (RelationNeedsWAL(state->rs_new_rel))
+	{
+		/* for an empty table, this could be the first smgr access */
 		smgrimmedsync(RelationGetSmgr(state->rs_new_rel), MAIN_FORKNUM);
+		smgrimmedsync(RelationGetSmgr(state->rs_new_rel), VISIBILITYMAP_FORKNUM);
+	}
 
 	logical_end_heap_rewrite(state);
 
@@ -346,6 +376,97 @@ end_heap_rewrite(RewriteState state)
 	MemoryContextDelete(state->rs_cxt);
 }
 
+/* Write contents of VM page */
+static void
+rewrite_flush_vm_page(RewriteState state)
+{
+	Assert(state->rs_vm_buffer_valid);
+
+	if (RelationNeedsWAL(state->rs_new_rel))
+		log_newpage(&state->rs_new_rel->rd_node,
+					VISIBILITYMAP_FORKNUM,
+					state->rs_vm_blockno,
+					state->rs_vm_buffer,
+					true);
+
+	PageSetChecksumInplace(state->rs_vm_buffer, state->rs_vm_blockno);
+
+	smgrextend(RelationGetSmgr(state->rs_new_rel), VISIBILITYMAP_FORKNUM,
+			   state->rs_vm_blockno, (char *) state->rs_vm_buffer, true);
+
+	state->rs_vm_buffer_valid = false;
+}
+
+/* Set bits in the visibility map page for the heap page just written */
+static void
+rewrite_set_vm_flags(RewriteState state)
+{
+	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(state->rs_blockno);
+	uint32		mapByte = HEAPBLK_TO_MAPBYTE(state->rs_blockno);
+	uint8		mapOffset = HEAPBLK_TO_OFFSET(state->rs_blockno);
+	char	   *map;
+	uint8		flags;
+
+	if (mapBlock != state->rs_vm_blockno && state->rs_vm_buffer_valid)
+		rewrite_flush_vm_page(state);
+
+	if (!state->rs_vm_buffer_valid)
+	{
+		PageInit(state->rs_vm_buffer, BLCKSZ, 0);
+		state->rs_vm_blockno = mapBlock;
+		state->rs_vm_buffer_valid = true;
+	}
+
+	flags = (state->rs_all_visible ? VISIBILITYMAP_ALL_VISIBLE : 0) |
+			(state->rs_all_frozen ? VISIBILITYMAP_ALL_FROZEN : 0);
+
+	map = PageGetContents(state->rs_vm_buffer);
+	map[mapByte] |= (flags << mapOffset);
+}
+
+/*
+ * Update the rs_all_visible and rs_all_frozen flags according to the tuple.
+ * We use a simplified check, assuming that HeapTupleSatisfiesVacuum() has
+ * already set the tuple's hint bits.
+ */
+static void
+rewrite_update_vm_flags(RewriteState state, HeapTuple tuple)
+{
+	TransactionId	xmin;
+
+	if (!state->rs_all_visible)
+		return;
+
+	if (!HeapTupleHeaderXminCommitted(tuple->t_data))
+	{
+		state->rs_all_visible = false;
+		state->rs_all_frozen = false;
+		return;
+	}
+
+	xmin = HeapTupleHeaderGetXmin(tuple->t_data);
+	if (!TransactionIdPrecedes(xmin, state->rs_oldest_xmin))
+	{
+		state->rs_all_visible = false;
+		state->rs_all_frozen = false;
+		return;
+	}
+
+	if (!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID) &&
+		!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask))
+	{
+		state->rs_all_visible = false;
+		state->rs_all_frozen = false;
+		return;
+	}
+
+	if (!state->rs_all_frozen)
+		return;
+
+	if (heap_tuple_needs_eventual_freeze(tuple->t_data))
+		state->rs_all_frozen = false;
+}
+
 /*
  * Add a tuple to the new heap.
  *
@@ -472,6 +593,7 @@ rewrite_heap_tuple(RewriteState state,
 
 		/* Insert the tuple and find out where it's put in new_heap */
 		raw_heap_insert(state, new_tuple);
+		rewrite_update_vm_flags(state, new_tuple);
 		new_tid = new_tuple->t_self;
 
 		logical_rewrite_heap_tuple(state, old_tid, new_tuple);
@@ -677,6 +799,9 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
 			 * enforce saveFreeSpace unconditionally.
 			 */
 
+			if (state->rs_all_visible)
+				PageSetAllVisible(page);
+
 			/* XLOG stuff */
 			if (RelationNeedsWAL(state->rs_new_rel))
 				log_newpage(&state->rs_new_rel->rd_node,
@@ -695,6 +820,12 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
 			smgrextend(RelationGetSmgr(state->rs_new_rel), MAIN_FORKNUM,
 					   state->rs_blockno, (char *) page, true);
 
+			rewrite_set_vm_flags(state);
+
+			/* start visibility tracking afresh for the next heap page */
+			state->rs_all_visible = true;
+			state->rs_all_frozen = true;
+
 			state->rs_blockno++;
 			state->rs_buffer_valid = false;
 		}
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 114fbbdd30..dd0e686a9f 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -99,24 +99,6 @@
 
 /*#define TRACE_VISIBILITYMAP */
 
-/*
- * Size of the bitmap on each visibility map page, in bytes. There's no
- * extra headers, so the whole page minus the standard page header is
- * used for the bitmap.
- */
-#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
-
-/* Number of heap blocks we can represent in one byte */
-#define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)
-
-/* Number of heap blocks we can represent in one visibility map page. */
-#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
-
-/* Mapping from heap block number to the right bit in the visibility map */
-#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
-#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
-#define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
-
 /* Masks for counting subsets of bits in the visibility map. */
 #define VISIBLE_MASK64	UINT64CONST(0x5555555555555555) /* The lower bit of each
 														 * bit pair */
diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h
index 0981b218ea..c902900d89 100644
--- a/src/include/access/visibilitymap.h
+++ b/src/include/access/visibilitymap.h
@@ -26,6 +26,24 @@
 #define VM_ALL_FROZEN(r, b, v) \
 	((visibilitymap_get_status((r), (b), (v)) & VISIBILITYMAP_ALL_FROZEN) != 0)
 
+/*
+ * Size of the bitmap on each visibility map page, in bytes. There's no
+ * extra headers, so the whole page minus the standard page header is
+ * used for the bitmap.
+ */
+#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
+
+/* Number of heap blocks we can represent in one byte */
+#define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)
+
+/* Number of heap blocks we can represent in one visibility map page. */
+#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
+
+/* Mapping from heap block number to the right bit in the visibility map */
+#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
+#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
+#define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
+
 extern bool visibilitymap_clear(Relation rel, BlockNumber heapBlk,
 								Buffer vmbuf, uint8 flags);
 extern void visibilitymap_pin(Relation rel, BlockNumber heapBlk,
-- 
2.17.0

From 37b1af07998a249eb6b254ae420a0440d67d3de4 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sun, 29 Aug 2021 16:01:13 -0500
Subject: [PATCH v2 2/3] refactor the code to make sure that logic is not
 duplicated.

0001-write-vm-during-cluster-4.patch
Date: Mon, 28 Jun 2021 11:22:01 +0300
From: Anna Akenteva <a.akent...@postgrespro.ru>
---
 src/backend/access/heap/heapam_handler.c | 19 +++---
 src/backend/access/heap/rewriteheap.c    | 10 +++-
 src/backend/access/heap/vacuumlazy.c     | 75 ++++++++++++++++--------
 src/backend/utils/sort/tuplesort.c       |  9 ++-
 src/include/access/heapam.h              |  3 +
 src/include/access/rewriteheap.h         |  4 +-
 src/include/utils/tuplesort.h            |  4 +-
 7 files changed, 85 insertions(+), 39 deletions(-)

diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 9befe012a9..90361080d0 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -47,7 +47,8 @@
 
 static void reform_and_rewrite_tuple(HeapTuple tuple,
 									 Relation OldHeap, Relation NewHeap,
-									 Datum *values, bool *isnull, RewriteState rwstate);
+									 Datum *values, bool *isnull, bool islive,
+									 RewriteState rwstate);
 
 static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
 								   HeapTuple tuple,
@@ -782,6 +783,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 		HeapTuple	tuple;
 		Buffer		buf;
 		bool		isdead;
+		bool		islive = false;
 
 		CHECK_FOR_INTERRUPTS();
 
@@ -848,6 +850,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 			case HEAPTUPLE_LIVE:
 				/* Live or recently dead, must copy it */
 				isdead = false;
+				islive = true;
 				break;
 			case HEAPTUPLE_INSERT_IN_PROGRESS:
 
@@ -903,7 +906,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 		*num_tuples += 1;
 		if (tuplesort != NULL)
 		{
-			tuplesort_putheaptuple(tuplesort, tuple);
+			tuplesort_putheaptuple(tuplesort, tuple, islive);
 
 			/*
 			 * In scan-and-sort mode, report increase in number of tuples
@@ -921,7 +924,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 			int64		ct_val[2];
 
 			reform_and_rewrite_tuple(tuple, OldHeap, NewHeap,
-									 values, isnull, rwstate);
+									 values, isnull, islive, rwstate);
 
 			/*
 			 * In indexscan mode and also VACUUM FULL, report increase in
@@ -961,17 +964,18 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 		for (;;)
 		{
 			HeapTuple	tuple;
+			bool		islive;
 
 			CHECK_FOR_INTERRUPTS();
 
-			tuple = tuplesort_getheaptuple(tuplesort, true);
+			tuple = tuplesort_getheaptuple(tuplesort, true, &islive);
 			if (tuple == NULL)
 				break;
 
 			n_tuples += 1;
 			reform_and_rewrite_tuple(tuple,
 									 OldHeap, NewHeap,
-									 values, isnull,
+									 values, isnull, islive,
 									 rwstate);
 			/* Report n_tuples */
 			pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN,
@@ -2458,7 +2462,8 @@ heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
 static void
 reform_and_rewrite_tuple(HeapTuple tuple,
 						 Relation OldHeap, Relation NewHeap,
-						 Datum *values, bool *isnull, RewriteState rwstate)
+						 Datum *values, bool *isnull, bool islive,
+						 RewriteState rwstate)
 {
 	TupleDesc	oldTupDesc = RelationGetDescr(OldHeap);
 	TupleDesc	newTupDesc = RelationGetDescr(NewHeap);
@@ -2477,7 +2482,7 @@ reform_and_rewrite_tuple(HeapTuple tuple,
 	copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
 
 	/* The heap rewrite module does the rest */
-	rewrite_heap_tuple(rwstate, tuple, copiedTuple);
+	rewrite_heap_tuple(rwstate, islive, tuple, copiedTuple);
 
 	heap_freetuple(copiedTuple);
 }
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index af0d1b2d0c..623149ac26 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -475,11 +475,12 @@ rewrite_update_vm_flags(RewriteState state, HeapTuple tuple)
  * it had better be temp storage not a pointer to the original tuple.
  *
  * state		opaque state as returned by begin_heap_rewrite
+ * is_live		whether the currently processed tuple is live
  * old_tuple	original tuple in the old heap
  * new_tuple	new, rewritten tuple to be inserted to new heap
  */
 void
-rewrite_heap_tuple(RewriteState state,
+rewrite_heap_tuple(RewriteState state, bool is_live,
 				   HeapTuple old_tuple, HeapTuple new_tuple)
 {
 	MemoryContext old_cxt;
@@ -593,7 +596,12 @@ rewrite_heap_tuple(RewriteState state,
 
 		/* Insert the tuple and find out where it's put in new_heap */
 		raw_heap_insert(state, new_tuple);
-		rewrite_update_vm_flags(state, new_tuple);
+
+		heap_page_update_vm_flags(new_tuple, state->rs_oldest_xmin,
+								 is_live, true,
+								 &(state->rs_all_visible),
+								 &(state->rs_all_frozen));
+
 		new_tid = new_tuple->t_self;
 
 		logical_rewrite_heap_tuple(state, old_tid, new_tuple);
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 558cc88a08..25ac3e7b51 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -3605,6 +3605,53 @@ vac_cmp_itemptr(const void *left, const void *right)
 	return 0;
 }
 
+/*
+ * Update the all_visible and all_frozen flags according to the tuple.  We use
+ * a simplified check, assuming that HeapTupleSatisfiesVacuum() has already
+ * set the tuple's hint bits.
+ */
+void
+heap_page_update_vm_flags(HeapTuple tuple, TransactionId OldestXmin,
+						  bool is_live, bool check_infomask,
+						  bool *all_visible, bool *all_frozen)
+{
+	TransactionId xmin;
+
+	/* Check comments in lazy_scan_prune. */
+	if (!HeapTupleHeaderXminCommitted(tuple->t_data) || !is_live)
+	{
+		*all_visible = false;
+		*all_frozen = false;
+		return;
+	}
+
+	/*
+	 * The inserter definitely committed. But is it old enough
+	 * that everyone sees it as committed?
+	 */
+	xmin = HeapTupleHeaderGetXmin(tuple->t_data);
+	if (!TransactionIdPrecedes(xmin, OldestXmin))
+	{
+		*all_visible = false;
+		*all_frozen = false;
+		return;
+	}
+
+	if (check_infomask &&
+		!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID) &&
+		!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask))
+	{
+		*all_visible = false;
+		*all_frozen = false;
+		return;
+	}
+
+	/* Check whether this tuple is already frozen or not */
+	if (*all_visible && *all_frozen &&
+		heap_tuple_needs_eventual_freeze(tuple->t_data))
+		*all_frozen = false;
+}
+
 /*
  * Check if every tuple in the given page is visible to all current and future
  * transactions. Also return the visibility_cutoff_xid which is the highest
@@ -3673,34 +3720,14 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
 				{
 					TransactionId xmin;
 
-					/* Check comments in lazy_scan_prune. */
-					if (!HeapTupleHeaderXminCommitted(tuple.t_data))
-					{
-						all_visible = false;
-						*all_frozen = false;
-						break;
-					}
-
-					/*
-					 * The inserter definitely committed. But is it old enough
-					 * that everyone sees it as committed?
-					 */
-					xmin = HeapTupleHeaderGetXmin(tuple.t_data);
-					if (!TransactionIdPrecedes(xmin, vacrel->OldestXmin))
-					{
-						all_visible = false;
-						*all_frozen = false;
-						break;
-					}
+					heap_page_update_vm_flags(&tuple, vacrel->OldestXmin,
+											  true, false,
+											  &all_visible, all_frozen);
 
 					/* Track newest xmin on page. */
+					xmin = HeapTupleHeaderGetXmin(tuple.t_data);
 					if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
 						*visibility_cutoff_xid = xmin;
-
-					/* Check whether this tuple is already frozen or not */
-					if (all_visible && *all_frozen &&
-						heap_tuple_needs_eventual_freeze(tuple.t_data))
-						*all_frozen = false;
 				}
 				break;
 
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 90e26745df..97b783e4a1 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -184,6 +184,7 @@ typedef struct
 	Datum		datum1;			/* value of first key column */
 	bool		isnull1;		/* is first key column NULL? */
 	int			srctape;		/* source tape number */
+	bool		is_live;		/* is the tuple alive? */
 } SortTuple;
 
 /*
@@ -1706,7 +1707,7 @@ tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
  * Note that the input data is always copied; the caller need not save it.
  */
 void
-tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
+tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup, bool is_live)
 {
 	MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
 	SortTuple	stup;
@@ -1718,6 +1719,7 @@ tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
 	COPYTUP(state, &stup, (void *) tup);
 
+	stup.is_live = is_live;
 	puttuple_common(state, &stup);
 
 	MemoryContextSwitchTo(oldcontext);
 }
@@ -2442,7 +2444,7 @@ tuplesort_gettupleslot(Tuplesortstate *state, bool forward, bool copy,
  * remaining valid after any further manipulation of tuplesort.
  */
 HeapTuple
-tuplesort_getheaptuple(Tuplesortstate *state, bool forward)
+tuplesort_getheaptuple(Tuplesortstate *state, bool forward, bool *is_live)
 {
 	MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
 	SortTuple	stup;
@@ -2452,6 +2454,9 @@ tuplesort_getheaptuple(Tuplesortstate *state, bool forward)
 
 	MemoryContextSwitchTo(oldcontext);
 
+	if (is_live)
+		*is_live = stup.is_live;
+
 	return stup.tuple;
 }
 
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index e63b49fc38..71bda855f2 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -199,6 +199,9 @@ struct VacuumParams;
 extern void heap_vacuum_rel(Relation rel,
 							struct VacuumParams *params, BufferAccessStrategy bstrategy);
 extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc);
+extern void heap_page_update_vm_flags(HeapTuple tuple, TransactionId OldestXmin,
+									  bool is_live, bool check_infomask,
+									  bool *all_visible, bool *all_frozen);
 
 /* in heap/heapam_visibility.c */
 extern bool HeapTupleSatisfiesVisibility(HeapTuple stup, Snapshot snapshot,
diff --git a/src/include/access/rewriteheap.h b/src/include/access/rewriteheap.h
index 121f552405..2cda7320d3 100644
--- a/src/include/access/rewriteheap.h
+++ b/src/include/access/rewriteheap.h
@@ -25,8 +25,8 @@ extern RewriteState begin_heap_rewrite(Relation OldHeap, Relation NewHeap,
 									   TransactionId OldestXmin, TransactionId FreezeXid,
 									   MultiXactId MultiXactCutoff);
 extern void end_heap_rewrite(RewriteState state);
-extern void rewrite_heap_tuple(RewriteState state, HeapTuple oldTuple,
-							   HeapTuple newTuple);
+extern void rewrite_heap_tuple(RewriteState state, bool is_live,
+							   HeapTuple oldTuple, HeapTuple newTuple);
 extern bool rewrite_heap_dead_tuple(RewriteState state, HeapTuple oldTuple);
 
 /*
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h
index f94949370b..45442c4351 100644
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -232,7 +232,7 @@ extern bool tuplesort_used_bound(Tuplesortstate *state);
 
 extern void tuplesort_puttupleslot(Tuplesortstate *state,
 								   TupleTableSlot *slot);
-extern void tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup);
+extern void tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup, bool is_live);
 extern void tuplesort_putindextuplevalues(Tuplesortstate *state,
 										  Relation rel, ItemPointer self,
 										  Datum *values, bool *isnull);
@@ -243,7 +243,7 @@ extern void tuplesort_performsort(Tuplesortstate *state);
 
 extern bool tuplesort_gettupleslot(Tuplesortstate *state, bool forward,
 								   bool copy, TupleTableSlot *slot, Datum *abbrev);
-extern HeapTuple tuplesort_getheaptuple(Tuplesortstate *state, bool forward);
+extern HeapTuple tuplesort_getheaptuple(Tuplesortstate *state, bool forward, bool *is_live);
 extern IndexTuple tuplesort_getindextuple(Tuplesortstate *state, bool forward);
 extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward,
 							   Datum *val, bool *isNull, Datum *abbrev);
-- 
2.17.0

From de6586495575dd550e74a760f46060c0d51ea1d4 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sun, 29 Aug 2021 16:55:38 -0500
Subject: [PATCH v2 3/3] cluster: set relallvisible=num_pages

---
 src/backend/commands/cluster.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 83e968cfa4..86f772c107 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -1016,6 +1016,7 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
 	relform = (Form_pg_class) GETSTRUCT(reltup);
 
 	relform->relpages = num_pages;
+	relform->relallvisible = num_pages;
 	relform->reltuples = num_tuples;
 
 	/* Don't update the stats for pg_class.  See swap_relation_files. */
-- 
2.17.0
