From 8bac0453c9414f2b888cb916559d1909cd07be64 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <pg@bowt.ie>
Date: Fri, 11 Mar 2022 19:16:02 -0800
Subject: [PATCH v12 1/3] Set relfrozenxid to oldest extant XID seen by VACUUM.

When VACUUM set relfrozenxid before now, it set it to whatever value was
used to determine which tuples to freeze -- the FreezeLimit cutoff.
This approach was very naive: the relfrozenxid invariant only requires
that new relfrozenxid values be <= the oldest extant XID remaining in
the table (at the point that the VACUUM operation ends), which in
general might be much more recent than FreezeLimit.

VACUUM now sets relfrozenxid (and relminmxid) using the exact oldest
extant XID (and oldest extant MultiXactId) from the table, including
XIDs from the table's remaining/unfrozen MultiXacts.  This requires that
VACUUM carefully track the oldest unfrozen XID/MultiXactId as it goes.
This optimization doesn't require any changes to the definition of
relfrozenxid, nor does it require changes to the core design of
freezing.

Final relfrozenxid values must still be >= FreezeLimit in an aggressive
VACUUM -- FreezeLimit still acts as a lower bound on the final value
that aggressive VACUUM can set relfrozenxid to.  Since standard VACUUMs
still make no guarantees about advancing relfrozenxid, they might as
well set relfrozenxid to a value from well before FreezeLimit when the
opportunity presents itself.  In general standard VACUUMs may now set
relfrozenxid to any value > the original relfrozenxid and <= OldestXmin.
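
To illustrate the tracking rule (a rough sketch only, not code taken
verbatim from the patch, though NewRelfrozenXid and OldestXmin match
the names used in vacuumlazy.c): the tracker starts at its upper bound,
and any unfrozen XID left behind can only ever push it back:

    TransactionId NewRelfrozenXid = OldestXmin; /* start at upper bound */

    /* ...for each unfrozen XID "xid" that will remain in the table... */
    if (TransactionIdPrecedes(xid, NewRelfrozenXid))
        NewRelfrozenXid = xid;  /* ratchet back, never forward */

    /* at the end of VACUUM: pg_class.relfrozenxid = NewRelfrozenXid */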

Credit for the general idea of using the oldest extant XID to set
pg_class.relfrozenxid at the end of VACUUM goes to Andres Freund.

Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Andres Freund <andres@anarazel.de>
Reviewed-By: Robert Haas <robertmhaas@gmail.com>
Discussion: https://postgr.es/m/CAH2-WzkymFbz6D_vL+jmqSn_5q1wsFvFrE+37yLgL_Rkfd6Gzg@mail.gmail.com
---
 src/include/access/heapam.h                   |   4 +-
 src/include/access/heapam_xlog.h              |   4 +-
 src/include/commands/vacuum.h                 |   1 +
 src/backend/access/heap/heapam.c              | 306 ++++++++++++++----
 src/backend/access/heap/vacuumlazy.c          | 175 ++++++----
 src/backend/commands/cluster.c                |   5 +-
 src/backend/commands/vacuum.c                 |  42 +--
 doc/src/sgml/maintenance.sgml                 |  30 +-
 .../expected/vacuum-no-cleanup-lock.out       | 189 +++++++++++
 .../isolation/expected/vacuum-reltuples.out   |  67 ----
 src/test/isolation/isolation_schedule         |   2 +-
 .../specs/vacuum-no-cleanup-lock.spec         | 150 +++++++++
 .../isolation/specs/vacuum-reltuples.spec     |  49 ---
 13 files changed, 744 insertions(+), 280 deletions(-)
 create mode 100644 src/test/isolation/expected/vacuum-no-cleanup-lock.out
 delete mode 100644 src/test/isolation/expected/vacuum-reltuples.out
 create mode 100644 src/test/isolation/specs/vacuum-no-cleanup-lock.spec
 delete mode 100644 src/test/isolation/specs/vacuum-reltuples.spec

diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index b46ab7d73..df5b31700 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -168,7 +168,9 @@ extern bool heap_freeze_tuple(HeapTupleHeader tuple,
 							  TransactionId relfrozenxid, TransactionId relminmxid,
 							  TransactionId cutoff_xid, TransactionId cutoff_multi);
 extern bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
-									MultiXactId cutoff_multi);
+									MultiXactId cutoff_multi,
+									TransactionId *relfrozenxid_nofreeze_out,
+									MultiXactId *relminmxid_nofreeze_out);
 extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple);
 
 extern void simple_heap_insert(Relation relation, HeapTuple tup);
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index 5c47fdcec..2d8a7f627 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -410,7 +410,9 @@ extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 									  TransactionId cutoff_xid,
 									  TransactionId cutoff_multi,
 									  xl_heap_freeze_tuple *frz,
-									  bool *totally_frozen);
+									  bool *totally_frozen,
+									  TransactionId *relfrozenxid_out,
+									  MultiXactId *relminmxid_out);
 extern void heap_execute_freeze_tuple(HeapTupleHeader tuple,
 									  xl_heap_freeze_tuple *xlrec_tp);
 extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer,
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index d64f6268f..ead88edda 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -291,6 +291,7 @@ extern bool vacuum_set_xid_limits(Relation rel,
 								  int multixact_freeze_min_age,
 								  int multixact_freeze_table_age,
 								  TransactionId *oldestXmin,
+								  MultiXactId *oldestMxact,
 								  TransactionId *freezeLimit,
 								  MultiXactId *multiXactCutoff);
 extern bool vacuum_xid_failsafe_check(TransactionId relfrozenxid,
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 3746336a0..55670f507 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -6128,7 +6128,12 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
  * NB -- this might have the side-effect of creating a new MultiXactId!
  *
  * "flags" is an output value; it's used to tell caller what to do on return.
- * Possible flags are:
+ *
+ * "xmax_oldest_xid_out" is an output value; we must handle the details of
+ * tracking the oldest extant member Xid within any Multixact that will
+ * remain.  This is one component used by caller to track relfrozenxid_out.
+ *
+ * Possible values that we can set in "flags":
  * FRM_NOOP
  *		don't do anything -- keep existing Xmax
  * FRM_INVALIDATE_XMAX
@@ -6140,12 +6145,18 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
  * FRM_RETURN_IS_MULTI
  *		The return value is a new MultiXactId to set as new Xmax.
  *		(caller must obtain proper infomask bits using GetMultiXactIdHintBits)
+ *
+ * Final "xmax_oldest_xid_out" value should be ignored completely unless
+ * "flags" contains either FRM_NOOP or FRM_RETURN_IS_MULTI.  Final value is
+ * drawn from oldest extant Xid that will remain in some MultiXact (old or
+ * new) after xmax is processed.  Xids that won't remain after processing will
+ * never affect final "xmax_oldest_xid_out" set here, per general convention.
  */
 static TransactionId
 FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 				  TransactionId relfrozenxid, TransactionId relminmxid,
 				  TransactionId cutoff_xid, MultiXactId cutoff_multi,
-				  uint16 *flags)
+				  uint16 *flags, TransactionId *xmax_oldest_xid_out)
 {
 	TransactionId xid = InvalidTransactionId;
 	int			i;
@@ -6157,6 +6168,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 	bool		has_lockers;
 	TransactionId update_xid;
 	bool		update_committed;
+	TransactionId temp_xid_out;
 
 	*flags = 0;
 
@@ -6228,6 +6240,10 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 			}
 		}
 
+		/*
+		 * Don't push back xmax_oldest_xid_out using FRM_RETURN_IS_XID Xid, or
+		 * when no Xids will remain
+		 */
 		return xid;
 	}
 
@@ -6251,6 +6267,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 
 	/* is there anything older than the cutoff? */
 	need_replace = false;
+	temp_xid_out = *xmax_oldest_xid_out;	/* init for FRM_NOOP */
 	for (i = 0; i < nmembers; i++)
 	{
 		if (TransactionIdPrecedes(members[i].xid, cutoff_xid))
@@ -6258,28 +6275,38 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 			need_replace = true;
 			break;
 		}
+		if (TransactionIdPrecedes(members[i].xid, temp_xid_out))
+			temp_xid_out = members[i].xid;
 	}
 
 	/*
 	 * In the simplest case, there is no member older than the cutoff; we can
-	 * keep the existing MultiXactId as is.
+	 * keep the existing MultiXactId as-is, avoiding a more expensive second
+	 * pass over the multi
 	 */
 	if (!need_replace)
 	{
+		/*
+		 * When xmax_oldest_xid_out gets pushed back here it's likely that the
+		 * update Xid was the oldest member, but we don't rely on that
+		 */
 		*flags |= FRM_NOOP;
+		*xmax_oldest_xid_out = temp_xid_out;
 		pfree(members);
-		return InvalidTransactionId;
+		return multi;
 	}
 
 	/*
-	 * If the multi needs to be updated, figure out which members do we need
-	 * to keep.
+	 * Do a more thorough second pass over the multi to figure out which
+	 * member XIDs actually need to be kept.  Checking the precise status of
+	 * individual members might even show that we don't need to keep anything.
 	 */
 	nnewmembers = 0;
 	newmembers = palloc(sizeof(MultiXactMember) * nmembers);
 	has_lockers = false;
 	update_xid = InvalidTransactionId;
 	update_committed = false;
+	temp_xid_out = *xmax_oldest_xid_out;	/* init for FRM_RETURN_IS_MULTI */
 
 	for (i = 0; i < nmembers; i++)
 	{
@@ -6335,7 +6362,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 			}
 
 			/*
-			 * Since the tuple wasn't marked HEAPTUPLE_DEAD by vacuum, the
+			 * Since the tuple wasn't totally removed when vacuum pruned, the
 			 * update Xid cannot possibly be older than the xid cutoff. The
 			 * presence of such a tuple would cause corruption, so be paranoid
 			 * and check.
@@ -6348,15 +6375,20 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 										 update_xid, cutoff_xid)));
 
 			/*
-			 * If we determined that it's an Xid corresponding to an update
-			 * that must be retained, additionally add it to the list of
-			 * members of the new Multi, in case we end up using that.  (We
-			 * might still decide to use only an update Xid and not a multi,
-			 * but it's easier to maintain the list as we walk the old members
-			 * list.)
+			 * We determined that this is an Xid corresponding to an update
+			 * that must be retained -- add it to the new members list for later.
+			 *
+			 * Also consider pushing back temp_xid_out, which is needed when
+			 * we later conclude that a new multi is required (i.e. when we go
+			 * on to set FRM_RETURN_IS_MULTI for our caller because we also
+			 * need to retain a locker that's still running).
 			 */
 			if (TransactionIdIsValid(update_xid))
+			{
 				newmembers[nnewmembers++] = members[i];
+				if (TransactionIdPrecedes(members[i].xid, temp_xid_out))
+					temp_xid_out = members[i].xid;
+			}
 		}
 		else
 		{
@@ -6374,11 +6406,17 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 
 	pfree(members);
 
+	/*
+	 * Determine what to do with caller's multi based on information gathered
+	 * during our second pass
+	 */
 	if (nnewmembers == 0)
 	{
 		/* nothing worth keeping!? Tell caller to remove the whole thing */
 		*flags |= FRM_INVALIDATE_XMAX;
 		xid = InvalidTransactionId;
+
+		/* Don't push back xmax_oldest_xid_out -- no Xids will remain */
 	}
 	else if (TransactionIdIsValid(update_xid) && !has_lockers)
 	{
@@ -6394,6 +6432,8 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 		if (update_committed)
 			*flags |= FRM_MARK_COMMITTED;
 		xid = update_xid;
+
+		/* Don't push back xmax_oldest_xid_out using FRM_RETURN_IS_XID Xid */
 	}
 	else
 	{
@@ -6403,6 +6443,12 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 		 */
 		xid = MultiXactIdCreateFromMembers(nnewmembers, newmembers);
 		*flags |= FRM_RETURN_IS_MULTI;
+
+		/*
+		 * The oldest Xid we're transferring from the old multixact over to
+		 * the new one might push back xmax_oldest_xid_out
+		 */
+		*xmax_oldest_xid_out = temp_xid_out;
 	}
 
 	pfree(newmembers);
@@ -6421,21 +6467,30 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
  * will be totally frozen after these operations are performed and false if
  * more freezing will eventually be required.
  *
+ * The *relfrozenxid_out and *relminmxid_out arguments are the current target
+ * relfrozenxid and relminmxid for VACUUM caller's heap rel.  Any and all
+ * unfrozen XIDs or MXIDs that remain in caller's rel after VACUUM finishes
+ * _must_ have values >= the final relfrozenxid/relminmxid values in pg_class.
+ * This includes XIDs that remain as MultiXact members from any tuple's xmax.
+ * Each call here pushes back *relfrozenxid_out and/or *relminmxid_out as
+ * needed to avoid unsafe final values in rel's authoritative pg_class tuple.
+ *
  * Caller is responsible for setting the offset field, if appropriate.
  *
  * It is assumed that the caller has checked the tuple with
  * HeapTupleSatisfiesVacuum() and determined that it is not HEAPTUPLE_DEAD
  * (else we should be removing the tuple, not freezing it).
  *
- * NB: cutoff_xid *must* be <= the current global xmin, to ensure that any
+ * NB: This function has side effects: it might allocate a new MultiXactId.
+ * It will be set as the tuple's new xmax when our *frz output is processed
+ * within heap_execute_freeze_tuple later on.  If the tuple is in a shared
+ * buffer then caller had better already hold an exclusive lock on it.
+ *
+ * NB: cutoff_xid *must* be <= VACUUM's OldestXmin, to ensure that any
  * XID older than it could neither be running nor seen as running by any
  * open transaction.  This ensures that the replacement will not change
  * anyone's idea of the tuple state.
- * Similarly, cutoff_multi must be less than or equal to the smallest
- * MultiXactId used by any transaction currently open.
- *
- * If the tuple is in a shared buffer, caller must hold an exclusive lock on
- * that buffer.
+ * Similarly, cutoff_multi must be <= VACUUM's OldestMxact.
  *
  * NB: It is not enough to set hint bits to indicate something is
  * committed/invalid -- they might not be set on a standby, or after crash
@@ -6445,7 +6500,9 @@ bool
 heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 						  TransactionId relfrozenxid, TransactionId relminmxid,
 						  TransactionId cutoff_xid, TransactionId cutoff_multi,
-						  xl_heap_freeze_tuple *frz, bool *totally_frozen)
+						  xl_heap_freeze_tuple *frz, bool *totally_frozen,
+						  TransactionId *relfrozenxid_out,
+						  MultiXactId *relminmxid_out)
 {
 	bool		changed = false;
 	bool		xmax_already_frozen = false;
@@ -6464,7 +6521,9 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 	 * already a permanent value), while in the block below it is set true to
 	 * mean "xmin won't need freezing after what we do to it here" (false
 	 * otherwise).  In both cases we're allowed to set totally_frozen, as far
-	 * as xmin is concerned.
+	 * as xmin is concerned.  Neither case requires relfrozenxid_out
+	 * handling, since either way the tuple's xmin will be a permanent value
+	 * once we're done with it.
 	 */
 	xid = HeapTupleHeaderGetXmin(tuple);
 	if (!TransactionIdIsNormal(xid))
@@ -6489,6 +6548,12 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 			frz->t_infomask |= HEAP_XMIN_FROZEN;
 			changed = true;
 		}
+		else
+		{
+			/* xmin to remain unfrozen.  Could push back relfrozenxid_out. */
+			if (TransactionIdPrecedes(xid, *relfrozenxid_out))
+				*relfrozenxid_out = xid;
+		}
 	}
 
 	/*
@@ -6506,15 +6571,29 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 	{
 		TransactionId newxmax;
 		uint16		flags;
+		TransactionId xmax_oldest_xid_out = *relfrozenxid_out;
 
 		newxmax = FreezeMultiXactId(xid, tuple->t_infomask,
 									relfrozenxid, relminmxid,
-									cutoff_xid, cutoff_multi, &flags);
+									cutoff_xid, cutoff_multi,
+									&flags, &xmax_oldest_xid_out);
 
 		freeze_xmax = (flags & FRM_INVALIDATE_XMAX);
 
 		if (flags & FRM_RETURN_IS_XID)
 		{
+			/*
+			 * xmax will become an updater Xid (original MultiXact's updater
+			 * member Xid will be carried forward as a simple Xid in Xmax).
+			 * Might have to ratchet back relfrozenxid_out here, though never
+			 * relminmxid_out.
+			 */
+			Assert(!freeze_xmax);
+			Assert(TransactionIdIsValid(newxmax));
+			if (TransactionIdPrecedes(newxmax, *relfrozenxid_out))
+				*relfrozenxid_out = newxmax;
+			/* Note: xmax_oldest_xid_out isn't valid here */
+
 			/*
 			 * NB -- some of these transformations are only valid because we
 			 * know the return Xid is a tuple updater (i.e. not merely a
@@ -6533,6 +6612,19 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 			uint16		newbits;
 			uint16		newbits2;
 
+			/*
+			 * xmax is an old MultiXactId that we have to replace with a new
+			 * MultiXactId, one carrying forward some of the original's Xids.
+			 * Might have to ratchet back relfrozenxid_out here, though never
+			 * relminmxid_out.
+			 */
+			Assert(!freeze_xmax);
+			Assert(MultiXactIdIsValid(newxmax));
+			Assert(!MultiXactIdPrecedes(newxmax, *relminmxid_out));
+			Assert(TransactionIdPrecedesOrEquals(xmax_oldest_xid_out,
+												 *relfrozenxid_out));
+			*relfrozenxid_out = xmax_oldest_xid_out;
+
 			/*
 			 * We can't use GetMultiXactIdHintBits directly on the new multi
 			 * here; that routine initializes the masks to all zeroes, which
@@ -6549,6 +6641,30 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 
 			changed = true;
 		}
+		else if (flags & FRM_NOOP)
+		{
+			/*
+			 * xmax is a MultiXactId, and nothing about it changes for now.
+			 * Might have to ratchet back relminmxid_out, relfrozenxid_out, or
+			 * both together.
+			 */
+			Assert(!freeze_xmax);
+			Assert(MultiXactIdIsValid(newxmax) && xid == newxmax);
+			Assert(TransactionIdPrecedesOrEquals(xmax_oldest_xid_out,
+												 *relfrozenxid_out));
+			if (MultiXactIdPrecedes(xid, *relminmxid_out))
+				*relminmxid_out = xid;
+			*relfrozenxid_out = xmax_oldest_xid_out;
+		}
+		else
+		{
+			/*
+			 * Keeping neither an Xid nor a MultiXactId in xmax (freezing it).
+			 * Won't have to ratchet back relminmxid_out or relfrozenxid_out.
+			 */
+			Assert(freeze_xmax);
+			Assert(!TransactionIdIsValid(newxmax));
+		}
 	}
 	else if (TransactionIdIsNormal(xid))
 	{
@@ -6573,15 +6689,21 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 						 errmsg_internal("cannot freeze committed xmax %u",
 										 xid)));
 			freeze_xmax = true;
+			/* No need for relfrozenxid_out handling, since we'll freeze xmax */
 		}
 		else
+		{
 			freeze_xmax = false;
+			if (TransactionIdPrecedes(xid, *relfrozenxid_out))
+				*relfrozenxid_out = xid;
+		}
 	}
 	else if ((tuple->t_infomask & HEAP_XMAX_INVALID) ||
 			 !TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple)))
 	{
 		freeze_xmax = false;
 		xmax_already_frozen = true;
+		/* No need for relfrozenxid_out handling for already-frozen xmax */
 	}
 	else
 		ereport(ERROR,
@@ -6622,6 +6744,8 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 		 * was removed in PostgreSQL 9.0.  Note that if we were to respect
 		 * cutoff_xid here, we'd need to make surely to clear totally_frozen
 		 * when we skipped freezing on that basis.
+		 *
+		 * No need for relfrozenxid_out handling, since we always freeze xvac.
 		 */
 		if (TransactionIdIsNormal(xid))
 		{
@@ -6699,11 +6823,14 @@ heap_freeze_tuple(HeapTupleHeader tuple,
 	xl_heap_freeze_tuple frz;
 	bool		do_freeze;
 	bool		tuple_totally_frozen;
+	TransactionId relfrozenxid_out = cutoff_xid;
+	MultiXactId relminmxid_out = cutoff_multi;
 
 	do_freeze = heap_prepare_freeze_tuple(tuple,
 										  relfrozenxid, relminmxid,
 										  cutoff_xid, cutoff_multi,
-										  &frz, &tuple_totally_frozen);
+										  &frz, &tuple_totally_frozen,
+										  &relfrozenxid_out, &relminmxid_out);
 
 	/*
 	 * Note that because this is not a WAL-logged operation, we don't need to
@@ -7136,79 +7263,122 @@ heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
  * It doesn't matter whether the tuple is alive or dead, we are checking
  * to see if a tuple needs to be removed or frozen to avoid wraparound.
  *
+ * The *relfrozenxid_nofreeze_out and *relminmxid_nofreeze_out arguments are
+ * input/output arguments, similar to heap_prepare_freeze_tuple's
+ * *relfrozenxid_out and *relminmxid_out arguments.  There is one big
+ * difference: we track the oldest extant XID and MXID while making the
+ * working assumption that freezing won't go ahead.  heap_prepare_freeze_tuple
+ * assumes that freezing will go ahead (based on the specific instructions it
+ * provides for its caller's tuple).
+ *
+ * Note, in particular, that we assume that freezing won't go ahead even for
+ * a tuple that we indicate "needs freezing" (by returning true).  Not all
+ * callers will be okay with that.  Such callers should pass us temp copies
+ * of their tracking variables, so that they can discard our no-freeze values
+ * at the last moment and freeze the tuple using heap_prepare_freeze_tuple.
+ *
  * NB: Cannot rely on hint bits here, they might not be set after a crash or
  * on a standby.
  */
 bool
 heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
-						MultiXactId cutoff_multi)
+						MultiXactId cutoff_multi,
+						TransactionId *relfrozenxid_nofreeze_out,
+						MultiXactId *relminmxid_nofreeze_out)
 {
+	bool		needs_freeze = false;
 	TransactionId xid;
+	MultiXactId multi;
 
+	/* First deal with xmin */
 	xid = HeapTupleHeaderGetXmin(tuple);
-	if (TransactionIdIsNormal(xid) &&
-		TransactionIdPrecedes(xid, cutoff_xid))
-		return true;
+	if (TransactionIdIsNormal(xid))
+	{
+		if (TransactionIdPrecedes(xid, *relfrozenxid_nofreeze_out))
+			*relfrozenxid_nofreeze_out = xid;
+		if (TransactionIdPrecedes(xid, cutoff_xid))
+			needs_freeze = true;
+	}
 
 	/*
+	 * Now deal with xmax.
+	 *
 	 * The considerations for multixacts are complicated; look at
 	 * heap_prepare_freeze_tuple for justifications.  This routine had better
 	 * be in sync with that one!
 	 */
+	xid = InvalidTransactionId;
+	multi = InvalidMultiXactId;
 	if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
-	{
-		MultiXactId multi;
-
 		multi = HeapTupleHeaderGetRawXmax(tuple);
-		if (!MultiXactIdIsValid(multi))
-		{
-			/* no xmax set, ignore */
-			;
-		}
-		else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
-			return true;
-		else if (MultiXactIdPrecedes(multi, cutoff_multi))
-			return true;
-		else
-		{
-			MultiXactMember *members;
-			int			nmembers;
-			int			i;
+	else
+		xid = HeapTupleHeaderGetRawXmax(tuple);
 
-			/* need to check whether any member of the mxact is too old */
-
-			nmembers = GetMultiXactIdMembers(multi, &members, false,
-											 HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
-
-			for (i = 0; i < nmembers; i++)
-			{
-				if (TransactionIdPrecedes(members[i].xid, cutoff_xid))
-				{
-					pfree(members);
-					return true;
-				}
-			}
-			if (nmembers > 0)
-				pfree(members);
-		}
+	if (TransactionIdIsNormal(xid))
+	{
+		/* xmax is a non-permanent XID */
+		if (TransactionIdPrecedes(xid, *relfrozenxid_nofreeze_out))
+			*relfrozenxid_nofreeze_out = xid;
+		if (TransactionIdPrecedes(xid, cutoff_xid))
+			needs_freeze = true;
+	}
+	else if (!MultiXactIdIsValid(multi))
+	{
+		/* xmax is a permanent XID or invalid MultiXactId/XID */
+	}
+	else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
+	{
+		/* xmax is a pg_upgrade'd MultiXact, which can't have an updater XID */
+		if (MultiXactIdPrecedes(multi, *relminmxid_nofreeze_out))
+			*relminmxid_nofreeze_out = multi;
+		/* heap_prepare_freeze_tuple always freezes pg_upgrade'd xmax */
+		needs_freeze = true;
 	}
 	else
 	{
-		xid = HeapTupleHeaderGetRawXmax(tuple);
-		if (TransactionIdIsNormal(xid) &&
-			TransactionIdPrecedes(xid, cutoff_xid))
-			return true;
+		/* xmax is a MultiXactId that may have an updater XID */
+		MultiXactMember *members;
+		int			nmembers;
+
+		if (MultiXactIdPrecedes(multi, *relminmxid_nofreeze_out))
+			*relminmxid_nofreeze_out = multi;
+		if (MultiXactIdPrecedes(multi, cutoff_multi))
+			needs_freeze = true;
+
+		/*
+		 * relfrozenxid_nofreeze_out might need to be pushed back by the
+		 * oldest member XID from the mxact.  Need to check its members now.
+		 * (Might also affect whether we advise caller to freeze tuple.)
+		 */
+		nmembers = GetMultiXactIdMembers(multi, &members, false,
+										 HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
+
+		for (int i = 0; i < nmembers; i++)
+		{
+			xid = members[i].xid;
+			Assert(TransactionIdIsNormal(xid));
+			if (TransactionIdPrecedes(xid, *relfrozenxid_nofreeze_out))
+				*relfrozenxid_nofreeze_out = xid;
+			if (TransactionIdPrecedes(xid, cutoff_xid))
+				needs_freeze = true;
+		}
+		if (nmembers > 0)
+			pfree(members);
 	}
 
 	if (tuple->t_infomask & HEAP_MOVED)
 	{
 		xid = HeapTupleHeaderGetXvac(tuple);
-		if (TransactionIdIsNormal(xid) &&
-			TransactionIdPrecedes(xid, cutoff_xid))
-			return true;
+		if (TransactionIdIsNormal(xid))
+		{
+			if (TransactionIdPrecedes(xid, *relfrozenxid_nofreeze_out))
+				*relfrozenxid_nofreeze_out = xid;
+			/* heap_prepare_freeze_tuple always freezes xvac */
+			needs_freeze = true;
+		}
 	}
 
-	return false;
+	return needs_freeze;
 }
 
 /*
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 87ab7775a..723408744 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -144,7 +144,7 @@ typedef struct LVRelState
 	Relation   *indrels;
 	int			nindexes;
 
-	/* Aggressive VACUUM (scan all unfrozen pages)? */
+	/* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
 	bool		aggressive;
 	/* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
 	bool		skipwithvm;
@@ -173,8 +173,9 @@ typedef struct LVRelState
 	/* VACUUM operation's target cutoffs for freezing XIDs and MultiXactIds */
 	TransactionId FreezeLimit;
 	MultiXactId MultiXactCutoff;
-	/* Are FreezeLimit/MultiXactCutoff still valid? */
-	bool		freeze_cutoffs_valid;
+	/* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
+	TransactionId NewRelfrozenXid;
+	MultiXactId NewRelminMxid;
 
 	/* Error reporting state */
 	char	   *relnamespace;
@@ -319,15 +320,15 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 				skipwithvm;
 	bool		frozenxid_updated,
 				minmulti_updated;
-	BlockNumber orig_rel_pages;
+	BlockNumber orig_rel_pages,
+				new_rel_pages,
+				new_rel_allvisible;
 	char	  **indnames = NULL;
-	BlockNumber new_rel_pages;
-	BlockNumber new_rel_allvisible;
-	double		new_live_tuples;
 	ErrorContextCallback errcallback;
 	PgStat_Counter startreadtime = 0;
 	PgStat_Counter startwritetime = 0;
 	TransactionId OldestXmin;
+	MultiXactId OldestMxact;
 	TransactionId FreezeLimit;
 	MultiXactId MultiXactCutoff;
 
@@ -351,20 +352,17 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	/*
 	 * Get OldestXmin cutoff, which is used to determine which deleted tuples
 	 * are considered DEAD, not just RECENTLY_DEAD.  Also get related cutoffs
-	 * used to determine which XIDs/MultiXactIds will be frozen.
-	 *
-	 * If this is an aggressive VACUUM, then we're strictly required to freeze
-	 * any and all XIDs from before FreezeLimit, so that we will be able to
-	 * safely advance relfrozenxid up to FreezeLimit below (we must be able to
-	 * advance relminmxid up to MultiXactCutoff, too).
+	 * used to determine which XIDs/MultiXactIds will be frozen.  If this is
+	 * an aggressive VACUUM then lazy_scan_heap cannot leave behind unfrozen
+	 * XIDs < FreezeLimit (or unfrozen MXIDs < MultiXactCutoff).
 	 */
 	aggressive = vacuum_set_xid_limits(rel,
 									   params->freeze_min_age,
 									   params->freeze_table_age,
 									   params->multixact_freeze_min_age,
 									   params->multixact_freeze_table_age,
-									   &OldestXmin, &FreezeLimit,
-									   &MultiXactCutoff);
+									   &OldestXmin, &OldestMxact,
+									   &FreezeLimit, &MultiXactCutoff);
 
 	skipwithvm = true;
 	if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
@@ -511,10 +509,11 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	vacrel->vistest = GlobalVisTestFor(rel);
 	/* FreezeLimit controls XID freezing (always <= OldestXmin) */
 	vacrel->FreezeLimit = FreezeLimit;
-	/* MultiXactCutoff controls MXID freezing */
+	/* MultiXactCutoff controls MXID freezing (always <= OldestMxact) */
 	vacrel->MultiXactCutoff = MultiXactCutoff;
-	/* Track if cutoffs became invalid (possible in !aggressive case only) */
-	vacrel->freeze_cutoffs_valid = true;
+	/* Initialize state used to track oldest extant XID/MXID */
+	vacrel->NewRelfrozenXid = OldestXmin;
+	vacrel->NewRelminMxid = OldestMxact;
 
 	/*
 	 * Call lazy_scan_heap to perform all required heap pruning, index
@@ -548,51 +547,57 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	/*
 	 * Prepare to update rel's pg_class entry.
 	 *
-	 * In principle new_live_tuples could be -1 indicating that we (still)
-	 * don't know the tuple count.  In practice that probably can't happen,
-	 * since we'd surely have scanned some pages if the table is new and
-	 * nonempty.
-	 *
 	 * For safety, clamp relallvisible to be not more than what we're setting
 	 * relpages to.
 	 */
 	new_rel_pages = vacrel->rel_pages;	/* After possible rel truncation */
-	new_live_tuples = vacrel->new_live_tuples;
 	visibilitymap_count(rel, &new_rel_allvisible, NULL);
 	if (new_rel_allvisible > new_rel_pages)
 		new_rel_allvisible = new_rel_pages;
 
 	/*
-	 * Now actually update rel's pg_class entry.
-	 *
-	 * Aggressive VACUUM must reliably advance relfrozenxid (and relminmxid).
-	 * We are able to advance relfrozenxid in a non-aggressive VACUUM too,
-	 * provided we didn't skip any all-visible (not all-frozen) pages using
-	 * the visibility map, and assuming that we didn't fail to get a cleanup
-	 * lock that made it unsafe with respect to FreezeLimit (or perhaps our
-	 * MultiXactCutoff) established for VACUUM operation.
+	 * Aggressive VACUUMs must advance relfrozenxid to a value >= FreezeLimit,
+	 * and advance relminmxid to a value >= MultiXactCutoff.
 	 */
-	if (vacrel->scanned_pages + vacrel->frozenskipped_pages < orig_rel_pages ||
-		!vacrel->freeze_cutoffs_valid)
+	Assert(!aggressive || vacrel->NewRelfrozenXid == OldestXmin ||
+		   TransactionIdPrecedesOrEquals(FreezeLimit,
+										 vacrel->NewRelfrozenXid));
+	Assert(!aggressive || vacrel->NewRelminMxid == OldestMxact ||
+		   MultiXactIdPrecedesOrEquals(MultiXactCutoff,
+									   vacrel->NewRelminMxid));
+
+	/*
+	 * Non-aggressive VACUUMs might advance relfrozenxid to an XID that is
+	 * either older or newer than FreezeLimit (same applies to relminmxid and
+	 * MultiXactCutoff).  But the state that tracks the oldest remaining XID
+	 * and MXID cannot be trusted when any all-visible pages were skipped.
+	 */
+	Assert(vacrel->NewRelfrozenXid == OldestXmin ||
+		   TransactionIdPrecedesOrEquals(vacrel->relfrozenxid,
+										 vacrel->NewRelfrozenXid));
+	Assert(vacrel->NewRelminMxid == OldestMxact ||
+		   MultiXactIdPrecedesOrEquals(vacrel->relminmxid,
+									   vacrel->NewRelminMxid));
+	if (vacrel->scanned_pages + vacrel->frozenskipped_pages < orig_rel_pages)
 	{
-		/* Cannot advance relfrozenxid/relminmxid */
+		/* Keep existing relfrozenxid and relminmxid (can't trust trackers) */
 		Assert(!aggressive);
-		frozenxid_updated = minmulti_updated = false;
-		vac_update_relstats(rel, new_rel_pages, new_live_tuples,
-							new_rel_allvisible, vacrel->nindexes > 0,
-							InvalidTransactionId, InvalidMultiXactId,
-							NULL, NULL, false);
-	}
-	else
-	{
-		Assert(vacrel->scanned_pages + vacrel->frozenskipped_pages ==
-			   orig_rel_pages);
-		vac_update_relstats(rel, new_rel_pages, new_live_tuples,
-							new_rel_allvisible, vacrel->nindexes > 0,
-							FreezeLimit, MultiXactCutoff,
-							&frozenxid_updated, &minmulti_updated, false);
+		vacrel->NewRelfrozenXid = InvalidTransactionId;
+		vacrel->NewRelminMxid = InvalidMultiXactId;
 	}
 
+	/*
+	 * Now actually update rel's pg_class entry
+	 *
+	 * In principle new_live_tuples could be -1 indicating that we (still)
+	 * don't know the tuple count.  In practice that can't happen, since we
+	 * scan every page that isn't skipped using the visibility map.
+	 */
+	vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
+						new_rel_allvisible, vacrel->nindexes > 0,
+						vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
+						&frozenxid_updated, &minmulti_updated, false);
+
 	/*
 	 * Report results to the stats collector, too.
 	 *
@@ -605,7 +610,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	 */
 	pgstat_report_vacuum(RelationGetRelid(rel),
 						 rel->rd_rel->relisshared,
-						 Max(new_live_tuples, 0),
+						 Max(vacrel->new_live_tuples, 0),
 						 vacrel->recently_dead_tuples +
 						 vacrel->missed_dead_tuples);
 	pgstat_progress_end_command();
@@ -694,17 +699,19 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 							 OldestXmin, diff);
 			if (frozenxid_updated)
 			{
-				diff = (int32) (FreezeLimit - vacrel->relfrozenxid);
+				diff = (int32) (vacrel->NewRelfrozenXid - vacrel->relfrozenxid);
+				Assert(diff > 0);
 				appendStringInfo(&buf,
 								 _("new relfrozenxid: %u, which is %d xids ahead of previous value\n"),
-								 FreezeLimit, diff);
+								 vacrel->NewRelfrozenXid, diff);
 			}
 			if (minmulti_updated)
 			{
-				diff = (int32) (MultiXactCutoff - vacrel->relminmxid);
+				diff = (int32) (vacrel->NewRelminMxid - vacrel->relminmxid);
+				Assert(diff > 0);
 				appendStringInfo(&buf,
 								 _("new relminmxid: %u, which is %d mxids ahead of previous value\n"),
-								 MultiXactCutoff, diff);
+								 vacrel->NewRelminMxid, diff);
 			}
 			if (orig_rel_pages > 0)
 			{
@@ -1584,6 +1591,8 @@ lazy_scan_prune(LVRelState *vacrel,
 				recently_dead_tuples;
 	int			nnewlpdead;
 	int			nfrozen;
+	TransactionId NewRelfrozenXid;
+	MultiXactId NewRelminMxid;
 	OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
 	xl_heap_freeze_tuple frozen[MaxHeapTuplesPerPage];
 
@@ -1593,7 +1602,9 @@ lazy_scan_prune(LVRelState *vacrel,
 
 retry:
 
-	/* Initialize (or reset) page-level counters */
+	/* Initialize (or reset) page-level state */
+	NewRelfrozenXid = vacrel->NewRelfrozenXid;
+	NewRelminMxid = vacrel->NewRelminMxid;
 	tuples_deleted = 0;
 	lpdead_items = 0;
 	live_tuples = 0;
@@ -1801,7 +1812,8 @@ retry:
 									  vacrel->FreezeLimit,
 									  vacrel->MultiXactCutoff,
 									  &frozen[nfrozen],
-									  &tuple_totally_frozen))
+									  &tuple_totally_frozen,
+									  &NewRelfrozenXid, &NewRelminMxid))
 		{
 			/* Will execute freeze below */
 			frozen[nfrozen++].offset = offnum;
@@ -1815,13 +1827,16 @@ retry:
 			prunestate->all_frozen = false;
 	}
 
+	vacrel->offnum = InvalidOffsetNumber;
+
 	/*
 	 * We have now divided every item on the page into either an LP_DEAD item
 	 * that will need to be vacuumed in indexes later, or a LP_NORMAL tuple
 	 * that remains and needs to be considered for freezing now (LP_UNUSED and
 	 * LP_REDIRECT items also remain, but are of no further interest to us).
 	 */
-	vacrel->offnum = InvalidOffsetNumber;
+	vacrel->NewRelfrozenXid = NewRelfrozenXid;
+	vacrel->NewRelminMxid = NewRelminMxid;
 
 	/*
 	 * Consider the need to freeze any items with tuple storage from the page
@@ -1972,6 +1987,8 @@ lazy_scan_noprune(LVRelState *vacrel,
 				missed_dead_tuples;
 	HeapTupleHeader tupleheader;
 	OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
+	TransactionId NoFreezeNewRelfrozenXid = vacrel->NewRelfrozenXid;
+	MultiXactId NoFreezeNewRelminMxid = vacrel->NewRelminMxid;
 
 	Assert(BufferGetBlockNumber(buf) == blkno);
 
@@ -2017,20 +2034,39 @@ lazy_scan_noprune(LVRelState *vacrel,
 		tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
 		if (heap_tuple_needs_freeze(tupleheader,
 									vacrel->FreezeLimit,
-									vacrel->MultiXactCutoff))
+									vacrel->MultiXactCutoff,
+									&NoFreezeNewRelfrozenXid,
+									&NoFreezeNewRelminMxid))
 		{
+			/* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
+
 			if (vacrel->aggressive)
 			{
-				/* Going to have to get cleanup lock for lazy_scan_prune */
+				/*
+				 * Aggressive VACUUMs must always be able to advance rel's
+				 * relfrozenxid to a value >= FreezeLimit (and to advance
+				 * rel's relminmxid to a value >= MultiXactCutoff).  The
+				 * ongoing aggressive VACUUM cannot satisfy these requirements
+				 * without freezing an XID (or MXID) from this tuple.
+				 *
+				 * The only safe option is to have caller perform processing
+				 * of this page using lazy_scan_prune.  Caller might have to
+				 * wait a while for a cleanup lock, but it can't be helped.
+				 */
 				vacrel->offnum = InvalidOffsetNumber;
 				return false;
 			}
-
-			/*
-			 * Current non-aggressive VACUUM operation definitely won't be
-			 * able to advance relfrozenxid or relminmxid
-			 */
-			vacrel->freeze_cutoffs_valid = false;
+			else
+			{
+				/*
+				 * Standard VACUUMs are not obligated to advance relfrozenxid
+				 * or relminmxid by any amount, so we can be much laxer here.
+				 *
+				 * Currently we always just accept an older final relfrozenxid
+				 * and/or relminmxid value.  We never make caller wait or work
+				 * a little harder, even when it likely makes sense to do so.
+				 */
+			}
 		}
 
 		ItemPointerSet(&(tuple.t_self), blkno, offnum);
@@ -2080,9 +2116,14 @@ lazy_scan_noprune(LVRelState *vacrel,
 	vacrel->offnum = InvalidOffsetNumber;
 
 	/*
-	 * Now save details of the LP_DEAD items from the page in vacrel (though
-	 * only when VACUUM uses two-pass strategy)
+	 * By here we know for sure that caller can tolerate reduced processing
+	 * for this particular page.  Save all of the details in vacrel now.
+	 * (lazy_scan_prune expects a clean slate, so we have to do this last.)
 	 */
+	vacrel->NewRelfrozenXid = NoFreezeNewRelfrozenXid;
+	vacrel->NewRelminMxid = NoFreezeNewRelminMxid;
+
+	/* Save details of the LP_DEAD items from the page */
 	if (vacrel->nindexes == 0)
 	{
 		/* Using one-pass strategy (since table has no indexes) */
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 02a7e94bf..a7e988298 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -767,6 +767,7 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
 	TupleDesc	oldTupDesc PG_USED_FOR_ASSERTS_ONLY;
 	TupleDesc	newTupDesc PG_USED_FOR_ASSERTS_ONLY;
 	TransactionId OldestXmin;
+	MultiXactId oldestMxact;
 	TransactionId FreezeXid;
 	MultiXactId MultiXactCutoff;
 	bool		use_sort;
@@ -856,8 +857,8 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
 	 * Since we're going to rewrite the whole table anyway, there's no reason
 	 * not to be aggressive about this.
 	 */
-	vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0,
-						  &OldestXmin, &FreezeXid, &MultiXactCutoff);
+	vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0, &OldestXmin, &oldestMxact,
+						  &FreezeXid, &MultiXactCutoff);
 
 	/*
 	 * FreezeXid will become the table's new relfrozenxid, and that mustn't go
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 50a4a612e..0ae3b4506 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -945,14 +945,22 @@ get_all_vacuum_rels(int options)
  * The output parameters are:
  * - oldestXmin is the Xid below which tuples deleted by any xact (that
  *   committed) should be considered DEAD, not just RECENTLY_DEAD.
- * - freezeLimit is the Xid below which all Xids are replaced by
- *	 FrozenTransactionId during vacuum.
- * - multiXactCutoff is the value below which all MultiXactIds are removed
- *   from Xmax.
+ * - oldestMxact is the MXID below which MultiXacts are definitely not
+ *   seen as visible by any running transaction.
+ * - freezeLimit is the Xid below which all Xids are definitely replaced by
+ *   FrozenTransactionId during aggressive vacuums.
+ * - multiXactCutoff is the value below which all MultiXactIds are definitely
+ *   removed from Xmax during aggressive vacuums.
  *
  * Return value indicates if vacuumlazy.c caller should make its VACUUM
  * operation aggressive.  An aggressive VACUUM must advance relfrozenxid up to
- * FreezeLimit, and relminmxid up to multiXactCutoff.
+ * FreezeLimit (at a minimum), and relminmxid up to multiXactCutoff (at a
+ * minimum).
+ *
+ * oldestXmin and oldestMxact are the most recent values that can ever be
+ * passed to vac_update_relstats() as frozenxid and minmulti arguments by our
+ * vacuumlazy.c caller later on.  These values should be passed when it turns
+ * out that VACUUM will leave no unfrozen XIDs/MXIDs behind in the table.
  */
 bool
 vacuum_set_xid_limits(Relation rel,
@@ -961,6 +969,7 @@ vacuum_set_xid_limits(Relation rel,
 					  int multixact_freeze_min_age,
 					  int multixact_freeze_table_age,
 					  TransactionId *oldestXmin,
+					  MultiXactId *oldestMxact,
 					  TransactionId *freezeLimit,
 					  MultiXactId *multiXactCutoff)
 {
@@ -969,7 +978,6 @@ vacuum_set_xid_limits(Relation rel,
 	int			effective_multixact_freeze_max_age;
 	TransactionId limit;
 	TransactionId safeLimit;
-	MultiXactId oldestMxact;
 	MultiXactId mxactLimit;
 	MultiXactId safeMxactLimit;
 	int			freezetable;
@@ -1065,9 +1073,11 @@ vacuum_set_xid_limits(Relation rel,
 						 effective_multixact_freeze_max_age / 2);
 	Assert(mxid_freezemin >= 0);
 
+	/* Remember for caller */
+	*oldestMxact = GetOldestMultiXactId();
+
 	/* compute the cutoff multi, being careful to generate a valid value */
-	oldestMxact = GetOldestMultiXactId();
-	mxactLimit = oldestMxact - mxid_freezemin;
+	mxactLimit = *oldestMxact - mxid_freezemin;
 	if (mxactLimit < FirstMultiXactId)
 		mxactLimit = FirstMultiXactId;
 
@@ -1082,8 +1092,8 @@ vacuum_set_xid_limits(Relation rel,
 				(errmsg("oldest multixact is far in the past"),
 				 errhint("Close open transactions with multixacts soon to avoid wraparound problems.")));
 		/* Use the safe limit, unless an older mxact is still running */
-		if (MultiXactIdPrecedes(oldestMxact, safeMxactLimit))
-			mxactLimit = oldestMxact;
+		if (MultiXactIdPrecedes(*oldestMxact, safeMxactLimit))
+			mxactLimit = *oldestMxact;
 		else
 			mxactLimit = safeMxactLimit;
 	}
@@ -1390,14 +1400,10 @@ vac_update_relstats(Relation relation,
 	 * Update relfrozenxid, unless caller passed InvalidTransactionId
 	 * indicating it has no new data.
 	 *
-	 * Ordinarily, we don't let relfrozenxid go backwards: if things are
-	 * working correctly, the only way the new frozenxid could be older would
-	 * be if a previous VACUUM was done with a tighter freeze_min_age, in
-	 * which case we don't want to forget the work it already did.  However,
-	 * if the stored relfrozenxid is "in the future", then it must be corrupt
-	 * and it seems best to overwrite it with the cutoff we used this time.
-	 * This should match vac_update_datfrozenxid() concerning what we consider
-	 * to be "in the future".
+	 * Ordinarily, we don't let relfrozenxid go backwards.  However, if the
+	 * stored relfrozenxid is "in the future", then it must be corrupt, so
+	 * just overwrite it.  This should match vac_update_datfrozenxid()
+	 * concerning what we consider to be "in the future".
 	 */
 	if (frozenxid_updated)
 		*frozenxid_updated = false;
diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml
index 36f975b1e..6a02d0fa8 100644
--- a/doc/src/sgml/maintenance.sgml
+++ b/doc/src/sgml/maintenance.sgml
@@ -563,9 +563,11 @@
     statistics in the system tables <structname>pg_class</structname> and
     <structname>pg_database</structname>.  In particular,
     the <structfield>relfrozenxid</structfield> column of a table's
-    <structname>pg_class</structname> row contains the freeze cutoff XID that was used
-    by the last aggressive <command>VACUUM</command> for that table.  All rows
-    inserted by transactions with XIDs older than this cutoff XID are
+    <structname>pg_class</structname> row contains the oldest
+    remaining XID at the end of the most recent <command>VACUUM</command>
+    that successfully advanced <structfield>relfrozenxid</structfield>
+    (typically the most recent aggressive <command>VACUUM</command>).
+    All rows inserted by transactions with XIDs older than this cutoff XID are
     guaranteed to have been frozen.  Similarly,
     the <structfield>datfrozenxid</structfield> column of a database's
     <structname>pg_database</structname> row is a lower bound on the unfrozen XIDs
@@ -588,6 +590,17 @@ SELECT datname, age(datfrozenxid) FROM pg_database;
     cutoff XID to the current transaction's XID.
    </para>
 
+   <tip>
+    <para>
+     <literal>VACUUM VERBOSE</literal> outputs information about
+     <structfield>relfrozenxid</structfield> and/or
+     <structfield>relminmxid</structfield> when either field was
+     advanced.  The same details appear in the server log when
+     <xref linkend="guc-log-autovacuum-min-duration"/> reports on
+     vacuuming performed by autovacuum.
+    </para>
+   </tip>
+
    <para>
     <command>VACUUM</command> normally only scans pages that have been modified
     since the last vacuum, but <structfield>relfrozenxid</structfield> can only be
@@ -602,7 +615,11 @@ SELECT datname, age(datfrozenxid) FROM pg_database;
     set <literal>age(relfrozenxid)</literal> to a value just a little more than the
     <varname>vacuum_freeze_min_age</varname> setting
     that was used (more by the number of transactions started since the
-    <command>VACUUM</command> started).  If no <structfield>relfrozenxid</structfield>-advancing
+    <command>VACUUM</command> started).  <command>VACUUM</command>
+    will set <structfield>relfrozenxid</structfield> to the oldest XID
+    that remains in the table, so it's possible that the final value
+    will be much more recent than strictly required.
+    If no <structfield>relfrozenxid</structfield>-advancing
     <command>VACUUM</command> is issued on the table until
     <varname>autovacuum_freeze_max_age</varname> is reached, an autovacuum will soon
     be forced for the table.
@@ -689,8 +706,9 @@ HINT:  Stop the postmaster and vacuum that database in single-user mode.
     </para>
 
     <para>
-     Aggressive <command>VACUUM</command> scans, regardless of
-     what causes them, enable advancing the value for that table.
+     Aggressive <command>VACUUM</command> scans, regardless of what
+     causes them, are <emphasis>guaranteed</emphasis> to be able to
+     advance the table's <structfield>relminmxid</structfield>.
      Eventually, as all tables in all databases are scanned and their
      oldest multixact values are advanced, on-disk storage for older
      multixacts can be removed.
diff --git a/src/test/isolation/expected/vacuum-no-cleanup-lock.out b/src/test/isolation/expected/vacuum-no-cleanup-lock.out
new file mode 100644
index 000000000..f7bc93e8f
--- /dev/null
+++ b/src/test/isolation/expected/vacuum-no-cleanup-lock.out
@@ -0,0 +1,189 @@
+Parsed test spec with 4 sessions
+
+starting permutation: vacuumer_pg_class_stats dml_insert vacuumer_nonaggressive_vacuum vacuumer_pg_class_stats
+step vacuumer_pg_class_stats: 
+  SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass;
+
+relpages|reltuples
+--------+---------
+       1|       20
+(1 row)
+
+step dml_insert: 
+  INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl;
+
+step vacuumer_nonaggressive_vacuum: 
+  VACUUM smalltbl;
+
+step vacuumer_pg_class_stats: 
+  SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass;
+
+relpages|reltuples
+--------+---------
+       1|       21
+(1 row)
+
+
+starting permutation: vacuumer_pg_class_stats dml_insert pinholder_cursor vacuumer_nonaggressive_vacuum vacuumer_pg_class_stats pinholder_commit
+step vacuumer_pg_class_stats: 
+  SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass;
+
+relpages|reltuples
+--------+---------
+       1|       20
+(1 row)
+
+step dml_insert: 
+  INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl;
+
+step pinholder_cursor: 
+  BEGIN;
+  DECLARE c1 CURSOR FOR SELECT 1 AS dummy FROM smalltbl;
+  FETCH NEXT FROM c1;
+
+dummy
+-----
+    1
+(1 row)
+
+step vacuumer_nonaggressive_vacuum: 
+  VACUUM smalltbl;
+
+step vacuumer_pg_class_stats: 
+  SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass;
+
+relpages|reltuples
+--------+---------
+       1|       21
+(1 row)
+
+step pinholder_commit: 
+  COMMIT;
+
+
+starting permutation: vacuumer_pg_class_stats pinholder_cursor dml_insert dml_delete dml_insert vacuumer_nonaggressive_vacuum vacuumer_pg_class_stats pinholder_commit
+step vacuumer_pg_class_stats: 
+  SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass;
+
+relpages|reltuples
+--------+---------
+       1|       20
+(1 row)
+
+step pinholder_cursor: 
+  BEGIN;
+  DECLARE c1 CURSOR FOR SELECT 1 AS dummy FROM smalltbl;
+  FETCH NEXT FROM c1;
+
+dummy
+-----
+    1
+(1 row)
+
+step dml_insert: 
+  INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl;
+
+step dml_delete: 
+  DELETE FROM smalltbl WHERE id = (SELECT min(id) FROM smalltbl);
+
+step dml_insert: 
+  INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl;
+
+step vacuumer_nonaggressive_vacuum: 
+  VACUUM smalltbl;
+
+step vacuumer_pg_class_stats: 
+  SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass;
+
+relpages|reltuples
+--------+---------
+       1|       21
+(1 row)
+
+step pinholder_commit: 
+  COMMIT;
+
+
+starting permutation: vacuumer_pg_class_stats dml_insert dml_delete pinholder_cursor dml_insert vacuumer_nonaggressive_vacuum vacuumer_pg_class_stats pinholder_commit
+step vacuumer_pg_class_stats: 
+  SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass;
+
+relpages|reltuples
+--------+---------
+       1|       20
+(1 row)
+
+step dml_insert: 
+  INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl;
+
+step dml_delete: 
+  DELETE FROM smalltbl WHERE id = (SELECT min(id) FROM smalltbl);
+
+step pinholder_cursor: 
+  BEGIN;
+  DECLARE c1 CURSOR FOR SELECT 1 AS dummy FROM smalltbl;
+  FETCH NEXT FROM c1;
+
+dummy
+-----
+    1
+(1 row)
+
+step dml_insert: 
+  INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl;
+
+step vacuumer_nonaggressive_vacuum: 
+  VACUUM smalltbl;
+
+step vacuumer_pg_class_stats: 
+  SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass;
+
+relpages|reltuples
+--------+---------
+       1|       21
+(1 row)
+
+step pinholder_commit: 
+  COMMIT;
+
+
+starting permutation: dml_begin dml_other_begin dml_key_share dml_other_key_share vacuumer_nonaggressive_vacuum pinholder_cursor dml_other_update dml_commit dml_other_commit vacuumer_nonaggressive_vacuum pinholder_commit vacuumer_nonaggressive_vacuum
+step dml_begin: BEGIN;
+step dml_other_begin: BEGIN;
+step dml_key_share: SELECT id FROM smalltbl WHERE id = 3 FOR KEY SHARE;
+id
+--
+ 3
+(1 row)
+
+step dml_other_key_share: SELECT id FROM smalltbl WHERE id = 3 FOR KEY SHARE;
+id
+--
+ 3
+(1 row)
+
+step vacuumer_nonaggressive_vacuum: 
+  VACUUM smalltbl;
+
+step pinholder_cursor: 
+  BEGIN;
+  DECLARE c1 CURSOR FOR SELECT 1 AS dummy FROM smalltbl;
+  FETCH NEXT FROM c1;
+
+dummy
+-----
+    1
+(1 row)
+
+step dml_other_update: UPDATE smalltbl SET t = 'u' WHERE id = 3;
+step dml_commit: COMMIT;
+step dml_other_commit: COMMIT;
+step vacuumer_nonaggressive_vacuum: 
+  VACUUM smalltbl;
+
+step pinholder_commit: 
+  COMMIT;
+
+step vacuumer_nonaggressive_vacuum: 
+  VACUUM smalltbl;
+
diff --git a/src/test/isolation/expected/vacuum-reltuples.out b/src/test/isolation/expected/vacuum-reltuples.out
deleted file mode 100644
index ce55376e7..000000000
--- a/src/test/isolation/expected/vacuum-reltuples.out
+++ /dev/null
@@ -1,67 +0,0 @@
-Parsed test spec with 2 sessions
-
-starting permutation: modify vac stats
-step modify: 
-    insert into smalltbl select max(id)+1 from smalltbl;
-
-step vac: 
-    vacuum smalltbl;
-
-step stats: 
-    select relpages, reltuples from pg_class
-     where oid='smalltbl'::regclass;
-
-relpages|reltuples
---------+---------
-       1|       21
-(1 row)
-
-
-starting permutation: modify open fetch1 vac close stats
-step modify: 
-    insert into smalltbl select max(id)+1 from smalltbl;
-
-step open: 
-    begin;
-    declare c1 cursor for select 1 as dummy from smalltbl;
-
-step fetch1: 
-    fetch next from c1;
-
-dummy
------
-    1
-(1 row)
-
-step vac: 
-    vacuum smalltbl;
-
-step close: 
-    commit;
-
-step stats: 
-    select relpages, reltuples from pg_class
-     where oid='smalltbl'::regclass;
-
-relpages|reltuples
---------+---------
-       1|       21
-(1 row)
-
-
-starting permutation: modify vac stats
-step modify: 
-    insert into smalltbl select max(id)+1 from smalltbl;
-
-step vac: 
-    vacuum smalltbl;
-
-step stats: 
-    select relpages, reltuples from pg_class
-     where oid='smalltbl'::regclass;
-
-relpages|reltuples
---------+---------
-       1|       21
-(1 row)
-
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index 8e8709815..35e0d1ee4 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -80,7 +80,7 @@ test: alter-table-4
 test: create-trigger
 test: sequence-ddl
 test: async-notify
-test: vacuum-reltuples
+test: vacuum-no-cleanup-lock
 test: timeouts
 test: vacuum-concurrent-drop
 test: vacuum-conflict
diff --git a/src/test/isolation/specs/vacuum-no-cleanup-lock.spec b/src/test/isolation/specs/vacuum-no-cleanup-lock.spec
new file mode 100644
index 000000000..a88be66de
--- /dev/null
+++ b/src/test/isolation/specs/vacuum-no-cleanup-lock.spec
@@ -0,0 +1,150 @@
+# Test for vacuum's reduced processing of heap pages (used for any heap page
+# where a cleanup lock isn't immediately available)
+#
+# Debugging tip: Change VACUUM to VACUUM VERBOSE to get feedback on what's
+# really going on
+
+# Use name type here to avoid TOAST table:
+setup
+{
+  CREATE TABLE smalltbl AS SELECT i AS id, 't'::name AS t FROM generate_series(1,20) i;
+  ALTER TABLE smalltbl SET (autovacuum_enabled = off);
+  ALTER TABLE smalltbl ADD PRIMARY KEY (id);
+}
+setup
+{
+  VACUUM ANALYZE smalltbl;
+}
+
+teardown
+{
+  DROP TABLE smalltbl;
+}
+
+# This session holds a pin on smalltbl's only heap page:
+session pinholder
+step pinholder_cursor
+{
+  BEGIN;
+  DECLARE c1 CURSOR FOR SELECT 1 AS dummy FROM smalltbl;
+  FETCH NEXT FROM c1;
+}
+step pinholder_commit
+{
+  COMMIT;
+}
+
+# This session inserts and deletes tuples, potentially affecting reltuples:
+session dml
+step dml_insert
+{
+  INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl;
+}
+step dml_delete
+{
+  DELETE FROM smalltbl WHERE id = (SELECT min(id) FROM smalltbl);
+}
+step dml_begin            { BEGIN; }
+step dml_key_share        { SELECT id FROM smalltbl WHERE id = 3 FOR KEY SHARE; }
+step dml_commit           { COMMIT; }
+
+# Needed for MultiXact test:
+session dml_other
+step dml_other_begin      { BEGIN; }
+step dml_other_key_share  { SELECT id FROM smalltbl WHERE id = 3 FOR KEY SHARE; }
+step dml_other_update     { UPDATE smalltbl SET t = 'u' WHERE id = 3; }
+step dml_other_commit     { COMMIT; }
+
+# This session runs non-aggressive VACUUM, but with maximally aggressive
+# cutoffs for tuple freezing (e.g., FreezeLimit == OldestXmin):
+session vacuumer
+setup
+{
+  SET vacuum_freeze_min_age = 0;
+  SET vacuum_multixact_freeze_min_age = 0;
+}
+step vacuumer_nonaggressive_vacuum
+{
+  VACUUM smalltbl;
+}
+step vacuumer_pg_class_stats
+{
+  SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass;
+}
+
+# Test VACUUM's reltuples counting mechanism.
+#
+# Final pg_class.reltuples should never be affected by VACUUM's inability to
+# get a cleanup lock on any page, except to the extent that any cleanup lock
+# contention changes the number of tuples that remain ("missed dead" tuples
+# are counted in reltuples, much like "recently dead" tuples).
+
+# Easy case:
+permutation
+    vacuumer_pg_class_stats  # Start with 20 tuples
+    dml_insert
+    vacuumer_nonaggressive_vacuum
+    vacuumer_pg_class_stats  # End with 21 tuples
+
+# Harder case -- count 21 tuples at the end (like last time), but with cleanup
+# lock contention this time:
+permutation
+    vacuumer_pg_class_stats  # Start with 20 tuples
+    dml_insert
+    pinholder_cursor
+    vacuumer_nonaggressive_vacuum
+    vacuumer_pg_class_stats  # End with 21 tuples
+    pinholder_commit  # order doesn't matter
+
+# Same as "harder case", but vary the order, and delete an inserted row:
+permutation
+    vacuumer_pg_class_stats  # Start with 20 tuples
+    pinholder_cursor
+    dml_insert
+    dml_delete
+    dml_insert
+    vacuumer_nonaggressive_vacuum
+    # reltuples is 21 here again -- "recently dead" tuple won't be included in
+    # count here:
+    vacuumer_pg_class_stats
+    pinholder_commit  # order doesn't matter
+
+# Same as "harder case", but initial insert and delete before cursor:
+permutation
+    vacuumer_pg_class_stats  # Start with 20 tuples
+    dml_insert
+    dml_delete
+    pinholder_cursor
+    dml_insert
+    vacuumer_nonaggressive_vacuum
+    # reltuples is 21 here again -- "missed dead" tuple ("recently dead" when
+    # concurrent activity held back VACUUM's OldestXmin) won't be included in
+    # count here:
+    vacuumer_pg_class_stats
+    pinholder_commit  # order doesn't matter
+
+# Test VACUUM's mechanism for skipping MultiXact freezing.
+#
+# This provides test coverage for code paths that are only hit when we need to
+# freeze, but inability to acquire a cleanup lock on a heap page makes
+# freezing some XIDs/MXIDs < FreezeLimit/MultiXactCutoff impossible (without
+# waiting for a cleanup lock, which non-aggressive VACUUM is unwilling to do).
+permutation
+    dml_begin
+    dml_other_begin
+    dml_key_share
+    dml_other_key_share
+    # Will get cleanup lock, can't advance relminmxid yet:
+    # (though will usually advance relfrozenxid by ~2 XIDs)
+    vacuumer_nonaggressive_vacuum
+    pinholder_cursor
+    dml_other_update
+    dml_commit
+    dml_other_commit
+    # Can't cleanup lock, so still can't advance relminmxid here:
+    # (relfrozenxid held back by XIDs in MultiXact too)
+    vacuumer_nonaggressive_vacuum
+    pinholder_commit
+    # Pin was dropped, so will advance relminmxid, at long last:
+    # (ditto for relfrozenxid advancement)
+    vacuumer_nonaggressive_vacuum
diff --git a/src/test/isolation/specs/vacuum-reltuples.spec b/src/test/isolation/specs/vacuum-reltuples.spec
deleted file mode 100644
index a2a461f2f..000000000
--- a/src/test/isolation/specs/vacuum-reltuples.spec
+++ /dev/null
@@ -1,49 +0,0 @@
-# Test for vacuum's handling of reltuples when pages are skipped due
-# to page pins. We absolutely need to avoid setting reltuples=0 in
-# such cases, since that interferes badly with planning.
-#
-# Expected result for all three permutation is 21 tuples, including
-# the second permutation.  VACUUM is able to count the concurrently
-# inserted tuple in its final reltuples, even when a cleanup lock
-# cannot be acquired on the affected heap page.
-
-setup {
-    create table smalltbl
-        as select i as id from generate_series(1,20) i;
-    alter table smalltbl set (autovacuum_enabled = off);
-}
-setup {
-    vacuum analyze smalltbl;
-}
-
-teardown {
-    drop table smalltbl;
-}
-
-session worker
-step open {
-    begin;
-    declare c1 cursor for select 1 as dummy from smalltbl;
-}
-step fetch1 {
-    fetch next from c1;
-}
-step close {
-    commit;
-}
-step stats {
-    select relpages, reltuples from pg_class
-     where oid='smalltbl'::regclass;
-}
-
-session vacuumer
-step vac {
-    vacuum smalltbl;
-}
-step modify {
-    insert into smalltbl select max(id)+1 from smalltbl;
-}
-
-permutation modify vac stats
-permutation modify open fetch1 vac close stats
-permutation modify vac stats
-- 
2.32.0

