On 09/12/2025 14:00, Heikki Linnakangas wrote:
1. Currently, at multixid wraparound, MultiXactState->nextMXact goes to 0, which is invalid. All the readers must be prepared for that, and skip over the 0. That's error-prone, we've already missed that a few times. Let's change things so that the code that *writes* MultiXactState- >nextMXact skips over the zero already.

Here's a patch for that. Does anyone see a problem with this?

- Heikki
From fb5865dcc5654a601cc8db796e62ea928016396a Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <[email protected]>
Date: Wed, 10 Dec 2025 21:16:29 +0200
Subject: [PATCH v1 1/1] refactor: Never store 0 as the nextMXact

Before this commit, when multixid wraparound happens,
MultiXactState->nextMXact goes to 0, which is invalid. All the readers
deal with that possibility and skip over the 0. That's error-prone,
we've missed that a few times in the past. This commit changes the
responsibility so that all writers of MultiXactState->nextMXact skip
over the zero already, and readers can trust that it's never 0.

Discussion: https://www.postgresql.org/message-id/[email protected]
---
 src/backend/access/transam/multixact.c | 79 +++++++-------------------
 src/bin/pg_resetwal/pg_resetwal.c      |  2 +
 src/bin/pg_resetwal/t/001_basic.pl     | 15 +----
 3 files changed, 24 insertions(+), 72 deletions(-)

diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 6ca3d44261e..2e20a0907d8 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -104,6 +104,12 @@ PreviousMultiXactId(MultiXactId multi)
 	return multi == FirstMultiXactId ? MaxMultiXactId : multi - 1;
 }
 
+static inline MultiXactId
+NextMultiXactId(MultiXactId multi)
+{
+	return multi == MaxMultiXactId ? FirstMultiXactId : multi + 1;
+}
+
 /*
  * Links to shared-memory data structures for MultiXact control
  */
@@ -552,14 +558,7 @@ MultiXactIdSetOldestMember(void)
 		 */
 		LWLockAcquire(MultiXactGenLock, LW_SHARED);
 
-		/*
-		 * We have to beware of the possibility that nextMXact is in the
-		 * wrapped-around state.  We don't fix the counter itself here, but we
-		 * must be sure to store a valid value in our array entry.
-		 */
 		nextMXact = MultiXactState->nextMXact;
-		if (nextMXact < FirstMultiXactId)
-			nextMXact = FirstMultiXactId;
 
 		OldestMemberMXactId[MyProcNumber] = nextMXact;
 
@@ -596,15 +595,7 @@ MultiXactIdSetOldestVisible(void)
 
 		LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
 
-		/*
-		 * We have to beware of the possibility that nextMXact is in the
-		 * wrapped-around state.  We don't fix the counter itself here, but we
-		 * must be sure to store a valid value in our array entry.
-		 */
 		oldestMXact = MultiXactState->nextMXact;
-		if (oldestMXact < FirstMultiXactId)
-			oldestMXact = FirstMultiXactId;
-
 		for (i = 0; i < MaxOldestSlot; i++)
 		{
 			MultiXactId thisoldest = OldestMemberMXactId[i];
@@ -637,9 +628,6 @@ ReadNextMultiXactId(void)
 	mxid = MultiXactState->nextMXact;
 	LWLockRelease(MultiXactGenLock);
 
-	if (mxid < FirstMultiXactId)
-		mxid = FirstMultiXactId;
-
 	return mxid;
 }
 
@@ -654,11 +642,6 @@ ReadMultiXactIdRange(MultiXactId *oldest, MultiXactId *next)
 	*oldest = MultiXactState->oldestMultiXactId;
 	*next = MultiXactState->nextMXact;
 	LWLockRelease(MultiXactGenLock);
-
-	if (*oldest < FirstMultiXactId)
-		*oldest = FirstMultiXactId;
-	if (*next < FirstMultiXactId)
-		*next = FirstMultiXactId;
 }
 
 
@@ -794,9 +777,7 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
 	entryno = MultiXactIdToOffsetEntry(multi);
 
 	/* position of the next multixid */
-	next = multi + 1;
-	if (next < FirstMultiXactId)
-		next = FirstMultiXactId;
+	next = NextMultiXactId(multi);
 	next_pageno = MultiXactIdToOffsetPage(next);
 	next_entryno = MultiXactIdToOffsetEntry(next);
 
@@ -955,10 +936,6 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
 
 	LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
 
-	/* Handle wraparound of the nextMXact counter */
-	if (MultiXactState->nextMXact < FirstMultiXactId)
-		MultiXactState->nextMXact = FirstMultiXactId;
-
 	/* Assign the MXID */
 	result = MultiXactState->nextMXact;
 
@@ -1025,7 +1002,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
 		 * request only once per 64K multis generated.  This still gives
 		 * plenty of chances before we get into real trouble.
 		 */
-		if (IsUnderPostmaster && (result % 65536) == 0)
+		if (IsUnderPostmaster && ((result % 65536) == 0 || result == FirstMultiXactId))
 			SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
 
 		if (!MultiXactIdPrecedes(result, multiWarnLimit))
@@ -1056,15 +1033,13 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
 		/* Re-acquire lock and start over */
 		LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
 		result = MultiXactState->nextMXact;
-		if (result < FirstMultiXactId)
-			result = FirstMultiXactId;
 	}
 
 	/*
 	 * Make sure there is room for the next MXID in the file.  Assigning this
 	 * MXID sets the next MXID's offset already.
 	 */
-	ExtendMultiXactOffset(result + 1);
+	ExtendMultiXactOffset(NextMultiXactId(result));
 
 	/*
 	 * Reserve the members space, similarly to above.
@@ -1098,15 +1073,8 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
 	/*
 	 * Advance counters.  As in GetNewTransactionId(), this must not happen
 	 * until after file extension has succeeded!
-	 *
-	 * We don't care about MultiXactId wraparound here; it will be handled by
-	 * the next iteration.  But note that nextMXact may be InvalidMultiXactId
-	 * or the first value on a segment-beginning page after this routine
-	 * exits, so anyone else looking at the variable must be prepared to deal
-	 * with either case.
 	 */
-	(MultiXactState->nextMXact)++;
-
+	MultiXactState->nextMXact = NextMultiXactId(result);
 	MultiXactState->nextOffset += nmembers;
 
 	LWLockRelease(MultiXactGenLock);
@@ -1252,9 +1220,7 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
 		MultiXactOffset nextMXOffset;
 
 		/* handle wraparound if needed */
-		tmpMXact = multi + 1;
-		if (tmpMXact < FirstMultiXactId)
-			tmpMXact = FirstMultiXactId;
+		tmpMXact = NextMultiXactId(multi);
 
 		prev_pageno = pageno;
 
@@ -1898,7 +1864,7 @@ TrimMultiXact(void)
 		LWLock	   *lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
 
 		LWLockAcquire(lock, LW_EXCLUSIVE);
-		if (entryno == 0)
+		if (entryno == 0 || nextMXact == FirstMultiXactId)
 			slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
 		else
 			slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, nextMXact);
@@ -2014,8 +1980,10 @@ void
 MultiXactSetNextMXact(MultiXactId nextMulti,
 					  MultiXactOffset nextMultiOffset)
 {
+	Assert(MultiXactIdIsValid(nextMulti));
 	debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %" PRIu64,
 				nextMulti, nextMultiOffset);
+
 	LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
 	MultiXactState->nextMXact = nextMulti;
 	MultiXactState->nextOffset = nextMultiOffset;
@@ -2184,6 +2152,8 @@ void
 MultiXactAdvanceNextMXact(MultiXactId minMulti,
 						  MultiXactOffset minMultiOffset)
 {
+	Assert(MultiXactIdIsValid(minMulti));
+
 	LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
 	if (MultiXactIdPrecedes(MultiXactState->nextMXact, minMulti))
 	{
@@ -2321,7 +2291,6 @@ MultiXactId
 GetOldestMultiXactId(void)
 {
 	MultiXactId oldestMXact;
-	MultiXactId nextMXact;
 	int			i;
 
 	/*
@@ -2329,17 +2298,7 @@ GetOldestMultiXactId(void)
 	 * OldestVisibleMXactId[] entries, or nextMXact if none are valid.
 	 */
 	LWLockAcquire(MultiXactGenLock, LW_SHARED);
-
-	/*
-	 * We have to beware of the possibility that nextMXact is in the
-	 * wrapped-around state.  We don't fix the counter itself here, but we
-	 * must be sure to use a valid value in our calculation.
-	 */
-	nextMXact = MultiXactState->nextMXact;
-	if (nextMXact < FirstMultiXactId)
-		nextMXact = FirstMultiXactId;
-
-	oldestMXact = nextMXact;
+	oldestMXact = MultiXactState->nextMXact;
 	for (i = 0; i < MaxOldestSlot; i++)
 	{
 		MultiXactId thisoldest;
@@ -2660,6 +2619,7 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
 
 	Assert(!RecoveryInProgress());
 	Assert(MultiXactState->finishedStartup);
+	Assert(MultiXactIdIsValid(newOldestMulti));
 
 	/*
 	 * We can only allow one truncation to happen at once. Otherwise parts of
@@ -2674,7 +2634,6 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
 	nextOffset = MultiXactState->nextOffset;
 	oldestMulti = MultiXactState->oldestMultiXactId;
 	LWLockRelease(MultiXactGenLock);
-	Assert(MultiXactIdIsValid(oldestMulti));
 
 	/*
 	 * Make sure to only attempt truncation if there's values to truncate
@@ -2944,7 +2903,7 @@ multixact_redo(XLogReaderState *record)
 						   xlrec->members);
 
 		/* Make sure nextMXact/nextOffset are beyond what this record has */
-		MultiXactAdvanceNextMXact(xlrec->mid + 1,
+		MultiXactAdvanceNextMXact(NextMultiXactId(xlrec->mid),
 								  xlrec->moff + xlrec->nmembers);
 
 		/*
diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c
index 56012d5f4c4..9bfab8c307b 100644
--- a/src/bin/pg_resetwal/pg_resetwal.c
+++ b/src/bin/pg_resetwal/pg_resetwal.c
@@ -287,6 +287,8 @@ main(int argc, char *argv[])
 				 * XXX It'd be nice to have more sanity checks here, e.g. so
 				 * that oldest is not wrapped around w.r.t. nextMulti.
 				 */
+				if (next_mxid_val == 0)
+					pg_fatal("next multitransaction ID (-m) must not be 0");
 				if (oldest_mxid_val == 0)
 					pg_fatal("oldest multitransaction ID (-m) must not be 0");
 				mxids_given = true;
diff --git a/src/bin/pg_resetwal/t/001_basic.pl b/src/bin/pg_resetwal/t/001_basic.pl
index 8bab9add74f..dde024a7f14 100644
--- a/src/bin/pg_resetwal/t/001_basic.pl
+++ b/src/bin/pg_resetwal/t/001_basic.pl
@@ -119,19 +119,10 @@ command_fails_like(
 	[ 'pg_resetwal', '-m' => '10,bar', $node->data_dir ],
 	qr/error: invalid argument for option -m/,
 	'fails with incorrect -m option part 2');
-
-# This used to be forbidden, but nextMulti can legitimately be 0 after
-# wraparound, so we now accept it in pg_resetwal too.
-command_ok(
-	[ 'pg_resetwal', '-m' => '0,10', $node->data_dir ],
-	'succeeds with -m value 0 in the first part');
-
-# -0 doesn't make sense however
 command_fails_like(
-	[ 'pg_resetwal', '-m' => '-0,10', $node->data_dir ],
-	qr/error: invalid argument for option -m/,
-	'fails with -m value -0 in the first part');
-
+	[ 'pg_resetwal', '-m' => '0,10', $node->data_dir ],
+	qr/must not be 0/,
+	'fails with -m value 0 in the first part');
 command_fails_like(
 	[ 'pg_resetwal', '-m' => '10,0', $node->data_dir ],
 	qr/must not be 0/,
-- 
2.47.3

Reply via email to