From 272477a0afc5e9b12defe4b10ec2bf3f1e6a8c67 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Mon, 12 Sep 2022 12:22:43 +0000
Subject: [PATCH v3] Avoid race condition in resetting
 XLogCtl->InstallXLogFileSegmentActive

There's a possibility of recovery hitting
Assert(!IsInstallXLogFileSegmentActive()); while reading WAL from
archive immediately after failing to stream from primary. The
pattern seems to be that the walreceiver gets killed or crashed
and its status set to WALRCV_STOPPING or WALRCV_STOPPED when
WaitForWALToBecomeAvailable()'s state machine is in stream mode,
by the team the state machine moves to archive and the code that
resets XLogCtl->InstallXLogFileSegmentActive conditionally isn't
getting hit, hence the assertion failure.

Per commit cc2c7d65fc27e877c9f407587b0b92d46cd6dd16, WAL recycling
and preallocation conflicts with archive recovery hence we needed
to disable it by resetting XLogCtl->InstallXLogFileSegmentActive
before switching WaitForWALToBecomeAvailable()'s state machine
from streaming to archive. The commit did this by adding it as
part of walreceiver shutdown code which isn't called
unconditionally always causing race conditions and assertion failure.

A better way to ensure that WAL recycling and preallocation is
disabled in archive recovery is to reset
XLogCtl->InstallXLogFileSegmentActive when
WaitForWALToBecomeAvailable()'s state machine switches the WAL
source to archive.

Reported-by: Dilip Kumar
Reported-by: Kyotaro Horiguchi
Reviewed-by: Michael Paquier
Author: Bharath Rupireddy
Discussion: https://www.postgresql.org/message-id/CAFiTN-sE3ry=ycMPVtC+Djw4Fd7gbUGVv_qqw6qfzp=JLvqT3g@mail.gmail.com
Discussion: https://www.postgresql.org/message-id/20220909.172949.2223165886970819060.horikyota.ntt@gmail.com
---
 src/backend/access/transam/xlog.c         | 24 ++++++++---------------
 src/backend/access/transam/xlogrecovery.c | 21 ++++++++++++++------
 src/include/access/xlog.h                 |  3 +--
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7a710e6490..ca5b6e798e 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -4500,7 +4500,7 @@ BootStrapXLOG(void)
 	pg_crc32c	crc;
 
 	/* allow ordinary WAL segment creation, like StartupXLOG() would */
-	SetInstallXLogFileSegmentActive();
+	UpdateInstallXLogFileSegmentActive(false);
 
 	/*
 	 * Select a hopefully-unique system identifier code for this installation.
@@ -5364,7 +5364,7 @@ StartupXLOG(void)
 	 * Allow ordinary WAL segment creation before possibly switching to a new
 	 * timeline, which creates a new segment, and after the last ReadRecord().
 	 */
-	SetInstallXLogFileSegmentActive();
+	UpdateInstallXLogFileSegmentActive(false);
 
 	/*
 	 * Consider whether we need to assign a new timeline ID.
@@ -8838,23 +8838,15 @@ GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli)
 	LWLockRelease(ControlFileLock);
 }
 
-/* Thin wrapper around ShutdownWalRcv(). */
+/* Enable/Disable WAL file recycling and preallocation. */
 void
-XLogShutdownWalRcv(void)
-{
-	ShutdownWalRcv();
-
-	LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-	XLogCtl->InstallXLogFileSegmentActive = false;
-	LWLockRelease(ControlFileLock);
-}
-
-/* Enable WAL file recycling and preallocation. */
-void
-SetInstallXLogFileSegmentActive(void)
+UpdateInstallXLogFileSegmentActive(bool reset)
 {
 	LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-	XLogCtl->InstallXLogFileSegmentActive = true;
+	if (reset)
+		XLogCtl->InstallXLogFileSegmentActive = false;
+	else
+		XLogCtl->InstallXLogFileSegmentActive = true;
 	LWLockRelease(ControlFileLock);
 }
 
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index 9a80084a68..f21f67b85a 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -1395,7 +1395,7 @@ FinishWalRecovery(void)
 	 * over these records and subsequent ones if it's still alive when we
 	 * start writing WAL.
 	 */
-	XLogShutdownWalRcv();
+	ShutdownWalRcv();
 
 	/*
 	 * We are now done reading the xlog from stream. Turn off streaming
@@ -1403,7 +1403,7 @@ FinishWalRecovery(void)
 	 * recovery, e.g., timeline history file) from archive or pg_wal.
 	 *
 	 * Note that standby mode must be turned off after killing WAL receiver,
-	 * i.e., calling XLogShutdownWalRcv().
+	 * i.e., calling ShutdownWalRcv().
 	 */
 	Assert(!WalRcvStreaming());
 	StandbyMode = false;
@@ -3485,7 +3485,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					 */
 					if (StandbyMode && CheckForStandbyTrigger())
 					{
-						XLogShutdownWalRcv();
+						ShutdownWalRcv();
 						return XLREAD_FAIL;
 					}
 
@@ -3533,7 +3533,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					 * WAL that we restore from archive.
 					 */
 					if (WalRcvStreaming())
-						XLogShutdownWalRcv();
+						ShutdownWalRcv();
 
 					/*
 					 * Before we sleep, re-scan for possible new timelines if
@@ -3599,9 +3599,18 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 		}
 
 		if (currentSource != oldSource)
+		{
+			/*
+			 * WAL segment installation conflicts with archive recovery, hence
+			 * make sure it is turned off before fetching WAL from archive.
+			 */
+			if (currentSource == XLOG_FROM_ARCHIVE)
+				UpdateInstallXLogFileSegmentActive(true);
+
 			elog(DEBUG2, "switched WAL source from %s to %s after %s",
 				 xlogSourceNames[oldSource], xlogSourceNames[currentSource],
 				 lastSourceFailed ? "failure" : "success");
+		}
 
 		/*
 		 * We've now handled possible failure. Try to read from the chosen
@@ -3663,7 +3672,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 					 */
 					if (pendingWalRcvRestart && !startWalReceiver)
 					{
-						XLogShutdownWalRcv();
+						ShutdownWalRcv();
 
 						/*
 						 * Re-scan for possible new timelines if we were
@@ -3713,7 +3722,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 									 tli, curFileTLI);
 						}
 						curFileTLI = tli;
-						SetInstallXLogFileSegmentActive();
+						UpdateInstallXLogFileSegmentActive(false);
 						RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
 											 PrimarySlotName,
 											 wal_receiver_create_temp_slot);
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index cd674c3c23..a23fe83b52 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -255,9 +255,8 @@ extern void RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI);
 extern bool XLogCheckpointNeeded(XLogSegNo new_segno);
 extern void SwitchIntoArchiveRecovery(XLogRecPtr EndRecPtr, TimeLineID replayTLI);
 extern void ReachedEndOfBackup(XLogRecPtr EndRecPtr, TimeLineID tli);
-extern void SetInstallXLogFileSegmentActive(void);
+extern void UpdateInstallXLogFileSegmentActive(bool reset);
 extern bool IsInstallXLogFileSegmentActive(void);
-extern void XLogShutdownWalRcv(void);
 
 /*
  * Routines to start, stop, and get status of a base backup.
-- 
2.34.1

