From 58fa583c5496390e1919b6a3fca975d8de45cb0e Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Tue, 31 Jan 2023 10:58:26 +0900
Subject: [PATCH v3] Fix a race condition of updating
 procArray->replication_slot_xmin.

Previously, if already_locked is false,
ReplicationSlotsComputeRequiredXmin() computed the oldest xmin across
all slots while not holding the ProcArrayLock and acquires the
ProcArrayLock just before updating the
replication_slot_xmin. Therefore, it was possible that by the time a
process computes the oldest xmin and before updating the
replication_slot_xmin, another process computes and updates it. As a
result, the replication_slot_xmin could be overwritten with an old
value and retreated.

In the reported failure, after a walsender who was creating a
replication slot updated the replication_slot_xmin via
CreateInitDecodingContext(), another walsender overwrote it with
InvalidTransactionId. Then the walsender creating the replication slot
ended up computing the oldest safe decoding transaction id without
considering the replication_slot_xmin. That led to an error "cannot
build an initial slot snapshot as oldest safe xid %u follows
snapshot's xmin %u", which was an assertion failure prior to
240e0dbacd3.

This commit changes ReplicationSlotsComputeRequiredXmin() so that it
computes the oldest xmin while holding the ProcArrayLock in exclusive
mode. We keep already_locked parameter in
ProcArraySetReplicationSlotXmin() on backbranches to not break ABI
compatibility.

Discussion: https://postgr.es/m/CAA4eK1L8wYcyTPxNzPGkhuO52WBGoOZbT0A73Le=ZUWYAYmdfw@mail.gmail.com
Backpatch-through: 11
---
 src/backend/replication/slot.c      | 13 +++++++++++++
 src/backend/storage/ipc/procarray.c | 11 ++++-------
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 80d96db8eb..916190ae72 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -839,6 +839,16 @@ ReplicationSlotsComputeRequiredXmin(bool already_locked)
 
 	Assert(ReplicationSlotCtl != NULL);
 
+	/*
+	 * It is possible that by the time we compute the agg_xmin here and before
+	 * updating replication_slot_xmin, the CreateInitDecodingContext() will
+	 * compute and update replication_slot_xmin. So, we need to acquire
+	 * ProcArrayLock here to avoid retreating the value of
+	 * replication_slot_xmin.
+	 */
+	if (!already_locked)
+		LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+
 	LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
 
 	for (i = 0; i < max_replication_slots; i++)
@@ -878,6 +888,9 @@ ReplicationSlotsComputeRequiredXmin(bool already_locked)
 	LWLockRelease(ReplicationSlotControlLock);
 
 	ProcArraySetReplicationSlotXmin(agg_xmin, agg_catalog_xmin, already_locked);
+
+	if (!already_locked)
+		LWLockRelease(ProcArrayLock);
 }
 
 /*
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 655d11e2f9..4b9306f917 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -3896,22 +3896,19 @@ TerminateOtherDBBackends(Oid databaseId)
  * Install limits to future computations of the xmin horizon to prevent vacuum
  * and HOT pruning from removing affected rows still needed by clients with
  * replication slots.
+ *
+ * NB: the caller must hold ProcArrayLock in an exclusive mode regardless of
+ * already_locked which is unused now but kept for ABI compatibility.
  */
 void
 ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin,
 								bool already_locked)
 {
-	Assert(!already_locked || LWLockHeldByMe(ProcArrayLock));
-
-	if (!already_locked)
-		LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+	Assert(LWLockHeldByMeInMode(ProcArrayLock, LW_EXCLUSIVE));
 
 	procArray->replication_slot_xmin = xmin;
 	procArray->replication_slot_catalog_xmin = catalog_xmin;
 
-	if (!already_locked)
-		LWLockRelease(ProcArrayLock);
-
 	elog(DEBUG1, "xmin required by slots: data %u, catalog %u",
 		 xmin, catalog_xmin);
 }
-- 
2.31.1

