From 1d131bbf3b260fffd8a5cfcaf9595839e681f4ba Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Tue, 31 Jan 2023 10:58:26 +0900
Subject: [PATCH v2] Fix a race condition of updating
 procArray->replication_slot_xmin.

Previously, ReplicationSlotsComputeRequiredXmin() computed the oldest
xmin across all slots while not holding ProcArrayLock if
already_locked is false, and acquires the ProcArrayLock just before
updating the replication slot xmin. Therefore, if a process calls
ReplicationSlotsComputeRequiredXmin() with already_locked being false
and another process updates the replication slot xmin before the
process acquiring the lock, the slot xmin was overwritten with an old
value.

In the reported failure, a walsender for an apply worker computes
InvalidTransaction as the oldest xmin and overwrote a valid
replication slot xmin value computed by a walsender for a tablesync
worker with this value. Then the walsender for a tablesync worker
ended up computing the transaction id by
GetOldestSafeDecodingTransactionId() without considering replication
slot xmin. That led to an error ""cannot build an initial slot
snapshot as oldest safe xid %u follows snapshot's xmin %u", which was
an assertion failure prior to 240e0dbacd3.

This commit changes ReplicationSlotsComputeRequiredXmin() so that it
computes the oldest xmin while holding ProcArrayLock in exclusive
mode. We keep already_locked parameter in
ProcArraySetReplicationSlotXmin() on backbranches to not break ABI
compatibility.
---
 src/backend/replication/slot.c      |  6 ++++++
 src/backend/storage/ipc/procarray.c | 11 ++++-------
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 329599e99d..cb726f9236 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -839,6 +839,9 @@ ReplicationSlotsComputeRequiredXmin(bool already_locked)
 
 	Assert(ReplicationSlotCtl != NULL);
 
+	if (!already_locked)
+		LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+
 	LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
 
 	for (i = 0; i < max_replication_slots; i++)
@@ -871,6 +874,9 @@ ReplicationSlotsComputeRequiredXmin(bool already_locked)
 	LWLockRelease(ReplicationSlotControlLock);
 
 	ProcArraySetReplicationSlotXmin(agg_xmin, agg_catalog_xmin, already_locked);
+
+	if (!already_locked)
+		LWLockRelease(ProcArrayLock);
 }
 
 /*
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index dadaa958a8..6c7c0fbb43 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -3885,21 +3885,18 @@ TerminateOtherDBBackends(Oid databaseId)
  * Install limits to future computations of the xmin horizon to prevent vacuum
  * and HOT pruning from removing affected rows still needed by clients with
  * replication slots.
+ *
+ * NB: the caller must hold ProcArrayLock in an exclusive mode regardless of
+ * already_locked which is unused now but kept for ABI compatibility.
  */
 void
 ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin,
 								bool already_locked)
 {
-	Assert(!already_locked || LWLockHeldByMe(ProcArrayLock));
-
-	if (!already_locked)
-		LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+	Assert(LWLockHeldByMeInMode(ProcArrayLock, LW_EXCLUSIVE));
 
 	procArray->replication_slot_xmin = xmin;
 	procArray->replication_slot_catalog_xmin = catalog_xmin;
-
-	if (!already_locked)
-		LWLockRelease(ProcArrayLock);
 }
 
 /*
-- 
2.31.1

