From 5d590d9e2fbb55e09ea200299754fe51e0f167bb Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Tue, 4 Feb 2025 12:04:15 -0800
Subject: [PATCH v1] Fix assertion when decoding XLOG_PARAMETER_CHANGE on
 promoted primary

When a standby replays an XLOG_PARAMETER_CHANGE record that lowers
wal_level to below logical, we invalidate all logical slots if in hot
standby mode. However, if this record was replayed while not in hot
standby mode, logical slots could remain valid even after promotion,
leading to an assertion failure when decoding that WAL record.

To fix this issue, this commit modifies the replication slot startup
process to allow server startup with pre-existing logical slots only
when hot_standby is enabled. This check ensures that logical slots
are invalidated when they become incompatible due to insufficient
wal_level during recovery.
Backpatch to v16 where logical decoding on standby was introduced.

Reviewed-by: Amit Kapila
Discussion: https://postgr.es/m/CAD21AoABoFwGY_Rh2aeE6tEq3HkJxf0c6UeOXn4VV9v6BAQPSw%40mail.gmail.com
Backpatch-through: 16
---
 src/backend/replication/slot.c                | 32 +++++++++++++++
 .../t/035_standby_logical_decoding.pl         | 39 +++++++++++++++++++
 2 files changed, 71 insertions(+)

diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index c57a13d8208..0265de29446 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -1929,6 +1929,9 @@ CheckPointReplicationSlots(bool is_shutdown)
 /*
  * Load all replication slots from disk into memory at server startup. This
  * needs to be run before we start crash recovery.
+ *
+ * If in the standby mode mode, we check if there is pre-existing logical
+ * slots.
  */
 void
 StartupReplicationSlots(void)
@@ -1979,6 +1982,35 @@ StartupReplicationSlots(void)
 	if (max_replication_slots <= 0)
 		return;
 
+	/*
+	 * In standby mode, we verify if any logical replication slots exist, and
+	 * only allow startup if hot standby is enabled. This check is necessary
+	 * to ensure logical slots are invalidated when they become incompatible
+	 * due to insufficient wal_level. Otherwise, if the primary reduces
+	 * wal_level < logical while hot standby is disabled, logical slots would
+	 * remain valid even after promotion.
+	 */
+	if (StandbyMode)
+	{
+		for (int i = 0; i < max_replication_slots; i++)
+		{
+			ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
+
+			/* Skip unused slots */
+			if (!s->in_use)
+				continue;
+
+			/* Skip invalid slots */
+			if (s->data.invalidated != RS_INVAL_NONE)
+				continue;
+
+			if (SlotIsLogical(s) && !EnableHotStandby)
+				ereport(FATAL,
+						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+						 errmsg("hot standby must be enabled for pre-existing logical replication slots")));
+		}
+	}
+
 	/* Now that we have recovered all the data, compute replication xmin */
 	ReplicationSlotsComputeRequiredXmin(false);
 	ReplicationSlotsComputeRequiredLSN();
diff --git a/src/test/recovery/t/035_standby_logical_decoding.pl b/src/test/recovery/t/035_standby_logical_decoding.pl
index 505e85d1eb6..c72554cdf33 100644
--- a/src/test/recovery/t/035_standby_logical_decoding.pl
+++ b/src/test/recovery/t/035_standby_logical_decoding.pl
@@ -342,6 +342,45 @@ $psql_subscriber{run} = IPC::Run::start(
 	\$psql_subscriber{subscriber_stderr},
 	IPC::Run::timeout($default_timeout));
 
+##################################################
+# Test that the standby requires hot_standby to be
+# enabled for pre-existing logical slots.
+##################################################
+
+# create the logical slots
+$node_standby->create_logical_slot_on_standby($node_primary, 'restart_test');
+$node_standby->stop;
+$node_standby->append_conf(
+    'postgresql.conf', qq[hot_standby = off]);
+
+# Use run_log instead of $node_standby->start because this test expects
+# that the server ends with an error during startup.
+run_log(
+    [
+     'pg_ctl',
+     '--pgdata' => $node_standby->data_dir,
+     '--log' => $node_standby->logfile,
+     'start',
+    ]);
+
+# wait for postgres to terminate
+foreach my $i (0 .. 10 * $PostgreSQL::Test::Utils::timeout_default)
+{
+    last if !-f $node_standby->data_dir . '/postmaster.pid';
+    usleep(100_000);
+}
+
+# Confirm that the server startup fails with an expected error
+my $logfile = slurp_file($node_standby->logfile());
+ok( $logfile =~
+    qr/FATAL: .* hot standby must be enabled for pre-existing logical replication slots/,
+    "the standby ends with an error during startup because hot_standby was disabled"
+    );
+$node_standby->adjust_conf('postgresql.conf', 'hot_standby', 'on');
+$node_standby->start;
+$node_standby->safe_psql('postgres',
+			 qq[SELECT pg_drop_replication_slot('restart_test')]);
+
 ##################################################
 # Test that logical decoding on the standby
 # behaves correctly.
-- 
2.43.5

