From 3641c20b2f5fec59642bb8515cf017f8188c8058 Mon Sep 17 00:00:00 2001
From: Khanna <Shubham.Khanna@fujitsu.com>
Date: Tue, 24 Dec 2024 12:27:27 +0530
Subject: [PATCH v4] Validate max_slot_wal_keep_size in pg_createsubscriber

This patch introduces validation for the 'max_slot_wal_keep_size' GUC in the
'pg_createsubscriber' utility. The utility now checks that the publisher's
'max_slot_wal_keep_size' is set to '-1' during subscription creation.
This ensures proper functioning of logical replication slots.

The 'pg_createsubscriber' utility is updated to fetch and validate the
'max_slot_wal_keep_size' setting from the publisher. In '--dry-run' mode, an
error is reported if the parameter is set to any value other than -1.

By ensuring 'max_slot_wal_keep_size' is -1, this patch prevents the potential
removal of required WAL files on the publisher, which could disrupt logical
replication. A test case has been added to verify that the error is reported
when 'max_slot_wal_keep_size' is misconfigured.
---
 doc/src/sgml/ref/pg_createsubscriber.sgml     |  7 ++++++
 src/bin/pg_basebackup/pg_createsubscriber.c   | 25 ++++++++++++++++++-
 .../t/040_pg_createsubscriber.pl              | 23 ++++++++++++++---
 3 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/doc/src/sgml/ref/pg_createsubscriber.sgml b/doc/src/sgml/ref/pg_createsubscriber.sgml
index 26b8e64a4e..59bc97728e 100644
--- a/doc/src/sgml/ref/pg_createsubscriber.sgml
+++ b/doc/src/sgml/ref/pg_createsubscriber.sgml
@@ -313,6 +313,13 @@ PostgreSQL documentation
     linkend="guc-max-wal-senders"/> configured to a value greater than or equal
     to the number of specified databases and existing WAL sender processes.
    </para>
+
+   <para>
+    <xref linkend="guc-max-slot-wal-keep-size"/> must be set to
+    <literal>-1</literal> to prevent the automatic removal of WAL files needed
+    by replication slots. Setting this parameter to a specific size may lead to
+    replication failures if required WAL files are prematurely deleted.
+   </para>
   </refsect2>
 
   <refsect2>
diff --git a/src/bin/pg_basebackup/pg_createsubscriber.c b/src/bin/pg_basebackup/pg_createsubscriber.c
index e96370a9ec..3703dae33a 100644
--- a/src/bin/pg_basebackup/pg_createsubscriber.c
+++ b/src/bin/pg_basebackup/pg_createsubscriber.c
@@ -849,6 +849,7 @@ check_publisher(const struct LogicalRepInfo *dbinfo)
 	int			max_walsenders;
 	int			cur_walsenders;
 	int			max_prepared_transactions;
+	int			max_slot_wal_keep_size;
 
 	pg_log_info("checking settings on publisher");
 
@@ -872,6 +873,7 @@ check_publisher(const struct LogicalRepInfo *dbinfo)
 	 * - wal_level = logical
 	 * - max_replication_slots >= current + number of dbs to be converted
 	 * - max_wal_senders >= current + number of dbs to be converted
+	 * - max_slot_wal_keep_size = -1 (to prevent deletion of required WAL files)
 	 * -----------------------------------------------------------------------
 	 */
 	res = PQexec(conn,
@@ -880,7 +882,8 @@ check_publisher(const struct LogicalRepInfo *dbinfo)
 				 " (SELECT count(*) FROM pg_catalog.pg_replication_slots),"
 				 " pg_catalog.current_setting('max_wal_senders'),"
 				 " (SELECT count(*) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'walsender'),"
-				 " pg_catalog.current_setting('max_prepared_transactions')");
+				 " pg_catalog.current_setting('max_prepared_transactions'),"
+				 " pg_catalog.current_setting('max_slot_wal_keep_size')");
 
 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
 	{
@@ -895,6 +898,7 @@ check_publisher(const struct LogicalRepInfo *dbinfo)
 	max_walsenders = atoi(PQgetvalue(res, 0, 3));
 	cur_walsenders = atoi(PQgetvalue(res, 0, 4));
 	max_prepared_transactions = atoi(PQgetvalue(res, 0, 5));
+	max_slot_wal_keep_size = atoi(PQgetvalue(res, 0, 6));
 
 	PQclear(res);
 
@@ -905,6 +909,8 @@ check_publisher(const struct LogicalRepInfo *dbinfo)
 	pg_log_debug("publisher: current wal senders: %d", cur_walsenders);
 	pg_log_debug("publisher: max_prepared_transactions: %d",
 				 max_prepared_transactions);
+	pg_log_debug("publisher: max_slot_wal_keep_size: %d",
+				 max_slot_wal_keep_size);
 
 	disconnect_database(conn, false);
 
@@ -939,6 +945,23 @@ check_publisher(const struct LogicalRepInfo *dbinfo)
 							  "Prepared transactions will be replicated at COMMIT PREPARED.");
 	}
 
+	/*
+	 * Validate max_slot_wal_keep_size.
+	 *
+	 * When this parameter is set to anything other than -1, the publisher
+	 * may remove WAL files that a replication slot still needs, which would
+	 * break logical replication.  Note that this check is only performed in
+	 * dry-run mode.
+	 */
+	if (dry_run && max_slot_wal_keep_size != -1)
+	{
+		pg_log_error("publisher requires max_slot_wal_keep_size to be -1, but it is set to %d",
+					 max_slot_wal_keep_size);
+		pg_log_error_hint("Change the configuration parameter \"%s\" on the publisher to %d.",
+						  "max_slot_wal_keep_size", -1);
+		failed = true;
+	}
+
 	pg_free(wal_level);
 
 	if (failed)
diff --git a/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl b/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl
index 0a900edb65..f4c1e079b1 100644
--- a/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl
+++ b/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl
@@ -318,8 +318,14 @@ $node_p->safe_psql($db1,
 $node_p->wait_for_replay_catchup($node_s);
 $node_s->stop;
 
-# dry run mode on node S
-command_ok(
+# Configure 'max_slot_wal_keep_size = 1' on the publisher and
+# reload configuration
+$node_p->append_conf('postgresql.conf', 'max_slot_wal_keep_size = 1');
+$node_p->reload;
+
+# dry run mode on node S and verify the error message for misconfigured
+# 'max_slot_wal_keep_size'
+command_checks_all(
 	[
 		'pg_createsubscriber', '--verbose',
 		'--recovery-timeout', "$PostgreSQL::Test::Utils::timeout_default",
@@ -335,7 +341,18 @@ command_ok(
 		$db1, '--database',
 		$db2
 	],
-	'run pg_createsubscriber --dry-run on node S');
+	1,
+	[qr/./],
+	[
+		qr/pg_createsubscriber: error: publisher requires max_slot_wal_keep_size to be -1/,
+		qr/Change the configuration parameter "max_slot_wal_keep_size" on the publisher to -1./,
+	],
+	'Validate error for misconfigured max_slot_wal_keep_size on the publisher'
+);
+
+# Reset 'max_slot_wal_keep_size' to default after the test
+$node_p->append_conf('postgresql.conf', 'max_slot_wal_keep_size = -1');
+$node_p->reload;
 
 # Check if node S is still a standby
 $node_s->start;
-- 
2.41.0.windows.3

