From e0fd7f52a764a0f13d9ba7697e74220573aef7f7 Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Mon, 10 Jun 2024 18:03:27 +0530
Subject: [PATCH v2] Fix infinite loop in walsender during publisher shutdown

When a publisher server is shutting down, the last WAL record at that
point may be a continuation record whose latter part has not yet been
flushed. In that case, the logical walsender repeatedly tries to read
the unflushed part and ends up in an infinite loop. To prevent this,
make the logical walsender consider itself caught up when shutdown has
been requested (got_STOPPING is set) and the reader's missingContrecPtr
equals the flush pointer. Records that are not fully flushed at this
point are generally not significant, so simply ignoring them should not
cause any issues.
---
 src/backend/replication/walsender.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 28f0a29473..b527a17c9f 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -2924,8 +2924,15 @@ XLogSendLogical(void)
 	else if (logical_decoding_ctx->reader->EndRecPtr >= flushPtr)
 		flushPtr = GetFlushRecPtr();
 
-	/* If EndRecPtr is still past our flushPtr, it means we caught up. */
-	if (logical_decoding_ctx->reader->EndRecPtr >= flushPtr)
+	/*
+	 * If EndRecPtr is still past our flushPtr, it means we caught up.  When
+	 * the server is shutting down, the latter part of a continuation record
+	 * may be missing.  If got_STOPPING is true, assume we are caught up if the
+	 * last record is missing its continuation part at flushPtr.
+	 */
+	if (logical_decoding_ctx->reader->EndRecPtr >= flushPtr ||
+		(got_STOPPING &&
+		 logical_decoding_ctx->reader->missingContrecPtr == flushPtr))
 		WalSndCaughtUp = true;
 
 	/*
-- 
2.34.1

