From 254d0eef0edcbc96c5cf23c6856bd0467dde4604 Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Mon, 10 Jun 2024 11:29:00 +0530
Subject: [PATCH v2] Fix infinite loop in walsender during publisher shutdown

When a publisher server is shutting down, the last WAL record at that
point may be a continuation record whose latter part has not yet been
flushed. In that case the logical walsender keeps trying to read the
unflushed part and ends up in an infinite loop. To prevent this, make
XLogSendLogical() consider itself caught up when got_STOPPING is set
and the reader's missingContrecPtr equals the flush pointer. Records
that are not fully flushed at this point are generally not significant,
so simply ignoring them should not cause any issues.
---
 src/backend/replication/walsender.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 269914bce2..6ae5b99ecd 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -3083,8 +3083,15 @@ XLogSendLogical(void)
 	else if (logical_decoding_ctx->reader->EndRecPtr >= flushPtr)
 		flushPtr = GetFlushRecPtr(NULL);
 
-	/* If EndRecPtr is still past our flushPtr, it means we caught up. */
-	if (logical_decoding_ctx->reader->EndRecPtr >= flushPtr)
+	/*
+	 * If EndRecPtr is still past our flushPtr, it means we caught up.  When
+	 * the server is shutting down, the latter part of a continuation record
+	 * may be missing.  If got_STOPPING is true, assume we are caught up if the
+	 * last record is missing its continuation part at flushPtr.
+	 */
+	if (logical_decoding_ctx->reader->EndRecPtr >= flushPtr ||
+		(got_STOPPING &&
+		 logical_decoding_ctx->reader->missingContrecPtr == flushPtr))
 		WalSndCaughtUp = true;
 
 	/*
-- 
2.34.1

