From d553885c36a2b80c5eb9b0704ba18bd76ee079cb Mon Sep 17 00:00:00 2001
From: Zhijie Hou <houzj.fnst@fujitsu.com>
Date: Wed, 27 Aug 2025 18:11:38 +0800
Subject: [PATCH v2 1/2] Avoid retaining conflict-related data when no tables
 are subscribed

This commit fixes an issue where conflict-related data was unnecessarily
retained when the subscription does not have a table.
---
 src/backend/replication/logical/tablesync.c | 26 +++++++++++++++++++++
 src/backend/replication/logical/worker.c    | 14 +++++++----
 src/include/replication/worker_internal.h   |  1 +
 3 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c
index d3356bc84ee..e6da4028d39 100644
--- a/src/backend/replication/logical/tablesync.c
+++ b/src/backend/replication/logical/tablesync.c
@@ -1788,6 +1788,32 @@ AllTablesyncsReady(void)
 	return has_subrels && (table_states_not_ready == NIL);
 }
 
+/*
+ * Return whether the subscription currently has any relations.
+ *
+ * Note: Unlike HasSubscriptionRelations(), this function relies on cached
+ * information for subscription relations. Additionally, it should not be
+ * invoked outside of apply or tablesync workers, as MySubscription must be
+ * initialized first.
+ */
+bool
+HasSubscriptionRelationsCached(void)
+{
+	bool		started_tx;
+	bool		has_subrels;
+
+	/* We need up-to-date subscription tables info here */
+	has_subrels = FetchTableStates(&started_tx);
+
+	if (started_tx)
+	{
+		CommitTransactionCommand();
+		pgstat_report_stat(true);
+	}
+
+	return has_subrels;
+}
+
 /*
  * Update the two_phase state of the specified subscription in pg_subscription.
  */
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index f1ebd63e792..38f27e1cb50 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -4595,11 +4595,17 @@ wait_for_local_flush(RetainDeadTuplesData *rdt_data)
 	 * workers is complex and not worth the effort, so we simply return if not
 	 * all tables are in the READY state.
 	 *
-	 * It is safe to add new tables with initial states to the subscription
-	 * after this check because any changes applied to these tables should
-	 * have a WAL position greater than the rdt_data->remote_lsn.
+	 * Advancing the transaction ID is also necessary when no tables are
+	 * subscribed, as it prevents unnecessary retention of dead tuples. Although
+	 * it seems feasible to skip all phases and directly assign candidate_xid to
+	 * oldest_nonremovable_xid in the RDT_GET_CANDIDATE_XID phase when no tables
+	 * are currently subscribed, this approach is unsafe. This is because new
+	 * tables may be added to the subscription after the initial table check,
+	 * requiring tuples deleted before candidate_xid for conflict detection in
+	 * upcoming transactions. Therefore, it remains necessary to wait for all
+	 * concurrent transactions to be fully applied.
 	 */
-	if (!AllTablesyncsReady())
+	if (HasSubscriptionRelationsCached() && !AllTablesyncsReady())
 	{
 		TimestampTz now;
 
diff --git a/src/include/replication/worker_internal.h b/src/include/replication/worker_internal.h
index 62ea1a00580..de003802612 100644
--- a/src/include/replication/worker_internal.h
+++ b/src/include/replication/worker_internal.h
@@ -272,6 +272,7 @@ extern void ReplicationOriginNameForLogicalRep(Oid suboid, Oid relid,
 											   char *originname, Size szoriginname);
 
 extern bool AllTablesyncsReady(void);
+extern bool HasSubscriptionRelationsCached(void);
 extern void UpdateTwoPhaseState(Oid suboid, char new_state);
 
 extern void process_syncing_tables(XLogRecPtr current_lsn);
-- 
2.51.0.windows.1

