From 81c9e5e84302a49d44fd89cde15d4f172a752224 Mon Sep 17 00:00:00 2001
From: Shlok Kyal <shlok.kyal.oss@gmail.com>
Date: Thu, 4 Sep 2025 19:46:24 +0530
Subject: [PATCH v1] Add stats related to slot sync skip

When slot sync is performed, it can happen that it is skipped due to
various reason. This patch adds stats for synced slots regarding this
slot sync skip. This patch adds new columns slot_sync_skip_count,
last_slot_sync_skip and slot_sync_skip_reason to view
pg_stat_replication_view.
---
 contrib/test_decoding/expected/stats.out     | 12 +++---
 doc/src/sgml/monitoring.sgml                 | 30 ++++++++++++++
 src/backend/catalog/system_views.sql         |  3 ++
 src/backend/replication/logical/slotsync.c   | 10 +++++
 src/backend/utils/activity/pgstat_replslot.c | 36 +++++++++++++++++
 src/backend/utils/adt/pgstatfuncs.c          | 42 ++++++++++++++++++--
 src/include/catalog/pg_proc.dat              |  6 +--
 src/include/pgstat.h                         |  5 +++
 src/include/replication/slotsync.h           |  9 +++++
 src/test/regress/expected/rules.out          |  5 ++-
 10 files changed, 144 insertions(+), 14 deletions(-)

diff --git a/contrib/test_decoding/expected/stats.out b/contrib/test_decoding/expected/stats.out
index de6dc416130..aa75cdd458c 100644
--- a/contrib/test_decoding/expected/stats.out
+++ b/contrib/test_decoding/expected/stats.out
@@ -78,17 +78,17 @@ SELECT slot_name, spill_txns = 0 AS spill_txns, spill_count = 0 AS spill_count,
 
 -- verify accessing/resetting stats for non-existent slot does something reasonable
 SELECT * FROM pg_stat_get_replication_slot('do-not-exist');
-  slot_name   | spill_txns | spill_count | spill_bytes | stream_txns | stream_count | stream_bytes | total_txns | total_bytes | stats_reset 
---------------+------------+-------------+-------------+-------------+--------------+--------------+------------+-------------+-------------
- do-not-exist |          0 |           0 |           0 |           0 |            0 |            0 |          0 |           0 | 
+  slot_name   | spill_txns | spill_count | spill_bytes | stream_txns | stream_count | stream_bytes | total_txns | total_bytes | slot_sync_skip_count | last_slot_sync_skip | slot_sync_skip_reason | stats_reset 
+--------------+------------+-------------+-------------+-------------+--------------+--------------+------------+-------------+----------------------+---------------------+-----------------------+-------------
+ do-not-exist |          0 |           0 |           0 |           0 |            0 |            0 |          0 |           0 |                    0 |                     | none                  | 
 (1 row)
 
 SELECT pg_stat_reset_replication_slot('do-not-exist');
 ERROR:  replication slot "do-not-exist" does not exist
 SELECT * FROM pg_stat_get_replication_slot('do-not-exist');
-  slot_name   | spill_txns | spill_count | spill_bytes | stream_txns | stream_count | stream_bytes | total_txns | total_bytes | stats_reset 
---------------+------------+-------------+-------------+-------------+--------------+--------------+------------+-------------+-------------
- do-not-exist |          0 |           0 |           0 |           0 |            0 |            0 |          0 |           0 | 
+  slot_name   | spill_txns | spill_count | spill_bytes | stream_txns | stream_count | stream_bytes | total_txns | total_bytes | slot_sync_skip_count | last_slot_sync_skip | slot_sync_skip_reason | stats_reset 
+--------------+------------+-------------+-------------+-------------+--------------+--------------+------------+-------------+----------------------+---------------------+-----------------------+-------------
+ do-not-exist |          0 |           0 |           0 |           0 |            0 |            0 |          0 |           0 |                    0 |                     | none                  | 
 (1 row)
 
 -- spilling the xact
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 3f4a27a736e..2760c2a7535 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -1644,6 +1644,36 @@ description | Waiting for a newly initialized WAL file to reach durable storage
       </entry>
      </row>
 
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+        <structfield>slot_sync_skip_count</structfield><type>bigint</type>
+       </para>
+       <para>
+        Number of times the slot sync is skipped.
+       </para>
+      </entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+        <structfield>last_slot_sync_skip</structfield><type>timestamp with time zone</type>
+       </para>
+       <para>
+        Time at which last slot sync was skipped.
+       </para>
+      </entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+        <structfield>slot_sync_skip_reason</structfield><type>text</type>
+       </para>
+       <para>
+        Reason of the last slot sync skip.
+       </para>
+      </entry>
+     </row>
+
      <row>
       <entry role="catalog_table_entry"><para role="column_definition">
         <structfield>stats_reset</structfield> <type>timestamp with time zone</type>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index c77fa0234bb..8276c1af2eb 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1061,6 +1061,9 @@ CREATE VIEW pg_stat_replication_slots AS
             s.stream_bytes,
             s.total_txns,
             s.total_bytes,
+            s.slot_sync_skip_count,
+            s.last_slot_sync_skip,
+			s.slot_sync_skip_reason,
             s.stats_reset
     FROM pg_replication_slots as r,
         LATERAL pg_stat_get_replication_slot(slot_name) as s
diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c
index 9d0072a49ed..dc0a23ce506 100644
--- a/src/backend/replication/logical/slotsync.c
+++ b/src/backend/replication/logical/slotsync.c
@@ -218,6 +218,9 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
 						  LSN_FORMAT_ARGS(slot->data.restart_lsn),
 						  slot->data.catalog_xmin));
 
+		/* Update stats for slot sync skip */
+		pgstat_report_replslot_sync_skip(slot, SLOT_SYNC_SKIP_REMOTE_BEHIND);
+
 		if (remote_slot_precedes)
 			*remote_slot_precedes = true;
 
@@ -595,6 +598,9 @@ update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
 				errdetail("Synchronization could lead to data loss, because the standby could not build a consistent snapshot to decode WALs at LSN %X/%08X.",
 						  LSN_FORMAT_ARGS(slot->data.restart_lsn)));
 
+		/* Update stats for slot sync skip */
+		pgstat_report_replslot_sync_skip(slot, SLOT_SYNC_SKIP_NO_CONSISTENT_SNAPSHOT);
+
 		return false;
 	}
 
@@ -646,6 +652,10 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid)
 					   remote_slot->name,
 					   LSN_FORMAT_ARGS(latestFlushPtr)));
 
+		/* Update stats for slot sync skip if slot exist on the standby */
+		if ((slot = SearchNamedReplicationSlot(remote_slot->name, true)))
+			pgstat_report_replslot_sync_skip(slot, SLOT_SYNC_SKIP_STANDBY_BEHIND);
+
 		return false;
 	}
 
diff --git a/src/backend/utils/activity/pgstat_replslot.c b/src/backend/utils/activity/pgstat_replslot.c
index ccfb11c49bf..ae576516e44 100644
--- a/src/backend/utils/activity/pgstat_replslot.c
+++ b/src/backend/utils/activity/pgstat_replslot.c
@@ -27,6 +27,7 @@
 
 #include "replication/slot.h"
 #include "utils/pgstat_internal.h"
+#include "replication/slotsync.h"
 
 
 static int	get_replslot_index(const char *name, bool need_lock);
@@ -101,6 +102,41 @@ pgstat_report_replslot(ReplicationSlot *slot, const PgStat_StatReplSlotEntry *re
 	pgstat_unlock_entry(entry_ref);
 }
 
+/*
+ * Report replication slot sync skip statistics
+ *
+ * We can rely on the stats for the slot to exist and to belong to this
+ * slot. We can only get here if pgstat_create_replslot() or
+ * pgstat_acquire_replslot() have already been called.
+ */
+void
+pgstat_report_replslot_sync_skip(ReplicationSlot *slot, SlotSyncSkipReason reason)
+{
+	PgStat_EntryRef *entry_ref;
+	PgStatShared_ReplSlot *shstatent;
+	PgStat_StatReplSlotEntry *statent;
+
+	entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_REPLSLOT, InvalidOid,
+											ReplicationSlotIndex(slot), false);
+	shstatent = (PgStatShared_ReplSlot *) entry_ref->shared_stats;
+	statent = &shstatent->stats;
+
+	if (reason != SLOT_SYNC_SKIP_NONE)
+	{
+		statent->slot_sync_skip_count += 1;
+		statent->last_slot_sync_skip = GetCurrentTimestamp();
+		statent->slot_sync_skip_reason = reason;
+	}
+	else
+	{
+		statent->slot_sync_skip_count = 0;
+		statent->last_slot_sync_skip = 0;
+		statent->slot_sync_skip_reason = SLOT_SYNC_SKIP_NONE;
+	}
+
+	pgstat_unlock_entry(entry_ref);
+}
+
 /*
  * Report replication slot creation.
  *
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index c756c2bebaa..fe8feb87a3e 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -2093,6 +2093,26 @@ pg_stat_get_archiver(PG_FUNCTION_ARGS)
 	PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
 }
 
+/* Map a SlotSyncSkipReason enum to a human-readable string */
+static char *
+GetSlotSyncSkipReason(SlotSyncSkipReason reason)
+{
+	switch (reason)
+	{
+		case SLOT_SYNC_SKIP_NONE:
+			return pstrdup("none");
+		case SLOT_SYNC_SKIP_REMOTE_BEHIND:
+			return pstrdup("remote_behind");
+		case SLOT_SYNC_SKIP_STANDBY_BEHIND:
+			return pstrdup("standby_behind");
+		case SLOT_SYNC_SKIP_NO_CONSISTENT_SNAPSHOT:
+			return pstrdup("no_consistent_snapshot");
+	}
+
+	Assert(false);
+	return pstrdup("none");
+}
+
 /*
  * Get the statistics for the replication slot. If the slot statistics is not
  * available, return all-zeroes stats.
@@ -2100,7 +2120,7 @@ pg_stat_get_archiver(PG_FUNCTION_ARGS)
 Datum
 pg_stat_get_replication_slot(PG_FUNCTION_ARGS)
 {
-#define PG_STAT_GET_REPLICATION_SLOT_COLS 10
+#define PG_STAT_GET_REPLICATION_SLOT_COLS 13
 	text	   *slotname_text = PG_GETARG_TEXT_P(0);
 	NameData	slotname;
 	TupleDesc	tupdesc;
@@ -2129,7 +2149,13 @@ pg_stat_get_replication_slot(PG_FUNCTION_ARGS)
 					   INT8OID, -1, 0);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 9, "total_bytes",
 					   INT8OID, -1, 0);
-	TupleDescInitEntry(tupdesc, (AttrNumber) 10, "stats_reset",
+	TupleDescInitEntry(tupdesc, (AttrNumber) 10, "slot_sync_skip_count",
+					   INT8OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 11, "last_slot_sync_skip",
+					   TIMESTAMPTZOID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 12, "slot_sync_skip_reason",
+					   TEXTOID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 13, "stats_reset",
 					   TIMESTAMPTZOID, -1, 0);
 	BlessTupleDesc(tupdesc);
 
@@ -2154,11 +2180,19 @@ pg_stat_get_replication_slot(PG_FUNCTION_ARGS)
 	values[6] = Int64GetDatum(slotent->stream_bytes);
 	values[7] = Int64GetDatum(slotent->total_txns);
 	values[8] = Int64GetDatum(slotent->total_bytes);
+	values[9] = Int64GetDatum(slotent->slot_sync_skip_count);
+
+	if (slotent->last_slot_sync_skip == 0)
+		nulls[10] = true;
+	else
+		values[10] = TimestampTzGetDatum(slotent->last_slot_sync_skip);
+
+	values[11] = CStringGetTextDatum(GetSlotSyncSkipReason(slotent->slot_sync_skip_reason));
 
 	if (slotent->stat_reset_timestamp == 0)
-		nulls[9] = true;
+		nulls[12] = true;
 	else
-		values[9] = TimestampTzGetDatum(slotent->stat_reset_timestamp);
+		values[12] = TimestampTzGetDatum(slotent->stat_reset_timestamp);
 
 	/* Returns the record as Datum */
 	PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 118d6da1ace..147b93c7a71 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5675,9 +5675,9 @@
 { oid => '6169', descr => 'statistics: information about replication slot',
   proname => 'pg_stat_get_replication_slot', provolatile => 's',
   proparallel => 'r', prorettype => 'record', proargtypes => 'text',
-  proallargtypes => '{text,text,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
-  proargmodes => '{i,o,o,o,o,o,o,o,o,o,o}',
-  proargnames => '{slot_name,slot_name,spill_txns,spill_count,spill_bytes,stream_txns,stream_count,stream_bytes,total_txns,total_bytes,stats_reset}',
+  proallargtypes => '{text,text,int8,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz,text,timestamptz}',
+  proargmodes => '{i,o,o,o,o,o,o,o,o,o,o,o,o,o}',
+  proargnames => '{slot_name,slot_name,spill_txns,spill_count,spill_bytes,stream_txns,stream_count,stream_bytes,total_txns,total_bytes,slot_sync_skip_count,last_slot_sync_skip,slot_sync_skip_reason,stats_reset}',
   prosrc => 'pg_stat_get_replication_slot' },
 
 { oid => '6230', descr => 'statistics: check if a stats object exists',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index f402b17295c..4d1b8fd79a4 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -15,6 +15,7 @@
 #include "portability/instr_time.h"
 #include "postmaster/pgarch.h"	/* for MAX_XFN_CHARS */
 #include "replication/conflict.h"
+#include "replication/slotsync.h"
 #include "utils/backend_progress.h" /* for backward compatibility */	/* IWYU pragma: export */
 #include "utils/backend_status.h"	/* for backward compatibility */	/* IWYU pragma: export */
 #include "utils/pgstat_kind.h"
@@ -395,6 +396,9 @@ typedef struct PgStat_StatReplSlotEntry
 	PgStat_Counter stream_bytes;
 	PgStat_Counter total_txns;
 	PgStat_Counter total_bytes;
+	PgStat_Counter slot_sync_skip_count;
+	TimestampTz last_slot_sync_skip;
+	PgStat_Counter slot_sync_skip_reason;
 	TimestampTz stat_reset_timestamp;
 } PgStat_StatReplSlotEntry;
 
@@ -736,6 +740,7 @@ extern PgStat_TableStatus *find_tabstat_entry(Oid rel_id);
 extern void pgstat_reset_replslot(const char *name);
 struct ReplicationSlot;
 extern void pgstat_report_replslot(struct ReplicationSlot *slot, const PgStat_StatReplSlotEntry *repSlotStat);
+extern void pgstat_report_replslot_sync_skip(struct ReplicationSlot *slot, SlotSyncSkipReason reason);
 extern void pgstat_create_replslot(struct ReplicationSlot *slot);
 extern void pgstat_acquire_replslot(struct ReplicationSlot *slot);
 extern void pgstat_drop_replslot(struct ReplicationSlot *slot);
diff --git a/src/include/replication/slotsync.h b/src/include/replication/slotsync.h
index 16b721463dd..359435ff01e 100644
--- a/src/include/replication/slotsync.h
+++ b/src/include/replication/slotsync.h
@@ -23,6 +23,15 @@ extern PGDLLIMPORT bool sync_replication_slots;
 extern PGDLLIMPORT char *PrimaryConnInfo;
 extern PGDLLIMPORT char *PrimarySlotName;
 
+typedef enum SlotSyncSkipReason
+{
+	SLOT_SYNC_SKIP_NONE,		/* No skip */
+	SLOT_SYNC_SKIP_STANDBY_BEHIND,	/* Standby is behind the remote slot */
+	SLOT_SYNC_SKIP_REMOTE_BEHIND,	/* Remote slot is behind the local slot */
+	SLOT_SYNC_SKIP_NO_CONSISTENT_SNAPSHOT	/* Standby could not reach a
+											 * consistent snapshot */
+}			SlotSyncSkipReason;
+
 extern char *CheckAndGetDbnameFromConninfo(void);
 extern bool ValidateSlotSyncParams(int elevel);
 
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 35e8aad7701..ae0291c06aa 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2140,9 +2140,12 @@ pg_stat_replication_slots| SELECT s.slot_name,
     s.stream_bytes,
     s.total_txns,
     s.total_bytes,
+    s.slot_sync_skip_count,
+    s.last_slot_sync_skip,
+    s.slot_sync_skip_reason,
     s.stats_reset
    FROM pg_replication_slots r,
-    LATERAL pg_stat_get_replication_slot((r.slot_name)::text) s(slot_name, spill_txns, spill_count, spill_bytes, stream_txns, stream_count, stream_bytes, total_txns, total_bytes, stats_reset)
+    LATERAL pg_stat_get_replication_slot((r.slot_name)::text) s(slot_name, spill_txns, spill_count, spill_bytes, stream_txns, stream_count, stream_bytes, total_txns, total_bytes, slot_sync_skip_count, last_slot_sync_skip, slot_sync_skip_reason, stats_reset)
   WHERE (r.datoid IS NOT NULL);
 pg_stat_slru| SELECT name,
     blks_zeroed,
-- 
2.34.1

