Hi,

On Thu, Mar 13, 2025 at 07:33:24PM +0800, Xuneng Zhou wrote:
> Regarding patch 0001, the optimization in pgstat_backend_have_pending_cb
> looks good:

Thanks for looking at it!

> bool
> pgstat_backend_have_pending_cb(void)
> {
> - return (!pg_memory_is_all_zeros(&PendingBackendStats,
> - sizeof(struct PgStat_BackendPending)));
> + return backend_has_iostats;
> }
> 
> Additionally, the function pgstat_flush_backend includes the check:
> 
> + if (!pgstat_backend_have_pending_cb())
>     return false;
> 
> However, I think we might need to revise the comment (and possibly the
> function name) for clarity:
> 
> /*
>  * Check if there are any backend stats waiting to be flushed.
>  */

The comment is not exactly this one on the current HEAD, it looks like that 
you're
looking at a previous version of the core code.

> Originally, this function was intended to check multiple types of backend
> statistics, which made sense when PendingBackendStats was the centralized
> structure for various pending backend stats. However, since
> PgStat_PendingWalStats was removed from PendingBackendStats earlier, and
> now this patch introduces the backend_has_iostats variable, the scope of
> this function appears even narrower. This narrowed functionality no longer
> aligns closely with the original function name and its associated comment.

I don't think so, as since 76def4cdd7c, a pgstat_backend_wal_have_pending() 
check
has been added to pgstat_backend_have_pending_cb(). You're probably looking at
a version prior to 76def4cdd7c.

This particular sub-patch needs a rebase though, done in the attached. 0001
remains unchanged as compared to the v4 one just shared up-thread. If 0001 goes
in, merging 0002 would be less beneficial (as compared to v3).

Regards,

-- 
Bertrand Drouvot
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com
>From 3fa8bd30c15a99919601283f59f579e64b03c121 Mon Sep 17 00:00:00 2001
From: Bertrand Drouvot <bertranddrouvot...@gmail.com>
Date: Tue, 25 Feb 2025 10:18:05 +0000
Subject: [PATCH v4 1/2] Flush the IO statistics of active walsenders

The walsender does not flush its IO statistics until it exits.
The issue is there since pg_stat_io has been introduced in a9c70b46dbe.
This commits:

1. ensures it does not wait to exit to flush its IO statistics
2. flush its IO statistics periodically to not overload the WAL sender
3. adds a test for a physical walsender (a logical walsender had the same issue
but the fix is in the same code path)
---
 src/backend/replication/walsender.c   | 60 +++++++++++++++++++--------
 src/backend/utils/activity/pgstat.c   |  2 -
 src/include/pgstat.h                  |  2 +
 src/test/recovery/t/001_stream_rep.pl | 16 +++++++
 4 files changed, 60 insertions(+), 20 deletions(-)
  70.5% src/backend/replication/
   3.1% src/backend/utils/activity/
   3.6% src/include/
  22.6% src/test/recovery/t/

diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 446d10c1a7d..9b44d4ae600 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -90,6 +90,7 @@
 #include "utils/guc.h"
 #include "utils/memutils.h"
 #include "utils/pg_lsn.h"
+#include "utils/pgstat_internal.h"
 #include "utils/ps_status.h"
 #include "utils/timeout.h"
 #include "utils/timestamp.h"
@@ -2740,6 +2741,8 @@ WalSndCheckTimeOut(void)
 static void
 WalSndLoop(WalSndSendDataCallback send_data)
 {
+	TimestampTz last_flush = 0;
+
 	/*
 	 * Initialize the last reply timestamp. That enables timeout processing
 	 * from hereon.
@@ -2834,30 +2837,51 @@ WalSndLoop(WalSndSendDataCallback send_data)
 		 * WalSndWaitForWal() handle any other blocking; idle receivers need
 		 * its additional actions.  For physical replication, also block if
 		 * caught up; its send_data does not block.
+		 *
+		 * When the WAL sender is caught up or has pending data to send, we
+		 * also periodically report I/O statistics. It's done periodically to
+		 * not overload the WAL sender.
 		 */
-		if ((WalSndCaughtUp && send_data != XLogSendLogical &&
-			 !streamingDoneSending) ||
-			pq_is_send_pending())
+		if ((WalSndCaughtUp && !streamingDoneSending) || pq_is_send_pending())
 		{
-			long		sleeptime;
-			int			wakeEvents;
+			TimestampTz now;
 
-			if (!streamingDoneReceiving)
-				wakeEvents = WL_SOCKET_READABLE;
-			else
-				wakeEvents = 0;
+			now = GetCurrentTimestamp();
 
-			/*
-			 * Use fresh timestamp, not last_processing, to reduce the chance
-			 * of reaching wal_sender_timeout before sending a keepalive.
-			 */
-			sleeptime = WalSndComputeSleeptime(GetCurrentTimestamp());
+			if (TimestampDifferenceExceeds(last_flush, now, PGSTAT_MIN_INTERVAL))
+			{
+				/*
+				 * Report IO statistics
+				 */
+				pgstat_flush_io(false);
+				(void) pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO);
+				last_flush = now;
+			}
 
-			if (pq_is_send_pending())
-				wakeEvents |= WL_SOCKET_WRITEABLE;
+			if (send_data != XLogSendLogical || pq_is_send_pending())
+			{
+				long		sleeptime;
+				int			wakeEvents;
+
+				if (!streamingDoneReceiving)
+					wakeEvents = WL_SOCKET_READABLE;
+				else
+					wakeEvents = 0;
+
+				/*
+				 * Use fresh timestamp, not last_processing, to reduce the
+				 * chance of reaching wal_sender_timeout before sending a
+				 * keepalive.
+				 */
+				sleeptime = WalSndComputeSleeptime(now);
+
+				if (pq_is_send_pending())
+					wakeEvents |= WL_SOCKET_WRITEABLE;
+
+				/* Sleep until something happens or we time out */
+				WalSndWait(wakeEvents, sleeptime, WAIT_EVENT_WAL_SENDER_MAIN);
+			}
 
-			/* Sleep until something happens or we time out */
-			WalSndWait(wakeEvents, sleeptime, WAIT_EVENT_WAL_SENDER_MAIN);
 		}
 	}
 }
diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c
index 3168b825e25..1bf84cbf64e 100644
--- a/src/backend/utils/activity/pgstat.c
+++ b/src/backend/utils/activity/pgstat.c
@@ -123,8 +123,6 @@
  * ----------
  */
 
-/* minimum interval non-forced stats flushes.*/
-#define PGSTAT_MIN_INTERVAL			1000
 /* how long until to block flushing pending stats updates */
 #define PGSTAT_MAX_INTERVAL			60000
 /* when to call pgstat_report_stat() again, even when idle */
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index def6b370ac1..d1b15bf7757 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -22,6 +22,8 @@
 #include "utils/relcache.h"
 #include "utils/wait_event.h"	/* for backward compatibility */	/* IWYU pragma: export */
 
+/* minimum interval non-forced stats flushes.*/
+#define PGSTAT_MIN_INTERVAL                 1000
 
 /* ----------
  * Paths for the statistics files (relative to installation's $PGDATA).
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index 3945f00ab88..3371895ab1d 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -42,6 +42,9 @@ $node_standby_2->init_from_backup($node_standby_1, $backup_name,
 	has_streaming => 1);
 $node_standby_2->start;
 
+# To check that an active walsender updates its IO statistics below.
+$node_primary->safe_psql('postgres', "SELECT pg_stat_reset_shared('io')");
+
 # Create some content on primary and check its presence in standby nodes
 $node_primary->safe_psql('postgres',
 	"CREATE TABLE tab_int AS SELECT generate_series(1,1002) AS a");
@@ -329,6 +332,19 @@ $node_primary->psql(
 
 note "switching to physical replication slot";
 
+# Wait for the walsender to update its IO statistics.
+# Has to be done before the next restart and far enough from the
+# pg_stat_reset_shared('io') to minimize the risk of polling for too long.
+$node_primary->poll_query_until(
+	'postgres',
+	qq[SELECT sum(reads) > 0
+       FROM pg_catalog.pg_stat_io
+       WHERE backend_type = 'walsender'
+       AND object = 'wal']
+  )
+  or die
+  "Timed out while waiting for the walsender to update its IO statistics";
+
 # Switch to using a physical replication slot. We can do this without a new
 # backup since physical slots can go backwards if needed. Do so on both
 # standbys. Since we're going to be testing things that affect the slot state,
-- 
2.34.1

>From abe58c82c35b8193d2cb8448b8147debdd2cb2f7 Mon Sep 17 00:00:00 2001
From: Bertrand Drouvot <bertranddrouvot...@gmail.com>
Date: Mon, 3 Mar 2025 10:02:40 +0000
Subject: [PATCH v4 2/2] Add a new backend_has_iostats global variable

It behaves as the existing have_iostats and replace the existing pg_memory_is_all_zeros()
calls in flushing backend stats functions. Indeed some perf measurements report
that those calls are responsible for a large part of the flushing backend stats
functions.
---
 src/backend/utils/activity/pgstat_backend.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)
 100.0% src/backend/utils/activity/

diff --git a/src/backend/utils/activity/pgstat_backend.c b/src/backend/utils/activity/pgstat_backend.c
index a8cb54a7732..82dae972c4c 100644
--- a/src/backend/utils/activity/pgstat_backend.c
+++ b/src/backend/utils/activity/pgstat_backend.c
@@ -37,6 +37,7 @@
  * memory allocation.
  */
 static PgStat_BackendPending PendingBackendStats;
+static bool backend_has_iostats = false;
 
 /*
  * WAL usage counters saved from pgWalUsage at the previous call to
@@ -76,6 +77,8 @@ pgstat_count_backend_io_op(IOObject io_object, IOContext io_context,
 
 	PendingBackendStats.pending_io.counts[io_object][io_context][io_op] += cnt;
 	PendingBackendStats.pending_io.bytes[io_object][io_context][io_op] += bytes;
+
+	backend_has_iostats = true;
 }
 
 /*
@@ -158,8 +161,7 @@ pgstat_flush_backend_entry_io(PgStat_EntryRef *entry_ref)
 	 * statistics.  In this case, avoid unnecessarily modifying the stats
 	 * entry.
 	 */
-	if (pg_memory_is_all_zeros(&PendingBackendStats.pending_io,
-							   sizeof(struct PgStat_PendingIO)))
+	if (!backend_has_iostats)
 		return;
 
 	shbackendent = (PgStatShared_Backend *) entry_ref->shared_stats;
@@ -190,6 +192,8 @@ pgstat_flush_backend_entry_io(PgStat_EntryRef *entry_ref)
 	 * Clear out the statistics buffer, so it can be re-used.
 	 */
 	MemSet(&PendingBackendStats.pending_io, 0, sizeof(PgStat_PendingIO));
+
+	backend_has_iostats = false;
 }
 
 /*
@@ -259,9 +263,7 @@ pgstat_flush_backend(bool nowait, bits32 flags)
 		return false;
 
 	/* Some IO data pending? */
-	if ((flags & PGSTAT_BACKEND_FLUSH_IO) &&
-		!pg_memory_is_all_zeros(&PendingBackendStats.pending_io,
-								sizeof(struct PgStat_PendingIO)))
+	if ((flags & PGSTAT_BACKEND_FLUSH_IO) && backend_has_iostats)
 		has_pending_data = true;
 
 	/* Some WAL data pending? */
@@ -298,9 +300,7 @@ pgstat_backend_have_pending_cb(void)
 	if (!pgstat_tracks_backend_bktype(MyBackendType))
 		return false;
 
-	return (!pg_memory_is_all_zeros(&PendingBackendStats,
-									sizeof(struct PgStat_BackendPending)) ||
-			pgstat_backend_wal_have_pending());
+	return (backend_has_iostats || pgstat_backend_wal_have_pending());
 }
 
 /*
-- 
2.34.1

Reply via email to