Attached please find a patch for PostgreSQL 9.4 which changes the maximum
amount of WAL data that the WAL sender will send in a single message from
the hard-coded value of 128KiB to a user-controllable value of up to 16MiB
(a new setting, max_wal_send). It has been tested primarily under 9.4, but
there has also been some testing with 9.5.

In our lab environment, with a 16MiB setting, we saw substantially
better network utilization (almost 2x!), primarily over links with a
high bandwidth-delay product.

-- 
Jon Nelson
Dyn / Principal Software Engineer
From 5ba24d84d880d756bec538e35c499811d88e2fc3 Mon Sep 17 00:00:00 2001
From: Jon Nelson <jdnel...@dyn.com>
Date: Wed, 7 Sep 2016 07:23:53 -0500
Subject: [PATCH] guc-ify the formerly hard-coded MAX_SEND_SIZE to max_wal_send

---
 src/backend/replication/walsender.c | 14 ++++++++------
 src/backend/utils/misc/guc.c        | 12 ++++++++++++
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index b671c43..743d6c8 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -89,7 +89,7 @@
  * because signals are checked only between messages.  128kB (with
  * default 8k blocks) seems like a reasonable guess for now.
  */
-#define MAX_SEND_SIZE (XLOG_BLCKSZ * 16)
+int	max_wal_send_guc = 0;
 
 /* Array of WalSnds in shared memory */
 WalSndCtlData *WalSndCtl = NULL;
@@ -2181,7 +2181,7 @@ retry:
 /*
  * Send out the WAL in its normal physical/stored form.
  *
- * Read up to MAX_SEND_SIZE bytes of WAL that's been flushed to disk,
+ * Read up to max_wal_send bytes of WAL that's been flushed to disk,
  * but not yet sent to the client, and buffer it in the libpq output
  * buffer.
  *
@@ -2195,6 +2195,7 @@ XLogSendPhysical(void)
 	XLogRecPtr	startptr;
 	XLogRecPtr	endptr;
 	Size		nbytes;
+	int		max_wal_send = max_wal_send_guc * 1024;
 
 	if (streamingDoneSending)
 	{
@@ -2333,8 +2334,8 @@ XLogSendPhysical(void)
 
 	/*
 	 * Figure out how much to send in one message. If there's no more than
-	 * MAX_SEND_SIZE bytes to send, send everything. Otherwise send
-	 * MAX_SEND_SIZE bytes, but round back to logfile or page boundary.
+	 * max_wal_send bytes to send, send everything. Otherwise send
+	 * max_wal_send bytes, but round back to logfile or page boundary.
 	 *
 	 * The rounding is not only for performance reasons. Walreceiver relies on
 	 * the fact that we never split a WAL record across two messages. Since a
@@ -2344,7 +2345,7 @@ XLogSendPhysical(void)
 	 */
 	startptr = sentPtr;
 	endptr = startptr;
-	endptr += MAX_SEND_SIZE;
+	endptr += max_wal_send;
 
 	/* if we went beyond SendRqstPtr, back off */
 	if (SendRqstPtr <= endptr)
@@ -2363,7 +2364,8 @@ XLogSendPhysical(void)
 	}
 
 	nbytes = endptr - startptr;
-	Assert(nbytes <= MAX_SEND_SIZE);
+	Assert(nbytes <= max_wal_send);
+	elog(DEBUG2, "walsender sending WAL payload of %d bytes", nbytes);
 
 	/*
 	 * OK to read and send the slice.
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index a9f31ef..3a5018d 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -128,6 +128,7 @@ extern bool synchronize_seqscans;
 extern char *SSLCipherSuites;
 extern char *SSLECDHCurve;
 extern bool SSLPreferServerCiphers;
+extern int max_wal_send_guc;
 
 #ifdef TRACE_SORT
 extern bool trace_sort;
@@ -2145,6 +2146,17 @@ static struct config_int ConfigureNamesInt[] =
 	},
 
 	{
+		{"max_wal_send", PGC_SIGHUP, REPLICATION_SENDING,
+			gettext_noop("Sets the maximum WAL payload size for WAL replication."),
+			NULL,
+			GUC_UNIT_KB
+		},
+		&max_wal_send_guc,
+		128, 4, 16384,
+		NULL, NULL, NULL
+	},
+
+	{
 		{"commit_delay", PGC_SUSET, WAL_SETTINGS,
 			gettext_noop("Sets the delay in microseconds between transaction commit and "
 						 "flushing WAL to disk."),
-- 
2.10.2

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to