From 6e20bb223012b666161521b5e7249c066467a5f3 Mon Sep 17 00:00:00 2001
From: Jelte Fennema <github-tech@jeltef.nl>
Date: Mon, 12 Sep 2022 09:44:06 +0200
Subject: [PATCH v5] Support load balancing in libpq

Load balancing connections across multiple read replicas is a pretty
common way of scaling out read queries. There are two main ways of doing
so, both with their own advantages and disadvantages:
1. Load balancing at the client level
2. Load balancing by connecting to an intermediary load balancer

Both JDBC (Java) and Npgsql (C#) already support client level load
balancing (option #1). This patch implements client level load balancing
for libpq as well. To stay consistent with the JDBC and Npgsql part of
the ecosystem, a similar implementation and name for the option are
used. It contains two levels of load balancing:
1. The given hosts are randomly shuffled, before resolving them
    one-by-one.
2. Once a host's addresses get resolved, those addresses are shuffled,
    before trying to connect to them one-by-one.
---
 .cirrus.yml                               |  14 ++
 doc/src/sgml/libpq.sgml                   |  48 +++++
 src/include/libpq/pqcomm.h                |   6 +
 src/interfaces/libpq/fe-connect.c         | 231 +++++++++++++++++++---
 src/interfaces/libpq/libpq-int.h          |  12 +-
 src/interfaces/libpq/meson.build          |   1 +
 src/interfaces/libpq/t/003_loadbalance.pl | 167 ++++++++++++++++
 7 files changed, 446 insertions(+), 33 deletions(-)
 create mode 100644 src/interfaces/libpq/t/003_loadbalance.pl

diff --git a/.cirrus.yml b/.cirrus.yml
index f31923333e..54c3c00e1b 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -293,6 +293,14 @@ task:
     mkdir -m 770 /tmp/cores
     chown root:postgres /tmp/cores
     sysctl kernel.core_pattern='/tmp/cores/%e-%s-%p.core'
+
+  setup_hosts_file_script: |
+    cat >> /etc/hosts <<-EOF
+      127.0.0.1 pg-loadbalancetest
+      127.0.0.2 pg-loadbalancetest
+      127.0.0.3 pg-loadbalancetest
+    EOF
+
   setup_additional_packages_script: |
     #apt-get update
     #DEBIAN_FRONTEND=noninteractive apt-get -y install ...
@@ -540,6 +548,12 @@ task:
   setup_additional_packages_script: |
     REM choco install -y --no-progress ...
 
+  setup_hosts_file_script: |
+    echo 127.0.0.1 pg-loadbalancetest >> c:\Windows\System32\Drivers\etc\hosts
+    echo 127.0.0.2 pg-loadbalancetest >> c:\Windows\System32\Drivers\etc\hosts
+    echo 127.0.0.3 pg-loadbalancetest >> c:\Windows\System32\Drivers\etc\hosts
+    type c:\Windows\System32\Drivers\etc\hosts
+
   # Use /DEBUG:FASTLINK to avoid high memory usage during linking
   configure_script: |
     vcvarsall x64
diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index f9558dec3b..6ce7a0c9cc 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -1316,6 +1316,54 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
       </listitem>
      </varlistentry>
 
+     <varlistentry id="libpq-load-balance-hosts" xreflabel="load_balance_hosts">
+      <term><literal>load_balance_hosts</literal></term>
+      <listitem>
+       <para>
+        Controls whether the client load balances connections across hosts and
+        addresses. The default value is 0, meaning off; this means that hosts are
+        tried in the order they are provided and addresses are tried in the order
+        they are received from DNS or a hosts file. If this value is set to 1,
+        meaning on, the hosts and addresses that they resolve to are tried in
+        random order. Subsequent queries once connected will still be sent to
+        the same server. Setting this to 1 is mostly useful when opening
+        multiple connections at the same time, possibly from different machines.
+        This way connections can be load balanced across multiple Postgres
+        servers.
+       </para>
+       <para>
+        When providing multiple hosts, these hosts are resolved in random order.
+        Then if that host resolves to multiple addresses, these addresses are
+        connected to in random order. Only once all addresses for a single host
+        have been tried, the addresses for the next random host will be
+        resolved. This behaviour can lead to non-uniform address selection in
+        certain cases, such as when not all hosts resolve to the same number of
+        addresses, or when multiple hosts resolve to the same address. So if you
+        want uniform load balancing, this is something to keep in mind. However,
+        non-uniform load balancing also has use cases, e.g. providing the
+        hostname of a larger server multiple times in the host string so it gets
+        more requests.
+       </para>
+       <para>
+        When using this setting it's recommended to also configure a reasonable
+        value for <xref linkend="libpq-connect-connect-timeout"/>, because then,
+        if one of the nodes that are used for load balancing is not responding,
+        a new node will be tried.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="libpq-random-seed" xreflabel="random_seed">
+      <term><literal>random_seed</literal></term>
+      <listitem>
+       <para>
+        Sets the random seed that is used by <xref linkend="libpq-load-balance-hosts"/>
+        to randomize the host order. This option is mostly useful when running
+        tests that require a stable random order.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="libpq-keepalives" xreflabel="keepalives">
       <term><literal>keepalives</literal></term>
       <listitem>
diff --git a/src/include/libpq/pqcomm.h b/src/include/libpq/pqcomm.h
index fcf68df39b..39e93b1392 100644
--- a/src/include/libpq/pqcomm.h
+++ b/src/include/libpq/pqcomm.h
@@ -27,6 +27,12 @@ typedef struct
 	socklen_t	salen;
 } SockAddr;
 
+typedef struct
+{
+	int			family;
+	SockAddr	addr;
+}			AddrInfo;
+
 /* Configure the UNIX socket location for the well known port. */
 
 #define UNIXSOCK_PATH(path, port, sockdir) \
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index f88d672c6c..b4d3613713 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -241,6 +241,14 @@ static const internalPQconninfoOption PQconninfoOptions[] = {
 		"Fallback-Application-Name", "", 64,
 	offsetof(struct pg_conn, fbappname)},
 
+	{"load_balance_hosts", NULL, NULL, NULL,
+		"Load-Balance", "", 1,	/* should be just '0' or '1' */
+	offsetof(struct pg_conn, loadbalance)},
+
+	{"random_seed", NULL, NULL, NULL,
+		"Random-Seed", "", 10,	/* strlen(INT32_MAX) == 10 */
+	offsetof(struct pg_conn, randomseed)},
+
 	{"keepalives", NULL, NULL, NULL,
 		"TCP-Keepalives", "", 1,	/* should be just '0' or '1' */
 	offsetof(struct pg_conn, keepalives)},
@@ -379,6 +387,7 @@ static bool fillPGconn(PGconn *conn, PQconninfoOption *connOptions);
 static void freePGconn(PGconn *conn);
 static void closePGconn(PGconn *conn);
 static void release_conn_addrinfo(PGconn *conn);
+static bool store_conn_addrinfo(PGconn *conn, struct addrinfo *addrlist);
 static void sendTerminateConn(PGconn *conn);
 static PQconninfoOption *conninfo_init(PQExpBuffer errorMessage);
 static PQconninfoOption *parse_connection_string(const char *connstr,
@@ -424,6 +433,9 @@ static void pgpassfileWarning(PGconn *conn);
 static void default_threadlock(int acquire);
 static bool sslVerifyProtocolVersion(const char *version);
 static bool sslVerifyProtocolRange(const char *min, const char *max);
+static int	loadBalance(PGconn *conn);
+static bool parse_int_param(const char *value, int *result, PGconn *conn,
+							const char *context);
 
 
 /* global variable because fe-auth.c needs to access it */
@@ -1007,6 +1019,46 @@ parse_comma_separated_list(char **startptr, bool *more)
 	return p;
 }
 
+/*
+ * Initializes the prng_state field of the connection. If the random_seed
+ * option is set, use it as the seed; return false if parse_int_param()
+ * rejects its value. Otherwise we want something unpredictable, so if
+ * possible, use high-quality random bits for the seed, and fall back to a
+ * seed based on timestamp and PID if that fails. Returns true on success.
+ */
+static bool
+libpq_prng_init(PGconn *conn)
+{
+	if (unlikely(conn->randomseed))
+	{
+		int			rseed;
+
+		if (!parse_int_param(conn->randomseed, &rseed, conn, "random_seed"))
+			return false;
+		pg_prng_seed(&conn->prng_state, rseed);
+	}
+	else if (unlikely(!pg_prng_strong_seed(&conn->prng_state)))
+	{
+		uint64		rseed;
+		time_t		now = time(NULL);
+
+		/*
+		 * Since PIDs and timestamps tend to change more frequently in their
+		 * least significant bits, shift the timestamp left to allow a larger
+		 * total number of seeds in a given time period.  Since that would
+		 * leave only 20 bits of the timestamp that cycle every ~1 second,
+		 * also mix in some higher bits.
+		 */
+		rseed = ((uint64) getpid()) ^
+			((uint64) now << 12) ^
+			((uint64) now >> 20);
+
+		pg_prng_seed(&conn->prng_state, rseed);
+	}
+	return true;
+}
+
+
 /*
  *		connectOptions2
  *
@@ -1019,6 +1071,7 @@ static bool
 connectOptions2(PGconn *conn)
 {
 	int			i;
+	int			loadbalancehosts = loadBalance(conn);
 
 	/*
 	 * Allocate memory for details about each host to which we might possibly
@@ -1164,6 +1217,36 @@ connectOptions2(PGconn *conn)
 		}
 	}
 
+	if (loadbalancehosts < 0)	/* loadBalance() could not parse the value */
+	{
+		appendPQExpBufferStr(&conn->errorMessage,
+							 libpq_gettext("load_balance_hosts parameter must be an integer\n"));
+		return false;
+	}
+
+	if (loadbalancehosts)
+	{
+		if (!libpq_prng_init(conn))
+		{
+			return false;
+		}
+
+		/*
+		 * Shuffle connhost with a Durstenfeld/Knuth version of the
+		 * Fisher-Yates shuffle. Source:
+		 * https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm
+		 */
+		for (i = conn->nconnhost - 1; i > 0; i--)
+		{
+			int			j = pg_prng_uint64_range(&conn->prng_state, 0, i);
+			pg_conn_host temp = conn->connhost[j];
+
+			conn->connhost[j] = conn->connhost[i];
+			conn->connhost[i] = temp;
+		}
+	}
+
+
 	/*
 	 * If user name was not given, fetch it.  (Most likely, the fetch will
 	 * fail, since the only way we get here is if pg_fe_getauthname() failed
@@ -1726,6 +1809,27 @@ connectFailureMessage(PGconn *conn, int errorno)
 		libpq_append_conn_error(conn, "\tIs the server running on that host and accepting TCP/IP connections?");
 }
 
+/*
+ * Should we load balance across hosts? Returns 1 if yes, 0 if no, and -1 if
+ * conn->loadbalance is set to a value which is not parseable as an integer.
+ * An unset option counts as no; any nonzero integer counts as yes.
+ */
+static int
+loadBalance(PGconn *conn)
+{
+	char	   *ep;
+	long		val;			/* long, so strtol's result is not truncated */
+
+	if (conn->loadbalance == NULL)
+		return 0;
+
+	val = strtol(conn->loadbalance, &ep, 10);
+	if (*ep)
+		return -1;
+	return val != 0 ? 1 : 0;
+}
+
+
 /*
  * Should we use keepalives?  Returns 1 if yes, 0 if no, and -1 if
  * conn->keepalives is set to a value which is not parseable as an
@@ -2077,7 +2181,7 @@ connectDBComplete(PGconn *conn)
 	time_t		finish_time = ((time_t) -1);
 	int			timeout = 0;
 	int			last_whichhost = -2;	/* certainly different from whichhost */
-	struct addrinfo *last_addr_cur = NULL;
+	int			last_whichaddr = -2;	/* certainly different from whichaddr */
 
 	if (conn == NULL || conn->status == CONNECTION_BAD)
 		return 0;
@@ -2121,11 +2225,11 @@ connectDBComplete(PGconn *conn)
 		if (flag != PGRES_POLLING_OK &&
 			timeout > 0 &&
 			(conn->whichhost != last_whichhost ||
-			 conn->addr_cur != last_addr_cur))
+			 conn->whichaddr != last_whichaddr))
 		{
 			finish_time = time(NULL) + timeout;
 			last_whichhost = conn->whichhost;
-			last_addr_cur = conn->addr_cur;
+			last_whichaddr = conn->whichaddr;
 		}
 
 		/*
@@ -2272,9 +2376,9 @@ keep_going:						/* We will come back to here until there is
 	/* Time to advance to next address, or next host if no more addresses? */
 	if (conn->try_next_addr)
 	{
-		if (conn->addr_cur && conn->addr_cur->ai_next)
+		if (conn->whichaddr < conn->naddr)
 		{
-			conn->addr_cur = conn->addr_cur->ai_next;
+			conn->whichaddr++;
 			reset_connection_state_machine = true;
 		}
 		else
@@ -2287,6 +2391,7 @@ keep_going:						/* We will come back to here until there is
 	{
 		pg_conn_host *ch;
 		struct addrinfo hint;
+		struct addrinfo *addrlist;
 		int			thisport;
 		int			ret;
 		char		portstr[MAXPGPATH];
@@ -2327,7 +2432,7 @@ keep_going:						/* We will come back to here until there is
 		/* Initialize hint structure */
 		MemSet(&hint, 0, sizeof(hint));
 		hint.ai_socktype = SOCK_STREAM;
-		conn->addrlist_family = hint.ai_family = AF_UNSPEC;
+		hint.ai_family = AF_UNSPEC;
 
 		/* Figure out the port number we're going to use. */
 		if (ch->port == NULL || ch->port[0] == '\0')
@@ -2350,8 +2455,8 @@ keep_going:						/* We will come back to here until there is
 		{
 			case CHT_HOST_NAME:
 				ret = pg_getaddrinfo_all(ch->host, portstr, &hint,
-										 &conn->addrlist);
-				if (ret || !conn->addrlist)
+										 &addrlist);
+				if (ret || !addrlist)
 				{
 					libpq_append_conn_error(conn, "could not translate host name \"%s\" to address: %s",
 									   ch->host, gai_strerror(ret));
@@ -2362,8 +2467,8 @@ keep_going:						/* We will come back to here until there is
 			case CHT_HOST_ADDRESS:
 				hint.ai_flags = AI_NUMERICHOST;
 				ret = pg_getaddrinfo_all(ch->hostaddr, portstr, &hint,
-										 &conn->addrlist);
-				if (ret || !conn->addrlist)
+										 &addrlist);
+				if (ret || !addrlist)
 				{
 					libpq_append_conn_error(conn, "could not parse network address \"%s\": %s",
 									   ch->hostaddr, gai_strerror(ret));
@@ -2372,7 +2477,7 @@ keep_going:						/* We will come back to here until there is
 				break;
 
 			case CHT_UNIX_SOCKET:
-				conn->addrlist_family = hint.ai_family = AF_UNIX;
+				hint.ai_family = AF_UNIX;
 				UNIXSOCK_PATH(portstr, thisport, ch->host);
 				if (strlen(portstr) >= UNIXSOCK_PATH_BUFLEN)
 				{
@@ -2387,8 +2492,8 @@ keep_going:						/* We will come back to here until there is
 				 * name as a Unix-domain socket path.
 				 */
 				ret = pg_getaddrinfo_all(NULL, portstr, &hint,
-										 &conn->addrlist);
-				if (ret || !conn->addrlist)
+										 &addrlist);
+				if (ret || !addrlist)
 				{
 					libpq_append_conn_error(conn, "could not translate Unix-domain socket path \"%s\" to address: %s",
 									   portstr, gai_strerror(ret));
@@ -2397,8 +2502,15 @@ keep_going:						/* We will come back to here until there is
 				break;
 		}
 
-		/* OK, scan this addrlist for a working server address */
-		conn->addr_cur = conn->addrlist;
+		if (!store_conn_addrinfo(conn, addrlist))
+		{
+			pg_freeaddrinfo_all(hint.ai_family, addrlist);
+			appendPQExpBufferStr(&conn->errorMessage,
+								 libpq_gettext("out of memory\n"));
+			goto error_return;
+		}
+		pg_freeaddrinfo_all(hint.ai_family, addrlist);
+
 		reset_connection_state_machine = true;
 		conn->try_next_host = false;
 	}
@@ -2455,30 +2567,29 @@ keep_going:						/* We will come back to here until there is
 			{
 				/*
 				 * Try to initiate a connection to one of the addresses
-				 * returned by pg_getaddrinfo_all().  conn->addr_cur is the
+				 * returned by pg_getaddrinfo_all().  conn->whichaddr is the
 				 * next one to try.
 				 *
 				 * The extra level of braces here is historical.  It's not
 				 * worth reindenting this whole switch case to remove 'em.
 				 */
 				{
-					struct addrinfo *addr_cur = conn->addr_cur;
 					char		host_addr[NI_MAXHOST];
+					AddrInfo   *addr_cur;
 
 					/*
 					 * Advance to next possible host, if we've tried all of
 					 * the addresses for the current host.
 					 */
-					if (addr_cur == NULL)
+					if (conn->whichaddr == conn->naddr)
 					{
 						conn->try_next_host = true;
 						goto keep_going;
 					}
+					addr_cur = &conn->addr[conn->whichaddr];
 
 					/* Remember current address for possible use later */
-					memcpy(&conn->raddr.addr, addr_cur->ai_addr,
-						   addr_cur->ai_addrlen);
-					conn->raddr.salen = addr_cur->ai_addrlen;
+					memcpy(&conn->raddr, &addr_cur->addr, sizeof(SockAddr));
 
 					/*
 					 * Set connip, too.  Note we purposely ignore strdup
@@ -2494,7 +2605,7 @@ keep_going:						/* We will come back to here until there is
 						conn->connip = strdup(host_addr);
 
 					/* Try to create the socket */
-					conn->sock = socket(addr_cur->ai_family, SOCK_STREAM, 0);
+					conn->sock = socket(addr_cur->family, SOCK_STREAM, 0);
 					if (conn->sock == PGINVALID_SOCKET)
 					{
 						int			errorno = SOCK_ERRNO;
@@ -2505,7 +2616,7 @@ keep_going:						/* We will come back to here until there is
 						 * cases where the address list includes both IPv4 and
 						 * IPv6 but kernel only accepts one family.
 						 */
-						if (addr_cur->ai_next != NULL ||
+						if (conn->whichaddr + 1 < conn->naddr ||
 							conn->whichhost + 1 < conn->nconnhost)
 						{
 							conn->try_next_addr = true;
@@ -2531,7 +2642,7 @@ keep_going:						/* We will come back to here until there is
 					 * TCP sockets, nonblock mode, close-on-exec.  Try the
 					 * next address if any of this fails.
 					 */
-					if (addr_cur->ai_family != AF_UNIX)
+					if (addr_cur->family != AF_UNIX)
 					{
 						if (!connectNoDelay(conn))
 						{
@@ -2558,7 +2669,7 @@ keep_going:						/* We will come back to here until there is
 					}
 #endif							/* F_SETFD */
 
-					if (addr_cur->ai_family != AF_UNIX)
+					if (addr_cur->family != AF_UNIX)
 					{
 #ifndef WIN32
 						int			on = 1;
@@ -2650,8 +2761,8 @@ keep_going:						/* We will come back to here until there is
 					 * Start/make connection.  This should not block, since we
 					 * are in nonblock mode.  If it does, well, too bad.
 					 */
-					if (connect(conn->sock, addr_cur->ai_addr,
-								addr_cur->ai_addrlen) < 0)
+					if (connect(conn->sock, (struct sockaddr *) &addr_cur->addr.addr,
+								addr_cur->addr.salen) < 0)
 					{
 						if (SOCK_ERRNO == EINPROGRESS ||
 #ifdef WIN32
@@ -4005,6 +4116,8 @@ freePGconn(PGconn *conn)
 	}
 	free(conn->pgpassfile);
 	free(conn->channel_binding);
+	free(conn->loadbalance);
+	free(conn->randomseed);
 	free(conn->keepalives);
 	free(conn->keepalives_idle);
 	free(conn->keepalives_interval);
@@ -4041,6 +4154,63 @@ freePGconn(PGconn *conn)
 	free(conn);
 }
 
+
+/*
+ * Copy the addresses in the getaddrinfo() result list addrlist into a newly
+ * allocated conn->addr array, setting conn->naddr to the number of entries
+ * and resetting conn->whichaddr to 0. When load balancing is enabled the
+ * array is shuffled afterwards. addrlist itself is not freed here.
+ * Returns false if the array could not be allocated, true otherwise.
+ */
+static bool
+store_conn_addrinfo(PGconn *conn, struct addrinfo *addrlist)
+{
+	struct addrinfo *cur;
+	int			count = 0;
+
+	/* First pass: find out how many entries the list has. */
+	for (cur = addrlist; cur != NULL; cur = cur->ai_next)
+		count++;
+
+	conn->whichaddr = 0;
+	conn->naddr = count;
+	conn->addr = calloc(conn->naddr, sizeof(AddrInfo));
+	if (!conn->addr)
+		return false;
+
+	/* Second pass: copy family, address and address length of each entry. */
+	cur = addrlist;
+	for (int idx = 0; idx < conn->naddr; idx++, cur = cur->ai_next)
+	{
+		AddrInfo   *dst = &conn->addr[idx];
+
+		dst->family = cur->ai_family;
+		memcpy(&dst->addr.addr, cur->ai_addr, cur->ai_addrlen);
+		dst->addr.salen = cur->ai_addrlen;
+	}
+
+	if (!loadBalance(conn))
+		return true;
+
+	/*
+	 * Shuffle addr with a Durstenfeld/Knuth version of the Fisher-Yates
+	 * shuffle. Source:
+	 * https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm
+	 *
+	 * conn->prng_state is not initialized here; that is expected to have
+	 * happened already in connectOptions2.
+	 */
+	for (int hi = conn->naddr - 1; hi > 0; hi--)
+	{
+		int			pick = pg_prng_uint64_range(&conn->prng_state, 0, hi);
+		AddrInfo	swap = conn->addr[pick];
+
+		conn->addr[pick] = conn->addr[hi];
+		conn->addr[hi] = swap;
+	}
+	return true;
+}
+
 /*
  * release_conn_addrinfo
  *	 - Free any addrinfo list in the PGconn.
@@ -4048,11 +4218,10 @@ freePGconn(PGconn *conn)
 static void
 release_conn_addrinfo(PGconn *conn)
 {
-	if (conn->addrlist)
+	if (conn->addr)
 	{
-		pg_freeaddrinfo_all(conn->addrlist_family, conn->addrlist);
-		conn->addrlist = NULL;
-		conn->addr_cur = NULL;	/* for safety */
+		free(conn->addr);
+		conn->addr = NULL;		/* makes a repeated call a no-op */
 	}
 }
 
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index 512762f999..76ee988038 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -82,6 +82,8 @@ typedef struct
 #endif
 #endif							/* USE_OPENSSL */
 
+#include "common/pg_prng.h"
+
 /*
  * POSTGRES backend dependent Constants.
  */
@@ -373,6 +375,8 @@ struct pg_conn
 	char	   *pgpassfile;		/* path to a file containing password(s) */
 	char	   *channel_binding;	/* channel binding mode
 									 * (require,prefer,disable) */
+	char	   *loadbalance;	/* load balance over hosts */
+	char	   *randomseed;		/* seed for randomization of load balancing */
 	char	   *keepalives;		/* use TCP keepalives? */
 	char	   *keepalives_idle;	/* time between TCP keepalives */
 	char	   *keepalives_interval;	/* time between TCP keepalive
@@ -461,8 +465,10 @@ struct pg_conn
 	PGTargetServerType target_server_type;	/* desired session properties */
 	bool		try_next_addr;	/* time to advance to next address/host? */
 	bool		try_next_host;	/* time to advance to next connhost[]? */
-	struct addrinfo *addrlist;	/* list of addresses for current connhost */
-	struct addrinfo *addr_cur;	/* the one currently being tried */
+	int			naddr;			/* number of addrs returned by getaddrinfo */
+	int			whichaddr;		/* the addr currently being tried */
+	AddrInfo   *addr;			/* the array of addresses for the currently
+								 * tried host */
 	int			addrlist_family;	/* needed to know how to free addrlist */
 	bool		send_appname;	/* okay to send application_name? */
 
@@ -477,6 +483,8 @@ struct pg_conn
 	PGVerbosity verbosity;		/* error/notice message verbosity */
 	PGContextVisibility show_context;	/* whether to show CONTEXT field */
 	PGlobjfuncs *lobjfuncs;		/* private state for large-object access fns */
+	pg_prng_state prng_state;	/* prng state for load balancing connections */
+
 
 	/* Buffer for data received from backend and not yet processed */
 	char	   *inBuffer;		/* currently allocated buffer */
diff --git a/src/interfaces/libpq/meson.build b/src/interfaces/libpq/meson.build
index 8e696f1183..8026398518 100644
--- a/src/interfaces/libpq/meson.build
+++ b/src/interfaces/libpq/meson.build
@@ -114,6 +114,7 @@ tests += {
     'tests': [
       't/001_uri.pl',
       't/002_api.pl',
+      't/003_loadbalance.pl',
     ],
     'env': {'with_ssl': get_option('ssl')},
   },
diff --git a/src/interfaces/libpq/t/003_loadbalance.pl b/src/interfaces/libpq/t/003_loadbalance.pl
new file mode 100644
index 0000000000..07eddbe9cc
--- /dev/null
+++ b/src/interfaces/libpq/t/003_loadbalance.pl
@@ -0,0 +1,167 @@
+# Copyright (c) 2022, PostgreSQL Global Development Group
+use strict;
+use warnings;
+use Config;
+use PostgreSQL::Test::Utils;
+use PostgreSQL::Test::Cluster;
+use File::Spec::Functions 'catfile';
+use Test::More;
+
+# This tests two different methods of load balancing from libpq
+# 1. Load balancing by providing multiple host and port combinations in the
+#    libpq connection string.
+# 2. By using a hosts file where hostname maps to multiple different IP
+#    addresses. Regular Postgres users wouldn't usually use such a host file,
+#    but this is the easiest way to imitate behaviour of a DNS server that
+#    returns multiple IP addresses for the same DNS record.
+#
+# Testing method 1 is supported on all platforms and works out of the box. But
+# testing method 2 has some more requirements, both on the platform and on the
+# initial setup. If any of these requirements are not met, then method 2 is
+# simply not tested.
+#
+# The requirements to test method 2 are as follows:
+# 1. Windows or Linux should be used.
+# 2. The OS hosts file at /etc/hosts or c:\Windows\System32\Drivers\etc\hosts
+#    should contain the following contents:
+#
+# 127.0.0.1 pg-loadbalancetest
+# 127.0.0.2 pg-loadbalancetest
+# 127.0.0.3 pg-loadbalancetest
+#
+#
+# Windows or Linux are required to test method 2 because these OSes allow
+# binding to 127.0.0.2 and 127.0.0.3 addresses by default, but other OSes
+# don't. We need to bind to different IP addresses, so that we can use these
+# different IP addresses in the hosts file.
+#
+# The hosts file needs to be prepared before running this test. We don't do it
+# on the fly, because it requires root permissions to change the hosts file. In
+# CI we set up the previously mentioned rules in the hosts file, so that this
+# load balancing method is tested.
+
+
+# Cluster setup which is shared for testing both load balancing methods
+my $can_bind_to_127_0_0_2 = $Config{osname} eq 'linux' || $PostgreSQL::Test::Utils::windows_os;
+
+if ($can_bind_to_127_0_0_2)
+{
+	$PostgreSQL::Test::Cluster::use_tcp = 1;
+	$PostgreSQL::Test::Cluster::test_pghost = '127.0.0.1';
+}
+my $port = PostgreSQL::Test::Cluster::get_free_port();
+my $node1 = PostgreSQL::Test::Cluster->new('node1', port => $port);
+my $node2 = PostgreSQL::Test::Cluster->new('node2', port => $port, own_host => 1);
+my $node3 = PostgreSQL::Test::Cluster->new('node3', port => $port, own_host => 1);
+
+# Create a data directory with initdb
+$node1->init();
+$node2->init();
+$node3->init();
+
+# Start the PostgreSQL server
+$node1->start();
+$node2->start();
+$node3->start();
+
+# Start the tests for load balancing method 1
+my $hostlist = $node1->host . ',' . $node2->host . ',' . $node3->host;
+my $portlist = "$port,$port,$port";
+
+$node1->connect_ok("host=$hostlist port=$portlist load_balance_hosts=1 random_seed=123",
+	"seed 123 selects node 1 first",
+	sql => "SELECT 'connect1'",
+	log_like => [qr/statement: SELECT 'connect1'/]);
+
+$node2->connect_ok("host=$hostlist port=$portlist load_balance_hosts=1 random_seed=123",
+	"seed 123 does not select node 2 first",
+	sql => "SELECT 'connect1'",
+	log_unlike => [qr/statement: SELECT 'connect1'/]);
+
+$node3->connect_ok("host=$hostlist port=$portlist load_balance_hosts=1 random_seed=123",
+	"seed 123 does not select node 3 first",
+	sql => "SELECT 'connect1'",
+	log_unlike => [qr/statement: SELECT 'connect1'/]);
+
+$node3->connect_ok("host=$hostlist port=$portlist load_balance_hosts=1 random_seed=42",
+	"seed 42 selects node 3 first",
+	sql => "SELECT 'connect2'",
+	log_like => [qr/statement: SELECT 'connect2'/]);
+
+$node1->connect_ok("host=$hostlist port=$portlist load_balance_hosts=1 random_seed=42",
+	"seed 42 does not select node 1 first",
+	sql => "SELECT 'connect2'",
+	log_unlike => [qr/statement: SELECT 'connect2'/]);
+
+$node2->connect_ok("host=$hostlist port=$portlist load_balance_hosts=1 random_seed=42",
+	"seed 42 does not select node 2 first",
+	sql => "SELECT 'connect2'",
+	log_unlike => [qr/statement: SELECT 'connect2'/]);
+
+$node3->stop();
+
+$node1->connect_ok("host=$hostlist port=$portlist load_balance_hosts=1 random_seed=42",
+	"seed 42 does select node 1 second",
+	sql => "SELECT 'connect3'",
+	log_like => [qr/statement: SELECT 'connect3'/]);
+
+$node2->connect_ok("host=$hostlist port=$portlist load_balance_hosts=1 random_seed=42",
+	"seed 42 does not select node 2 second",
+	sql => "SELECT 'connect3'",
+	log_unlike => [qr/statement: SELECT 'connect3'/]);
+
+$node3->start();
+
+# Checks for the requirements for testing load balancing method 2
+if (!$can_bind_to_127_0_0_2)
+{
+	# The OS requirement is not met
+	done_testing();
+	exit;
+}
+
+# Pick the hosts file location for the platform we are running on
+my $hosts_path =
+  $windows_os
+  ? 'c:\Windows\System32\Drivers\etc\hosts'
+  : '/etc/hosts';
+my $hosts_content = PostgreSQL::Test::Utils::slurp_file($hosts_path);
+
+# Method 2 needs the pg-loadbalancetest entries described at the top of
+# this file
+if ($hosts_content !~ m/pg-loadbalancetest/)
+{
+	# Host file is not prepared for this test
+	done_testing();
+	exit;
+}
+
+# Start the tests for load balancing method 2
+$node2->connect_ok("host=pg-loadbalancetest port=$port load_balance_hosts=1 random_seed=44",
+	"seed 44 selects node 2 first",
+	sql => "SELECT 'connect4'",
+	log_like => [qr/statement: SELECT 'connect4'/]);
+
+$node1->connect_ok("host=pg-loadbalancetest port=$port load_balance_hosts=1 random_seed=44",
+	"seed 44 does not select node 1 first",
+	sql => "SELECT 'connect4'",
+	log_unlike => [qr/statement: SELECT 'connect4'/]);
+
+$node3->connect_ok("host=pg-loadbalancetest port=$port load_balance_hosts=1 random_seed=44",
+	"seed 44 does not select node 3 first",
+	sql => "SELECT 'connect4'",
+	log_unlike => [qr/statement: SELECT 'connect4'/]);
+
+$node2->stop();
+
+$node1->connect_ok("host=pg-loadbalancetest port=$port load_balance_hosts=1 random_seed=44",
+	"seed 44 does select node 1 second",
+	sql => "SELECT 'connect5'",
+	log_like => [qr/statement: SELECT 'connect5'/]);
+
+$node3->connect_ok("host=pg-loadbalancetest port=$port load_balance_hosts=1 random_seed=44",
+	"seed 44 does not select node 3 second",
+	sql => "SELECT 'connect5'",
+	log_unlike => [qr/statement: SELECT 'connect5'/]);
+
+done_testing();
-- 
2.34.1

