From d99b0456390573cb2df3324064e8d87c05cce327 Mon Sep 17 00:00:00 2001
From: Hari Babu <kommi.haribabu@gmail.com>
Date: Thu, 7 Jun 2018 18:11:51 +1000
Subject: [PATCH] Allow taget-session-attrs to accept prefer-read option

With this new prefer-read option, the connection is preferred
to connect to a read-only server if available in the connection
string, otherwise connect to a read-write server.
---
 doc/src/sgml/libpq.sgml               |  13 ++-
 src/interfaces/libpq/fe-connect.c     | 146 ++++++++++++++++++++++----
 src/interfaces/libpq/libpq-fe.h       |   2 +
 src/interfaces/libpq/libpq-int.h      |   3 +-
 src/test/recovery/t/001_stream_rep.pl |  14 ++-
 5 files changed, 152 insertions(+), 26 deletions(-)

diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index 06d909e804..b70cf04a61 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -1584,8 +1584,17 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
         successful connection; if it returns <literal>on</literal>, the connection
         will be closed.  If multiple hosts were specified in the connection
         string, any remaining servers will be tried just as if the connection
-        attempt had failed.  The default value of this parameter,
-        <literal>any</literal>, regards all connections as acceptable.
+        attempt had failed.
+      </para>
+      <para>
+        If this parameter is set to <literal>prefer-read</literal>, connections
+        where <literal>SHOW transaction_read_only</literal> returns <literal>on</literal>
+        are preferred. If no such connections can be found, then a connection
+        that allows read-write transactions will be accepted.
+      </para>
+      <para>
+        The default value of this parameter,<literal>any</literal>, regards all
+        connections as acceptable.
       </para>
       </listitem>
     </varlistentry>
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index d001bc513d..93d9abd3fb 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -322,7 +322,7 @@ static const internalPQconninfoOption PQconninfoOptions[] = {
 
 	{"target_session_attrs", "PGTARGETSESSIONATTRS",
 		DefaultTargetSessionAttrs, NULL,
-		"Target-Session-Attrs", "", 11, /* sizeof("read-write") = 11 */
+		"Target-Session-Attrs", "", 12, /* sizeof("prefer-read") = 12 */
 	offsetof(struct pg_conn, target_session_attrs)},
 
 	/* Terminating entry --- MUST BE LAST */
@@ -1240,7 +1240,8 @@ connectOptions2(PGconn *conn)
 	if (conn->target_session_attrs)
 	{
 		if (strcmp(conn->target_session_attrs, "any") != 0
-			&& strcmp(conn->target_session_attrs, "read-write") != 0)
+			&& strcmp(conn->target_session_attrs, "read-write") != 0
+			&& strcmp(conn->target_session_attrs, "prefer-read") != 0)
 		{
 			conn->status = CONNECTION_BAD;
 			printfPQExpBuffer(&conn->errorMessage,
@@ -2083,6 +2084,7 @@ PQconnectPoll(PGconn *conn)
 		case CONNECTION_SSL_STARTUP:
 		case CONNECTION_NEEDED:
 		case CONNECTION_CHECK_WRITABLE:
+		case CONNECTION_CHECK_READONLY:
 		case CONNECTION_CONSUME:
 			break;
 
@@ -2123,13 +2125,28 @@ keep_going:						/* We will come back to here until there is
 
 		if (conn->whichhost + 1 >= conn->nconnhost)
 		{
-			/*
-			 * Oops, no more hosts.  An appropriate error message is already
-			 * set up, so just set the right status.
-			 */
-			goto error_return;
+			if (conn->read_write_host_index >= 0)
+			{
+				/*
+				 * Go to here means failed to connect to
+				 * read-only servers and now try connect to
+				 * read-write server again. Only under the
+				 * 'prefer-read' scenario will go to here.
+				 */
+				conn->whichhost = conn->read_write_host_index;
+				conn->read_write_host_index = -2;
+			}
+			else
+			{
+				/*
+				 * Oops, no more hosts.  An appropriate error message is already
+				 * set up, so just set the right status.
+				 */
+				goto error_return;
+			}
 		}
-		conn->whichhost++;
+		else
+			conn->whichhost++;
 
 		/* Drop any address info for previous host */
 		release_conn_addrinfo(conn);
@@ -2317,6 +2334,7 @@ keep_going:						/* We will come back to here until there is
 							conn->try_next_addr = true;
 							goto keep_going;
 						}
+
 						appendPQExpBuffer(&conn->errorMessage,
 										  libpq_gettext("could not create socket: %s\n"),
 										  SOCK_STRERROR(SOCK_ERRNO, sebuf, sizeof(sebuf)));
@@ -3158,7 +3176,8 @@ keep_going:						/* We will come back to here until there is
 				}
 
 				/*
-				 * If a read-write connection is required, see if we have one.
+				 * If a read-write or prefer-read connection is required,
+				 * see if we have one.
 				 *
 				 * Servers before 7.4 lack the transaction_read_only GUC, but
 				 * by the same token they don't have any read-only mode, so we
@@ -3166,7 +3185,8 @@ keep_going:						/* We will come back to here until there is
 				 */
 				if (conn->sversion >= 70400 &&
 					conn->target_session_attrs != NULL &&
-					strcmp(conn->target_session_attrs, "read-write") == 0)
+					(strcmp(conn->target_session_attrs, "read-write") == 0 ||
+					 strcmp(conn->target_session_attrs, "prefer-read") == 0))
 				{
 					/*
 					 * Save existing error messages across the PQsendQuery
@@ -3185,11 +3205,40 @@ keep_going:						/* We will come back to here until there is
 						restoreErrorMessage(conn, &savedMessage);
 						goto error_return;
 					}
-					conn->status = CONNECTION_CHECK_WRITABLE;
+
+					if (strcmp(conn->target_session_attrs, "read-write") == 0)
+						conn->status = CONNECTION_CHECK_WRITABLE;
+					else
+						conn->status = CONNECTION_CHECK_READONLY;
+
 					restoreErrorMessage(conn, &savedMessage);
 					return PGRES_POLLING_READING;
 				}
 
+				/*
+				 * Requested type is prefer-read then record this connection index
+				 * and try the other before considering it later
+				 */
+				if ((conn->target_session_attrs != NULL) &&
+						(strcmp(conn->target_session_attrs, "prefer-read") == 0) &&
+						(conn->read_write_host_index != -2))
+				{
+					/* Close connection politely. */
+					conn->status = CONNECTION_OK;
+					sendTerminateConn(conn);
+
+					/* Record it */
+					if (conn->read_write_host_index == -1)
+						conn->read_write_host_index = conn->whichhost;
+
+					/*
+					 * Try next host if any, but we don't want to consider
+					 * additional addresses for this host.
+					 */
+					conn->try_next_host = true;
+					goto keep_going;
+				}
+
 				/* We can release the address list now. */
 				release_conn_addrinfo(conn);
 
@@ -3226,9 +3275,9 @@ keep_going:						/* We will come back to here until there is
 			}
 
 			/*
-			 * If a read-write connection is required, see if we have one.
-			 * (This should match the stanza in the CONNECTION_AUTH_OK case
-			 * above.)
+			 * If a read-write or prefer-read connection is required,
+			 * see if we have one. (This should match the stanza in the
+			 * CONNECTION_AUTH_OK case above.)
 			 *
 			 * Servers before 7.4 lack the transaction_read_only GUC, but by
 			 * the same token they don't have any read-only mode, so we may
@@ -3236,7 +3285,8 @@ keep_going:						/* We will come back to here until there is
 			 */
 			if (conn->sversion >= 70400 &&
 				conn->target_session_attrs != NULL &&
-				strcmp(conn->target_session_attrs, "read-write") == 0)
+				((strcmp(conn->target_session_attrs, "read-write") == 0) ||
+				 (strcmp(conn->target_session_attrs, "prefer-read") == 0)))
 			{
 				if (!saveErrorMessage(conn, &savedMessage))
 					goto error_return;
@@ -3248,11 +3298,40 @@ keep_going:						/* We will come back to here until there is
 					restoreErrorMessage(conn, &savedMessage);
 					goto error_return;
 				}
-				conn->status = CONNECTION_CHECK_WRITABLE;
+
+				if (strcmp(conn->target_session_attrs, "read-write") == 0)
+					conn->status = CONNECTION_CHECK_WRITABLE;
+				else
+					conn->status = CONNECTION_CHECK_READONLY;
+
 				restoreErrorMessage(conn, &savedMessage);
 				return PGRES_POLLING_READING;
 			}
 
+			/*
+			 * Requested type is prefer-read then record this connection index
+			 * and try the other before considering it later
+			 */
+			if ((conn->target_session_attrs != NULL) &&
+					(strcmp(conn->target_session_attrs, "prefer-read") == 0) &&
+					(conn->read_write_host_index != -2))
+			{
+				/* Close connection politely. */
+				conn->status = CONNECTION_OK;
+				sendTerminateConn(conn);
+
+				/* Record it */
+				if (conn->read_write_host_index == -1)
+					conn->read_write_host_index = conn->whichhost;
+
+				/*
+				 * Try next host if any, but we don't want to consider
+				 * additional addresses for this host.
+				 */
+				conn->try_next_host = true;
+				goto keep_going;
+			}
+
 			/* We can release the address list now. */
 			release_conn_addrinfo(conn);
 
@@ -3291,13 +3370,16 @@ keep_going:						/* We will come back to here until there is
 				return PGRES_POLLING_OK;
 			}
 		case CONNECTION_CHECK_WRITABLE:
+		case CONNECTION_CHECK_READONLY:
 			{
+				ConnStatusType oldstatus;
 				const char *displayed_host;
 				const char *displayed_port;
 
 				if (!saveErrorMessage(conn, &savedMessage))
 					goto error_return;
 
+				oldstatus = conn->status;
 				conn->status = CONNECTION_OK;
 				if (!PQconsumeInput(conn))
 				{
@@ -3307,7 +3389,7 @@ keep_going:						/* We will come back to here until there is
 
 				if (PQisBusy(conn))
 				{
-					conn->status = CONNECTION_CHECK_WRITABLE;
+					conn->status = oldstatus;
 					restoreErrorMessage(conn, &savedMessage);
 					return PGRES_POLLING_READING;
 				}
@@ -3319,11 +3401,24 @@ keep_going:						/* We will come back to here until there is
 					char	   *val;
 
 					val = PQgetvalue(res, 0, 0);
-					if (strncmp(val, "on", 2) == 0)
+
+					/*
+					 * Server is read-only and requested mode is read-write, ignore it.
+					 * Server is read-write and requested mode is prefer-read, record
+					 * it for the first time and try to consume in the next scan (it means
+					 * no read-only server is found in the first scan).
+					 */
+					if (((strncmp(val, "on", 2) == 0) &&
+							(oldstatus == CONNECTION_CHECK_WRITABLE)) ||
+						((strncmp(val, "off", 3) == 0) &&
+							(oldstatus == CONNECTION_CHECK_READONLY) &&
+							(conn->read_write_host_index != -2)))
 					{
-						/* Not writable; fail this connection. */
+						/* Not a requested type; fail this connection. */
 						const char *displayed_host;
 						const char *displayed_port;
+						const char *type = (oldstatus == CONNECTION_CHECK_READONLY) ?
+											"read-only" : "writable";
 
 						PQclear(res);
 						restoreErrorMessage(conn, &savedMessage);
@@ -3338,15 +3433,20 @@ keep_going:						/* We will come back to here until there is
 							displayed_port = DEF_PGPORT_STR;
 
 						appendPQExpBuffer(&conn->errorMessage,
-										  libpq_gettext("could not make a writable "
+										  libpq_gettext("could not make a %s "
 														"connection to server "
 														"\"%s:%s\"\n"),
-										  displayed_host, displayed_port);
+										  type, displayed_host, displayed_port);
 
 						/* Close connection politely. */
 						conn->status = CONNECTION_OK;
 						sendTerminateConn(conn);
 
+						/* Record read-write host index */
+						if ((oldstatus == CONNECTION_CHECK_READONLY) &&
+								(conn->read_write_host_index == -1))
+							conn->read_write_host_index = conn->whichhost;
+
 						/*
 						 * Try next host if any, but we don't want to consider
 						 * additional addresses for this host.
@@ -3355,7 +3455,7 @@ keep_going:						/* We will come back to here until there is
 						goto keep_going;
 					}
 
-					/* Session is read-write, so we're good. */
+					/* Session is requested type, so we're good. */
 					PQclear(res);
 					termPQExpBuffer(&savedMessage);
 
@@ -3568,6 +3668,8 @@ makeEmptyPGconn(void)
 		conn = NULL;
 	}
 
+	conn->read_write_host_index = -1;
+
 	return conn;
 }
 
diff --git a/src/interfaces/libpq/libpq-fe.h b/src/interfaces/libpq/libpq-fe.h
index 52bd5d2cd8..582f83bb0e 100644
--- a/src/interfaces/libpq/libpq-fe.h
+++ b/src/interfaces/libpq/libpq-fe.h
@@ -65,6 +65,8 @@ typedef enum
 	CONNECTION_NEEDED,			/* Internal state: connect() needed */
 	CONNECTION_CHECK_WRITABLE,	/* Check if we could make a writable
 								 * connection. */
+	CONNECTION_CHECK_READONLY,	/* Check if we could make a read-only
+								 * connection. */
 	CONNECTION_CONSUME			/* Wait for any pending message and consume
 								 * them. */
 } ConnStatusType;
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index 975ab33d02..a4716af654 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -363,7 +363,7 @@ struct pg_conn
 	char	   *krbsrvname;		/* Kerberos service name */
 #endif
 
-	/* Type of connection to make.  Possible values: any, read-write. */
+	/* Type of connection to make.  Possible values: any, read-write, perfer-read. */
 	char	   *target_session_attrs;
 
 	/* Optional file to write trace info to */
@@ -397,6 +397,7 @@ struct pg_conn
 	int			nconnhost;		/* # of hosts named in conn string */
 	int			whichhost;		/* host we're currently trying/connected to */
 	pg_conn_host *connhost;		/* details about each named host */
+	int 		read_write_host_index; /* index for first read-write host in connhost */
 
 	/* Connection data */
 	pgsocket	sock;			/* FD for socket, PGINVALID_SOCKET if
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index 8dff5fc720..8a6edd6867 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 26;
+use Test::More tests => 29;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -117,6 +117,18 @@ test_target_session_attrs($node_master, $node_standby_1, $node_master, "any",
 test_target_session_attrs($node_standby_1, $node_master, $node_standby_1,
 	"any", 0);
 
+# Connect to standby1 in "prefer-read" mode with master,standby1 list.
+test_target_session_attrs($node_master, $node_standby_1, $node_standby_1, "prefer-read",
+	0);
+
+# Connect to standby1 in "prefer-read" mode with standby1,master list.
+test_target_session_attrs($node_standby_1, $node_master, $node_standby_1,
+	"prefer-read", 0);
+
+# Connect to node_master in "prefer-read" mode with only master list.
+test_target_session_attrs($node_master, $node_master, $node_master,
+	"prefer-read", 0);
+
 note "switching to physical replication slot";
 
 # Switch to using a physical replication slot. We can do this without a new
-- 
2.18.0.windows.1

