On 2019-Sep-11, Tsunakawa, Takayuki wrote:

> From: Alvaro Herrera from 2ndQuadrant [mailto:alvhe...@alvh.no-ip.org]

> > Remaining patchset attached (per my count it's v13 of your patchset.
> 
> I'm afraid those weren't attached.

Oh, oops. Here they are then.

-- 
Álvaro Herrera                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
>From 247e888266882ab673efd04ecf017846400859ad Mon Sep 17 00:00:00 2001
From: Hari Babu <kommi.harib...@gmail.com>
Date: Wed, 27 Feb 2019 11:50:33 +1100
Subject: [PATCH v13 1/8] New TargetSessionAttrsType enum

This new enum lets the code compare the requested session type
directly instead of always comparing strings.  This may not show
much improvement with the current code, but it will be useful in
subsequent patches.
---
 src/interfaces/libpq/fe-connect.c | 12 ++++++++----
 src/interfaces/libpq/libpq-fe.h   |  6 ++++++
 src/interfaces/libpq/libpq-int.h  |  1 +
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index 8ba0159313..f104fd48aa 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -1295,8 +1295,11 @@ connectOptions2(PGconn *conn)
 	 */
 	if (conn->target_session_attrs)
 	{
-		if (strcmp(conn->target_session_attrs, "any") != 0
-			&& strcmp(conn->target_session_attrs, "read-write") != 0)
+		if (strcmp(conn->target_session_attrs, "any") == 0)
+			conn->requested_session_type = SESSION_TYPE_ANY;
+		else if (strcmp(conn->target_session_attrs, "read-write") == 0)
+			conn->requested_session_type = SESSION_TYPE_READ_WRITE;
+		else
 		{
 			conn->status = CONNECTION_BAD;
 			printfPQExpBuffer(&conn->errorMessage,
@@ -3449,8 +3452,7 @@ keep_going:						/* We will come back to here until there is
 				 * may just skip the test in that case.
 				 */
 				if (conn->sversion >= 70400 &&
-					conn->target_session_attrs != NULL &&
-					strcmp(conn->target_session_attrs, "read-write") == 0)
+					conn->requested_session_type != SESSION_TYPE_ANY)
 				{
 					/*
 					 * Save existing error messages across the PQsendQuery
@@ -3791,6 +3793,8 @@ makeEmptyPGconn(void)
 	conn->try_gss = true;
 #endif
 
+	conn->requested_session_type = SESSION_TYPE_ANY;
+
 	/*
 	 * We try to send at least 8K at a time, which is the usual size of pipe
 	 * buffers on Unix systems.  That way, when we are sending a large amount
diff --git a/src/interfaces/libpq/libpq-fe.h b/src/interfaces/libpq/libpq-fe.h
index 5f65db30e4..0612e68c62 100644
--- a/src/interfaces/libpq/libpq-fe.h
+++ b/src/interfaces/libpq/libpq-fe.h
@@ -71,6 +71,12 @@ typedef enum
 	CONNECTION_CHECK_TARGET		/* Check if we have a proper target connection */
 } ConnStatusType;
 
+typedef enum
+{
+	SESSION_TYPE_ANY = 0,		/* Any session (default) */
+	SESSION_TYPE_READ_WRITE		/* Read-write session */
+}			TargetSessionAttrsType;
+
 typedef enum
 {
 	PGRES_POLLING_FAILED = 0,
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index d37bb3ce40..20791b5b73 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -367,6 +367,7 @@ struct pg_conn
 
 	/* Type of connection to make.  Possible values: any, read-write. */
 	char	   *target_session_attrs;
+	TargetSessionAttrsType requested_session_type;
 
 	/* Optional file to write trace info to */
 	FILE	   *Pfdebug;
-- 
2.17.1

>From 9399e3f7f5e85f41871d4e586b0582f697380c0b Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvhe...@alvh.no-ip.org>
Date: Tue, 10 Sep 2019 12:48:28 -0300
Subject: [PATCH v13 2/8] doc change

---
 doc/src/sgml/libpq.sgml | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index 5601485555..23bf1ea632 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -1647,17 +1647,27 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
      <varlistentry id="libpq-connect-target-session-attrs" xreflabel="target_session_attrs">
       <term><literal>target_session_attrs</literal></term>
       <listitem>
+       <para>
+        The supported options for this parameter are, <literal>any</literal> and
+        <literal>read-write</literal>. The default value of this parameter,
+        <literal>any</literal>, regards all connections as acceptable.
+        If multiple hosts were specified in the connection string, based on the
+        specified value, any remaining servers will be tried before confirming
+        succesful connection or failure.
+       </para>
+
        <para>
         If this parameter is set to <literal>read-write</literal>, only a
         connection in which read-write transactions are accepted by default
-        is considered acceptable.  The query
-        <literal>SHOW transaction_read_only</literal> will be sent upon any
-        successful connection; if it returns <literal>on</literal>, the connection
-        will be closed.  If multiple hosts were specified in the connection
-        string, any remaining servers will be tried just as if the connection
-        attempt had failed.  The default value of this parameter,
-        <literal>any</literal>, regards all connections as acceptable.
-      </para>
+        is considered acceptable.
+       </para>
+
+       <para>
+        To find out whether the server supports read-write transactions are not,
+        query <literal>SHOW transaction_read_only</literal> will be sent upon any
+        successful connection; if it returns <literal>on</literal>, it means server
+        doesn't support read-write transactions.
+       </para>
       </listitem>
     </varlistentry>
     </variablelist>
-- 
2.17.1

>From c0b48576c961ed0c0304ac5c83363acf6a51f818 Mon Sep 17 00:00:00 2001
From: Hari Babu <kommi.harib...@gmail.com>
Date: Wed, 27 Mar 2019 17:05:24 +1100
Subject: [PATCH v13 3/8] Make transaction_read_only as GUC_REPORT variable

The value of the transaction_read_only GUC is used in multi-host
connections to identify a read-write host, but currently this is
carried out by executing a command to find out whether the host is
read-write or not.  Instead, reporting the GUC to the client upon
connection reduces the time needed to make the connection.
---
 doc/src/sgml/libpq.sgml           | 14 ++++---
 doc/src/sgml/protocol.sgml        |  8 ++--
 src/backend/utils/misc/guc.c      |  2 +-
 src/interfaces/libpq/fe-connect.c | 70 +++++++++++++++++++++++--------
 src/interfaces/libpq/fe-exec.c    |  6 ++-
 src/interfaces/libpq/libpq-int.h  |  1 +
 6 files changed, 74 insertions(+), 27 deletions(-)

diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index 23bf1ea632..edda94196b 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -1665,8 +1665,10 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
        <para>
         To find out whether the server supports read-write transactions are not,
         query <literal>SHOW transaction_read_only</literal> will be sent upon any
-        successful connection; if it returns <literal>on</literal>, it means server
-        doesn't support read-write transactions.
+        successful connection if the server is prior to version 12; if it returns
+        <literal>on</literal>, it means server doesn't support read-write transactions.
+        But for server version 12 or greater uses the value of <varname>transaction_read_only</varname>
+        configuration parameter that is reported by the server upon successful connection.
        </para>
       </listitem>
     </varlistentry>
@@ -1977,14 +1979,16 @@ const char *PQparameterStatus(const PGconn *conn, const char *paramName);
        <varname>DateStyle</varname>,
        <varname>IntervalStyle</varname>,
        <varname>TimeZone</varname>,
-       <varname>integer_datetimes</varname>, and
-       <varname>standard_conforming_strings</varname>.
+       <varname>integer_datetimes</varname>,
+       <varname>standard_conforming_strings</varname>, and
+       <varname>transaction_read_only</varname>.
        (<varname>server_encoding</varname>, <varname>TimeZone</varname>, and
        <varname>integer_datetimes</varname> were not reported by releases before 8.0;
        <varname>standard_conforming_strings</varname> was not reported by releases
        before 8.1;
        <varname>IntervalStyle</varname> was not reported by releases before 8.4;
-       <varname>application_name</varname> was not reported by releases before 9.0.)
+       <varname>application_name</varname> was not reported by releases before 9.0;
+       <varname>transaction_read_only</varname> was not reported by release before 12.0.)
        Note that
        <varname>server_version</varname>,
        <varname>server_encoding</varname> and
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index 80275215e0..dbf12fcc46 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -1283,14 +1283,16 @@ SELECT 1/0;
     <varname>DateStyle</varname>,
     <varname>IntervalStyle</varname>,
     <varname>TimeZone</varname>,
-    <varname>integer_datetimes</varname>, and
-    <varname>standard_conforming_strings</varname>.
+    <varname>integer_datetimes</varname>,
+    <varname>standard_conforming_strings</varname>, and
+    <varname>transaction_read_only</varname>.
     (<varname>server_encoding</varname>, <varname>TimeZone</varname>, and
     <varname>integer_datetimes</varname> were not reported by releases before 8.0;
     <varname>standard_conforming_strings</varname> was not reported by releases
     before 8.1;
     <varname>IntervalStyle</varname> was not reported by releases before 8.4;
-    <varname>application_name</varname> was not reported by releases before 9.0.)
+    <varname>application_name</varname> was not reported by releases before 9.0;
+    <varname>transaction_read_only</varname> was not reported by releases before 12.0.)
     Note that
     <varname>server_version</varname>,
     <varname>server_encoding</varname> and
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 90ffd89339..3c47473bcd 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1543,7 +1543,7 @@ static struct config_bool ConfigureNamesBool[] =
 		{"transaction_read_only", PGC_USERSET, CLIENT_CONN_STATEMENT,
 			gettext_noop("Sets the current transaction's read-only status."),
 			NULL,
-			GUC_NO_RESET_ALL | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE
+			GUC_REPORT | GUC_NO_RESET_ALL | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE
 		},
 		&XactReadOnly,
 		false,
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index f104fd48aa..abac93d6da 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -3454,26 +3454,61 @@ keep_going:						/* We will come back to here until there is
 				if (conn->sversion >= 70400 &&
 					conn->requested_session_type != SESSION_TYPE_ANY)
 				{
-					/*
-					 * Save existing error messages across the PQsendQuery
-					 * attempt.  This is necessary because PQsendQuery is
-					 * going to reset conn->errorMessage, so we would lose
-					 * error messages related to previous hosts we have tried
-					 * and failed to connect to.
-					 */
-					if (!saveErrorMessage(conn, &savedMessage))
-						goto error_return;
-
-					conn->status = CONNECTION_OK;
-					if (!PQsendQuery(conn,
-									 "SHOW transaction_read_only"))
+					if (conn->sversion < 120000)
 					{
+						/*
+						 * Save existing error messages across the PQsendQuery
+						 * attempt.  This is necessary because PQsendQuery is
+						 * going to reset conn->errorMessage, so we would lose
+						 * error messages related to previous hosts we have tried
+						 * and failed to connect to.
+						 */
+						if (!saveErrorMessage(conn, &savedMessage))
+							goto error_return;
+
+						conn->status = CONNECTION_OK;
+						if (!PQsendQuery(conn,
+										 "SHOW transaction_read_only"))
+						{
+							restoreErrorMessage(conn, &savedMessage);
+							goto error_return;
+						}
+						conn->status = CONNECTION_CHECK_WRITABLE;
 						restoreErrorMessage(conn, &savedMessage);
-						goto error_return;
+						return PGRES_POLLING_READING;
+					}
+					else if (conn->transaction_read_only)
+					{
+						/* Not writable; fail this connection. */
+						const char *displayed_host;
+						const char *displayed_port;
+
+						/* Append error report to conn->errorMessage. */
+						if (conn->connhost[conn->whichhost].type == CHT_HOST_ADDRESS)
+							displayed_host = conn->connhost[conn->whichhost].hostaddr;
+						else
+							displayed_host = conn->connhost[conn->whichhost].host;
+						displayed_port = conn->connhost[conn->whichhost].port;
+						if (displayed_port == NULL || displayed_port[0] == '\0')
+							displayed_port = DEF_PGPORT_STR;
+
+						appendPQExpBuffer(&conn->errorMessage,
+										  libpq_gettext("could not make a writable "
+														"connection to server "
+														"\"%s:%s\"\n"),
+										  displayed_host, displayed_port);
+
+						/* Close connection politely. */
+						conn->status = CONNECTION_OK;
+						sendTerminateConn(conn);
+
+						/*
+						 * Try next host if any, but we don't want to consider
+						 * additional addresses for this host.
+						 */
+						conn->try_next_host = true;
+						goto keep_going;
 					}
-					conn->status = CONNECTION_CHECK_WRITABLE;
-					restoreErrorMessage(conn, &savedMessage);
-					return PGRES_POLLING_READING;
 				}
 
 				/* We can release the address list now. */
@@ -3786,6 +3821,7 @@ makeEmptyPGconn(void)
 	conn->setenv_state = SETENV_STATE_IDLE;
 	conn->client_encoding = PG_SQL_ASCII;
 	conn->std_strings = false;	/* unless server says differently */
+	conn->transaction_read_only = false;
 	conn->verbosity = PQERRORS_DEFAULT;
 	conn->show_context = PQSHOW_CONTEXT_ERRORS;
 	conn->sock = PGINVALID_SOCKET;
diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c
index b3c59a0992..3c17100e05 100644
--- a/src/interfaces/libpq/fe-exec.c
+++ b/src/interfaces/libpq/fe-exec.c
@@ -1059,7 +1059,7 @@ pqSaveParameterStatus(PGconn *conn, const char *name, const char *value)
 	}
 
 	/*
-	 * Special hacks: remember client_encoding and
+	 * Special hacks: remember client_encoding, transaction_read_only and
 	 * standard_conforming_strings, and convert server version to a numeric
 	 * form.  We keep the first two of these in static variables as well, so
 	 * that PQescapeString and PQescapeBytea can behave somewhat sanely (at
@@ -1113,6 +1113,10 @@ pqSaveParameterStatus(PGconn *conn, const char *name, const char *value)
 		else
 			conn->sversion = 0; /* unknown */
 	}
+	else if (strcmp(name, "transaction_read_only") == 0)
+	{
+		conn->transaction_read_only = (strcmp(value, "on") == 0);
+	}
 }
 
 
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index 20791b5b73..7d26b94f9f 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -432,6 +432,7 @@ struct pg_conn
 	pgParameterStatus *pstatus; /* ParameterStatus data */
 	int			client_encoding;	/* encoding id */
 	bool		std_strings;	/* standard_conforming_strings */
+	bool		transaction_read_only;	/* transaction_read_only */
 	PGVerbosity verbosity;		/* error/notice message verbosity */
 	PGContextVisibility show_context;	/* whether to show CONTEXT field */
 	PGlobjfuncs *lobjfuncs;		/* private state for large-object access fns */
-- 
2.17.1

>From 2dee9b996ed8f242ceb5d0f1924a9c0455620e53 Mon Sep 17 00:00:00 2001
From: Hari Babu <kommi.harib...@gmail.com>
Date: Wed, 27 Mar 2019 17:56:48 +1100
Subject: [PATCH v13 4/8] New prefer-read target_session_attrs type

With the prefer-read option, an application can prefer connecting
to a read-only server if one is available in the list of hosts;
otherwise it connects to a read-write server.
---
 doc/src/sgml/libpq.sgml               |  21 ++--
 src/interfaces/libpq/fe-connect.c     | 161 ++++++++++++++++++++++----
 src/interfaces/libpq/libpq-fe.h       |   3 +-
 src/interfaces/libpq/libpq-int.h      |  13 ++-
 src/test/recovery/t/001_stream_rep.pl |  14 ++-
 5 files changed, 177 insertions(+), 35 deletions(-)

diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index edda94196b..29f9ae5c78 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -1648,12 +1648,12 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
       <term><literal>target_session_attrs</literal></term>
       <listitem>
        <para>
-        The supported options for this parameter are, <literal>any</literal> and
-        <literal>read-write</literal>. The default value of this parameter,
-        <literal>any</literal>, regards all connections as acceptable.
-        If multiple hosts were specified in the connection string, based on the
-        specified value, any remaining servers will be tried before confirming
-        succesful connection or failure.
+        The supported options for this parameter are, <literal>any</literal>,
+        <literal>read-write</literal> and <literal>prefer-read</literal>.
+        The default value of this parameter, <literal>any</literal>, regards
+        all connections as acceptable. If multiple hosts were specified in the
+        connection string, based on the specified value, any remaining servers
+        will be tried before confirming succesful connection or failure.
        </para>
 
        <para>
@@ -1662,6 +1662,13 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
         is considered acceptable.
        </para>
 
+       <para>
+        If this parameter is set to <literal>prefer-read</literal>, only a
+        connection in which read-only transactions are accepted by default
+        is preferred. If no such connections can be found, then a connection
+        in which read-write transactions accepted will be considered.
+       </para>
+
        <para>
         To find out whether the server supports read-write transactions are not,
         query <literal>SHOW transaction_read_only</literal> will be sent upon any
@@ -1671,7 +1678,7 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
         configuration parameter that is reported by the server upon successful connection.
        </para>
       </listitem>
-    </varlistentry>
+     </varlistentry>
     </variablelist>
    </para>
   </sect2>
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index abac93d6da..8673b8e903 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -341,7 +341,7 @@ static const internalPQconninfoOption PQconninfoOptions[] = {
 
 	{"target_session_attrs", "PGTARGETSESSIONATTRS",
 		DefaultTargetSessionAttrs, NULL,
-		"Target-Session-Attrs", "", 11, /* sizeof("read-write") = 11 */
+		"Target-Session-Attrs", "", 12, /* sizeof("prefer-read") = 12 */
 	offsetof(struct pg_conn, target_session_attrs)},
 
 	/* Terminating entry --- MUST BE LAST */
@@ -1299,6 +1299,8 @@ connectOptions2(PGconn *conn)
 			conn->requested_session_type = SESSION_TYPE_ANY;
 		else if (strcmp(conn->target_session_attrs, "read-write") == 0)
 			conn->requested_session_type = SESSION_TYPE_READ_WRITE;
+		else if (strcmp(conn->target_session_attrs, "prefer-read") == 0)
+			conn->requested_session_type = SESSION_TYPE_PREFER_READ;
 		else
 		{
 			conn->status = CONNECTION_BAD;
@@ -2232,13 +2234,31 @@ keep_going:						/* We will come back to here until there is
 
 		if (conn->whichhost + 1 >= conn->nconnhost)
 		{
-			/*
-			 * Oops, no more hosts.  An appropriate error message is already
-			 * set up, so just set the right status.
-			 */
-			goto error_return;
+			if (conn->read_write_host_index >= 0)
+			{
+				/*
+				 * Getting here means, failed to connect to read-only servers
+				 * and now try connect to read-write server again.
+				 */
+				conn->whichhost = conn->read_write_host_index;
+
+				/*
+				 * Reset the host index value to avoid recursion during the
+				 * second connection attempt.
+				 */
+				conn->read_write_host_index = -2;
+			}
+			else
+			{
+				/*
+				 * Oops, no more hosts.  An appropriate error message is
+				 * already set up, so just set the right status.
+				 */
+				goto error_return;
+			}
 		}
-		conn->whichhost++;
+		else
+			conn->whichhost++;
 
 		/* Drop any address info for previous host */
 		release_conn_addrinfo(conn);
@@ -3445,7 +3465,8 @@ keep_going:						/* We will come back to here until there is
 		case CONNECTION_CHECK_TARGET:
 			{
 				/*
-				 * If a read-write connection is required, see if we have one.
+				 * If a read-write or prefer-read connection is required, see
+				 * if we have one.
 				 *
 				 * Servers before 7.4 lack the transaction_read_only GUC, but
 				 * by the same token they don't have any read-only mode, so we
@@ -3460,8 +3481,8 @@ keep_going:						/* We will come back to here until there is
 						 * Save existing error messages across the PQsendQuery
 						 * attempt.  This is necessary because PQsendQuery is
 						 * going to reset conn->errorMessage, so we would lose
-						 * error messages related to previous hosts we have tried
-						 * and failed to connect to.
+						 * error messages related to previous hosts we have
+						 * tried and failed to connect to.
 						 */
 						if (!saveErrorMessage(conn, &savedMessage))
 							goto error_return;
@@ -3473,16 +3494,30 @@ keep_going:						/* We will come back to here until there is
 							restoreErrorMessage(conn, &savedMessage);
 							goto error_return;
 						}
+
 						conn->status = CONNECTION_CHECK_WRITABLE;
+
 						restoreErrorMessage(conn, &savedMessage);
 						return PGRES_POLLING_READING;
 					}
-					else if (conn->transaction_read_only)
+					else if ((conn->transaction_read_only &&
+							  conn->requested_session_type == SESSION_TYPE_READ_WRITE) ||
+							 (!conn->transaction_read_only &&
+							  conn->requested_session_type == SESSION_TYPE_PREFER_READ))
 					{
-						/* Not writable; fail this connection. */
+						/* Not a requested type; fail this connection. */
 						const char *displayed_host;
 						const char *displayed_port;
 
+						/*
+						 * The following scenario is possible only for the
+						 * prefer-read mode for the next pass of the list of
+						 * connections as it couldn't find any servers that
+						 * are default read-only.
+						 */
+						if (conn->read_write_host_index == -2)
+							goto consume_checked_target_connection;
+
 						/* Append error report to conn->errorMessage. */
 						if (conn->connhost[conn->whichhost].type == CHT_HOST_ADDRESS)
 							displayed_host = conn->connhost[conn->whichhost].hostaddr;
@@ -3492,16 +3527,28 @@ keep_going:						/* We will come back to here until there is
 						if (displayed_port == NULL || displayed_port[0] == '\0')
 							displayed_port = DEF_PGPORT_STR;
 
-						appendPQExpBuffer(&conn->errorMessage,
-										  libpq_gettext("could not make a writable "
-														"connection to server "
-														"\"%s:%s\"\n"),
-										  displayed_host, displayed_port);
+						if (conn->requested_session_type == SESSION_TYPE_READ_WRITE)
+							appendPQExpBuffer(&conn->errorMessage,
+											  libpq_gettext("could not make a writable "
+															"connection to server "
+															"\"%s:%s\"\n"),
+											  displayed_host, displayed_port);
+						else
+							appendPQExpBuffer(&conn->errorMessage,
+											  libpq_gettext("could not make a readonly "
+															"connection to server "
+															"\"%s:%s\"\n"),
+											  displayed_host, displayed_port);
 
 						/* Close connection politely. */
 						conn->status = CONNECTION_OK;
 						sendTerminateConn(conn);
 
+						/* Record read-write host index */
+						if (conn->requested_session_type == SESSION_TYPE_PREFER_READ &&
+							conn->read_write_host_index == -1)
+							conn->read_write_host_index = conn->whichhost;
+
 						/*
 						 * Try next host if any, but we don't want to consider
 						 * additional addresses for this host.
@@ -3509,8 +3556,36 @@ keep_going:						/* We will come back to here until there is
 						conn->try_next_host = true;
 						goto keep_going;
 					}
+
+					/* obtained the requested type, consume it */
+					goto consume_checked_target_connection;
 				}
 
+				/*
+				 * Requested type is prefer-read, then record this host index
+				 * and try the other before considering it later
+				 */
+				if (conn->requested_session_type == SESSION_TYPE_PREFER_READ &&
+					conn->read_write_host_index != -2)
+				{
+					/* Close connection politely. */
+					conn->status = CONNECTION_OK;
+					sendTerminateConn(conn);
+
+					/* Record read-write host index */
+					if (conn->read_write_host_index == -1)
+						conn->read_write_host_index = conn->whichhost;
+
+					/*
+					 * Try next host if any, but we don't want to consider
+					 * additional addresses for this host.
+					 */
+					conn->try_next_host = true;
+					goto keep_going;
+				}
+
+		consume_checked_target_connection:
+
 				/* We can release the address list now. */
 				release_conn_addrinfo(conn);
 
@@ -3608,11 +3683,33 @@ keep_going:						/* We will come back to here until there is
 					PQntuples(res) == 1)
 				{
 					char	   *val;
+					bool		readonly_server;
 
 					val = PQgetvalue(res, 0, 0);
-					if (strncmp(val, "on", 2) == 0)
+					readonly_server = (strncmp(val, "on", 2) == 0);
+
+					/*
+					 * Server is read-only and requested mode is read-write,
+					 * ignore it. Server is read-write and requested mode is
+					 * prefer-read, record it for the first time and try to
+					 * consume in the next scan (it means no read-only server
+					 * is found in the first scan).
+					 */
+					if ((readonly_server &&
+						 conn->requested_session_type == SESSION_TYPE_READ_WRITE) ||
+						(!readonly_server &&
+						 conn->requested_session_type == SESSION_TYPE_PREFER_READ))
 					{
-						/* Not writable; fail this connection. */
+						/*
+						 * The following scenario is possible only for the
+						 * prefer-read mode for the next pass of the list of
+						 * connections as it couldn't find any servers that
+						 * are default read-only.
+						 */
+						if (conn->read_write_host_index == -2)
+							goto consume_checked_write_connection;
+
+						/* Not a requested type; fail this connection. */
 						PQclear(res);
 						restoreErrorMessage(conn, &savedMessage);
 
@@ -3625,16 +3722,28 @@ keep_going:						/* We will come back to here until there is
 						if (displayed_port == NULL || displayed_port[0] == '\0')
 							displayed_port = DEF_PGPORT_STR;
 
-						appendPQExpBuffer(&conn->errorMessage,
-										  libpq_gettext("could not make a writable "
-														"connection to server "
-														"\"%s:%s\"\n"),
-										  displayed_host, displayed_port);
+						if (conn->requested_session_type == SESSION_TYPE_READ_WRITE)
+							appendPQExpBuffer(&conn->errorMessage,
+											  libpq_gettext("could not make a writable "
+															"connection to server "
+															"\"%s:%s\"\n"),
+											  displayed_host, displayed_port);
+						else
+							appendPQExpBuffer(&conn->errorMessage,
+											  libpq_gettext("could not make a readonly "
+															"connection to server "
+															"\"%s:%s\"\n"),
+											  displayed_host, displayed_port);
 
 						/* Close connection politely. */
 						conn->status = CONNECTION_OK;
 						sendTerminateConn(conn);
 
+						/* Record read-write host index */
+						if (conn->requested_session_type == SESSION_TYPE_PREFER_READ &&
+							conn->read_write_host_index == -1)
+							conn->read_write_host_index = conn->whichhost;
+
 						/*
 						 * Try next host if any, but we don't want to consider
 						 * additional addresses for this host.
@@ -3643,7 +3752,8 @@ keep_going:						/* We will come back to here until there is
 						goto keep_going;
 					}
 
-					/* Session is read-write, so we're good. */
+			consume_checked_write_connection:
+					/* Session is requested type, so we're good. */
 					PQclear(res);
 					termPQExpBuffer(&savedMessage);
 
@@ -3830,6 +3940,7 @@ makeEmptyPGconn(void)
 #endif
 
 	conn->requested_session_type = SESSION_TYPE_ANY;
+	conn->read_write_host_index = -1;
 
 	/*
 	 * We try to send at least 8K at a time, which is the usual size of pipe
diff --git a/src/interfaces/libpq/libpq-fe.h b/src/interfaces/libpq/libpq-fe.h
index 0612e68c62..563f8b98ce 100644
--- a/src/interfaces/libpq/libpq-fe.h
+++ b/src/interfaces/libpq/libpq-fe.h
@@ -74,7 +74,8 @@ typedef enum
 typedef enum
 {
 	SESSION_TYPE_ANY = 0,		/* Any session (default) */
-	SESSION_TYPE_READ_WRITE		/* Read-write session */
+	SESSION_TYPE_READ_WRITE,	/* Read-write session */
+	SESSION_TYPE_PREFER_READ	/* Prefer read only session */
 }			TargetSessionAttrsType;
 
 typedef enum
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index 7d26b94f9f..174f370818 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -365,7 +365,10 @@ struct pg_conn
 	char	   *krbsrvname;		/* Kerberos service name */
 #endif
 
-	/* Type of connection to make.  Possible values: any, read-write. */
+	/*
+	 * Type of connection to make.  Possible values: any, read-write,
+	 * prefer-read.
+	 */
 	char	   *target_session_attrs;
 	TargetSessionAttrsType requested_session_type;
 
@@ -402,6 +405,14 @@ struct pg_conn
 	pg_conn_host *connhost;		/* details about each named host */
 	char	   *connip;			/* IP address for current network connection */
 
+	/*
+	 * First read-write host index in the connection string.
+	 *
+	 * Initial value is -1, then the index of the first read-write host, -2
+	 * during the second attempt of connection to avoid recursion.
+	 */
+	int			read_write_host_index;
+
 	/* Connection data */
 	pgsocket	sock;			/* FD for socket, PGINVALID_SOCKET if
 								 * unconnected */
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index 3c743d7d7c..af465be505 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 32;
+use Test::More tests => 35;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -121,6 +121,18 @@ test_target_session_attrs($node_master, $node_standby_1, $node_master, "any",
 test_target_session_attrs($node_standby_1, $node_master, $node_standby_1,
 	"any", 0);
 
+# Connect to standby1 in "prefer-read" mode with master,standby1 list.
+test_target_session_attrs($node_master, $node_standby_1, $node_standby_1, "prefer-read",
+	0);
+
+# Connect to standby1 in "prefer-read" mode with standby1,master list.
+test_target_session_attrs($node_standby_1, $node_master, $node_standby_1,
+	"prefer-read", 0);
+
+# Connect to node_master in "prefer-read" mode with only master list.
+test_target_session_attrs($node_master, $node_master, $node_master,
+	"prefer-read", 0);
+
 # Test for SHOW commands using a WAL sender connection with a replication
 # role.
 note "testing SHOW commands for replication connection";
-- 
2.17.1

>From 356b5c8969b04bd29c69de888efe5ab1cd8e3e3d Mon Sep 17 00:00:00 2001
From: Hari Babu <kommi.harib...@gmail.com>
Date: Wed, 27 Mar 2019 18:02:59 +1100
Subject: [PATCH v13 5/8] New read-only target_session_attrs type

With the read-only option, an application can connect to a
read-only server in the list of hosts.  If no read-only server
is available, the connection attempt fails.
---
 doc/src/sgml/libpq.sgml               |  7 +++++-
 src/interfaces/libpq/fe-connect.c     | 34 ++++++++++++++++++++-------
 src/interfaces/libpq/libpq-fe.h       |  3 ++-
 src/interfaces/libpq/libpq-int.h      |  2 +-
 src/test/recovery/t/001_stream_rep.pl | 10 +++++++-
 5 files changed, 43 insertions(+), 13 deletions(-)

diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index 29f9ae5c78..3a1071e408 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -1649,7 +1649,7 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
       <listitem>
        <para>
         The supported options for this parameter are, <literal>any</literal>,
-        <literal>read-write</literal> and <literal>prefer-read</literal>.
+        <literal>read-write</literal>, <literal>prefer-read</literal> and <literal>read-only</literal>.
         The default value of this parameter, <literal>any</literal>, regards
         all connections as acceptable. If multiple hosts were specified in the
         connection string, based on the specified value, any remaining servers
@@ -1677,6 +1677,11 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
         But for server version 12 or greater uses the value of <varname>transaction_read_only</varname>
         configuration parameter that is reported by the server upon successful connection.
        </para>
+
+       <para>
+        If this parameter is set to <literal>read-only</literal>, only a connection
+        in which read-only transactions are accepted by default.
+       </para>
       </listitem>
      </varlistentry>
     </variablelist>
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index 8673b8e903..e660675808 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -1301,6 +1301,8 @@ connectOptions2(PGconn *conn)
 			conn->requested_session_type = SESSION_TYPE_READ_WRITE;
 		else if (strcmp(conn->target_session_attrs, "prefer-read") == 0)
 			conn->requested_session_type = SESSION_TYPE_PREFER_READ;
+		else if (strcmp(conn->target_session_attrs, "read-only") == 0)
+			conn->requested_session_type = SESSION_TYPE_READ_ONLY;
 		else
 		{
 			conn->status = CONNECTION_BAD;
@@ -3465,8 +3467,8 @@ keep_going:						/* We will come back to here until there is
 		case CONNECTION_CHECK_TARGET:
 			{
 				/*
-				 * If a read-write or prefer-read connection is required, see
-				 * if we have one.
+				 * If a read-write, prefer-read or read-only connection is
+				 * required, see if we have one.
 				 *
 				 * Servers before 7.4 lack the transaction_read_only GUC, but
 				 * by the same token they don't have any read-only mode, so we
@@ -3503,7 +3505,8 @@ keep_going:						/* We will come back to here until there is
 					else if ((conn->transaction_read_only &&
 							  conn->requested_session_type == SESSION_TYPE_READ_WRITE) ||
 							 (!conn->transaction_read_only &&
-							  conn->requested_session_type == SESSION_TYPE_PREFER_READ))
+							  (conn->requested_session_type == SESSION_TYPE_PREFER_READ ||
+							   conn->requested_session_type == SESSION_TYPE_READ_ONLY)))
 					{
 						/* Not a requested type; fail this connection. */
 						const char *displayed_host;
@@ -3563,17 +3566,28 @@ keep_going:						/* We will come back to here until there is
 
 				/*
 				 * Requested type is prefer-read, then record this host index
-				 * and try the other before considering it later
+				 * and try the other before considering it later. If requested
+				 * type of connection is read-only, ignore this connection.
 				 */
-				if (conn->requested_session_type == SESSION_TYPE_PREFER_READ &&
-					conn->read_write_host_index != -2)
+				if (conn->requested_session_type == SESSION_TYPE_PREFER_READ ||
+					conn->requested_session_type == SESSION_TYPE_READ_ONLY)
 				{
+					/*
+					 * The following scenario is possible only for the
+					 * prefer-read mode for the next pass of the list of
+					 * connections as it couldn't find any servers that are
+					 * default read-only.
+					 */
+					if (conn->read_write_host_index == -2)
+						goto target_accept_connection;
+
 					/* Close connection politely. */
 					conn->status = CONNECTION_OK;
 					sendTerminateConn(conn);
 
 					/* Record read-write host index */
-					if (conn->read_write_host_index == -1)
+					if (conn->requested_session_type == SESSION_TYPE_PREFER_READ &&
+						conn->read_write_host_index == -1)
 						conn->read_write_host_index = conn->whichhost;
 
 					/*
@@ -3693,12 +3707,14 @@ keep_going:						/* We will come back to here until there is
 					 * ignore it. Server is read-write and requested mode is
 					 * prefer-read, record it for the first time and try to
 					 * consume in the next scan (it means no read-only server
-					 * is found in the first scan).
+					 * is found in the first scan). Server is read-write and
+					 * requested mode is read-only, ignore this connection.
 					 */
 					if ((readonly_server &&
 						 conn->requested_session_type == SESSION_TYPE_READ_WRITE) ||
 						(!readonly_server &&
-						 conn->requested_session_type == SESSION_TYPE_PREFER_READ))
+						 (conn->requested_session_type == SESSION_TYPE_PREFER_READ ||
+						  conn->requested_session_type == SESSION_TYPE_READ_ONLY)))
 					{
 						/*
 						 * The following scenario is possible only for the
diff --git a/src/interfaces/libpq/libpq-fe.h b/src/interfaces/libpq/libpq-fe.h
index 563f8b98ce..fc0178cd5d 100644
--- a/src/interfaces/libpq/libpq-fe.h
+++ b/src/interfaces/libpq/libpq-fe.h
@@ -75,7 +75,8 @@ typedef enum
 {
 	SESSION_TYPE_ANY = 0,		/* Any session (default) */
 	SESSION_TYPE_READ_WRITE,	/* Read-write session */
-	SESSION_TYPE_PREFER_READ	/* Prefer read only session */
+	SESSION_TYPE_PREFER_READ,	/* Prefer read only session */
+	SESSION_TYPE_READ_ONLY		/* Read only session */
 }			TargetSessionAttrsType;
 
 typedef enum
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index 174f370818..e3c15b2ba8 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -367,7 +367,7 @@ struct pg_conn
 
 	/*
 	 * Type of connection to make.  Possible values: any, read-write,
-	 * prefer-read.
+	 * prefer-read and read-only.
 	 */
 	char	   *target_session_attrs;
 	TargetSessionAttrsType requested_session_type;
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index af465be505..ac1e11e1ab 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 35;
+use Test::More tests => 37;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -133,6 +133,14 @@ test_target_session_attrs($node_standby_1, $node_master, $node_standby_1,
 test_target_session_attrs($node_master, $node_master, $node_master,
 	"prefer-read", 0);
 
+# Connect to standby1 in "read-only" mode with master,standby1 list.
+test_target_session_attrs($node_master, $node_standby_1, $node_standby_1,
+	"read-only", 0);
+
+# Connect to standby1 in "read-only" mode with standby1,master list.
+test_target_session_attrs($node_standby_1, $node_master, $node_standby_1,
+	"read-only", 0);
+
 # Test for SHOW commands using a WAL sender connection with a replication
 # role.
 note "testing SHOW commands for replication connection";
-- 
2.17.1

>From 9e551ce44a04a4646518c66aee146e4fc7c0c0f9 Mon Sep 17 00:00:00 2001
From: Hari Babu <kommi.harib...@gmail.com>
Date: Wed, 10 Apr 2019 23:19:09 +1000
Subject: [PATCH v13 6/8] Primary, prefer-standby and standby options

Add new options that check whether a server is in recovery mode
before accepting a connection to it. To determine whether the
server is in recovery mode, libpq sends the query
'SELECT pg_is_in_recovery()' to the server.
---
 doc/src/sgml/libpq.sgml               |  26 ++-
 src/interfaces/libpq/fe-connect.c     | 236 +++++++++++++++++++++++---
 src/interfaces/libpq/libpq-fe.h       |   8 +-
 src/interfaces/libpq/libpq-int.h      |   4 +-
 src/test/recovery/t/001_stream_rep.pl |  18 +-
 5 files changed, 262 insertions(+), 30 deletions(-)

diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index 3a1071e408..e447e8fad7 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -1649,7 +1649,8 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
       <listitem>
        <para>
         The supported options for this parameter are, <literal>any</literal>,
-        <literal>read-write</literal>, <literal>prefer-read</literal> and <literal>read-only</literal>.
+        <literal>read-write</literal>, <literal>prefer-read</literal>, <literal>read-only</literal>,
+        <literal>primary</literal>, <literal>prefer-standby</literal> and <literal>standby</literal>.
         The default value of this parameter, <literal>any</literal>, regards
         all connections as acceptable. If multiple hosts were specified in the
         connection string, based on the specified value, any remaining servers
@@ -1682,6 +1683,29 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
         If this parameter is set to <literal>read-only</literal>, only a connection
         in which read-only transactions are accepted by default.
        </para>
+
+       <para>
+        If this parameter is set to <literal>primary</literal>, only a connection in which
+        the server is not in recovery mode is considered acceptable.
+       </para>
+
+       <para>
+        If this parameter is set to <literal>prefer-standby</literal>, a connection in which
+        the server is in recovery mode is preferred. If no such connection can be found,
+        then a connection in which the server is not in recovery mode will be considered.
+       </para>
+
+       <para>
+        If this parameter is set to <literal>standby</literal>, only a connection in which
+        the server is in recovery mode is considered acceptable.
+       </para>
+
+       <para>
+        To find out whether the server is in recovery mode or not, query <literal>SELECT pg_is_in_recovery()</literal>
+        will be sent upon any successful connection; if it returns <literal>t</literal>, means server
+        is in recovery mode.
+       </para>
+
       </listitem>
      </varlistentry>
     </variablelist>
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index e660675808..2e1872795a 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -125,6 +125,7 @@ static int	ldapServiceLookup(const char *purl, PQconninfoOption *options,
 #define DefaultOption	""
 #define DefaultAuthtype		  ""
 #define DefaultTargetSessionAttrs	"any"
+#define DefaultTargetServerType	"any"
 #ifdef USE_SSL
 #define DefaultSSLMode "prefer"
 #else
@@ -341,7 +342,7 @@ static const internalPQconninfoOption PQconninfoOptions[] = {
 
 	{"target_session_attrs", "PGTARGETSESSIONATTRS",
 		DefaultTargetSessionAttrs, NULL,
-		"Target-Session-Attrs", "", 12, /* sizeof("prefer-read") = 12 */
+		"Target-Session-Attrs", "", 15, /* sizeof("prefer-standby") = 15 */
 	offsetof(struct pg_conn, target_session_attrs)},
 
 	/* Terminating entry --- MUST BE LAST */
@@ -1303,6 +1304,12 @@ connectOptions2(PGconn *conn)
 			conn->requested_session_type = SESSION_TYPE_PREFER_READ;
 		else if (strcmp(conn->target_session_attrs, "read-only") == 0)
 			conn->requested_session_type = SESSION_TYPE_READ_ONLY;
+		else if (strcmp(conn->target_session_attrs, "primary") == 0)
+			conn->requested_session_type = SESSION_TYPE_PRIMARY;
+		else if (strcmp(conn->target_session_attrs, "prefer-standby") == 0)
+			conn->requested_session_type = SESSION_TYPE_PREFER_STANDBY;
+		else if (strcmp(conn->target_session_attrs, "standby") == 0)
+			conn->requested_session_type = SESSION_TYPE_STANDBY;
 		else
 		{
 			conn->status = CONNECTION_BAD;
@@ -2197,6 +2204,7 @@ PQconnectPoll(PGconn *conn)
 		case CONNECTION_CHECK_WRITABLE:
 		case CONNECTION_CONSUME:
 		case CONNECTION_GSS_STARTUP:
+		case CONNECTION_CHECK_RECOVERY:
 			break;
 
 		default:
@@ -2236,19 +2244,19 @@ keep_going:						/* We will come back to here until there is
 
 		if (conn->whichhost + 1 >= conn->nconnhost)
 		{
-			if (conn->read_write_host_index >= 0)
+			if (conn->read_write_or_primary_host_index >= 0)
 			{
 				/*
 				 * Getting here means, failed to connect to read-only servers
 				 * and now try connect to read-write server again.
 				 */
-				conn->whichhost = conn->read_write_host_index;
+				conn->whichhost = conn->read_write_or_primary_host_index;
 
 				/*
 				 * Reset the host index value to avoid recursion during the
 				 * second connection attempt.
 				 */
-				conn->read_write_host_index = -2;
+				conn->read_write_or_primary_host_index = -2;
 			}
 			else
 			{
@@ -3475,7 +3483,9 @@ keep_going:						/* We will come back to here until there is
 				 * may just skip the test in that case.
 				 */
 				if (conn->sversion >= 70400 &&
-					conn->requested_session_type != SESSION_TYPE_ANY)
+					(conn->requested_session_type == SESSION_TYPE_READ_WRITE ||
+					 conn->requested_session_type == SESSION_TYPE_PREFER_READ ||
+					 conn->requested_session_type == SESSION_TYPE_READ_ONLY))
 				{
 					if (conn->sversion < 120000)
 					{
@@ -3518,7 +3528,7 @@ keep_going:						/* We will come back to here until there is
 						 * connections as it couldn't find any servers that
 						 * are default read-only.
 						 */
-						if (conn->read_write_host_index == -2)
+						if (conn->read_write_or_primary_host_index == -2)
 							goto consume_checked_target_connection;
 
 						/* Append error report to conn->errorMessage. */
@@ -3549,8 +3559,8 @@ keep_going:						/* We will come back to here until there is
 
 						/* Record read-write host index */
 						if (conn->requested_session_type == SESSION_TYPE_PREFER_READ &&
-							conn->read_write_host_index == -1)
-							conn->read_write_host_index = conn->whichhost;
+							conn->read_write_or_primary_host_index == -1)
+							conn->read_write_or_primary_host_index = conn->whichhost;
 
 						/*
 						 * Try next host if any, but we don't want to consider
@@ -3565,30 +3575,70 @@ keep_going:						/* We will come back to here until there is
 				}
 
 				/*
-				 * Requested type is prefer-read, then record this host index
-				 * and try the other before considering it later. If requested
-				 * type of connection is read-only, ignore this connection.
+				 * Servers before 9.0 don't support recovery, skip the check
+				 * when the requested type of connection is primary,
+				 * prefer-standby or standby.
 				 */
+				else if ((conn->sversion >= 90000 &&
+						  (conn->requested_session_type == SESSION_TYPE_PRIMARY ||
+						   conn->requested_session_type == SESSION_TYPE_PREFER_STANDBY ||
+						   conn->requested_session_type == SESSION_TYPE_STANDBY)))
+				{
+					/*
+					 * Save existing error messages across the PQsendQuery
+					 * attempt.  This is necessary because PQsendQuery is
+					 * going to reset conn->errorMessage, so we would lose
+					 * error messages related to previous hosts we have tried
+					 * and failed to connect to.
+					 */
+					if (!saveErrorMessage(conn, &savedMessage))
+						goto error_return;
+
+					conn->status = CONNECTION_OK;
+					if (!PQsendQuery(conn, "SELECT pg_is_in_recovery()"))
+					{
+						restoreErrorMessage(conn, &savedMessage);
+						goto error_return;
+					}
+
+					conn->status = CONNECTION_CHECK_RECOVERY;
+
+					restoreErrorMessage(conn, &savedMessage);
+					return PGRES_POLLING_READING;
+				}
+
+				/*
+				 * Requested type is prefer-read or prefer-standby, then
+				 * record this host index and try the other before considering
+				 * it later. If requested type of connection is read-only or
+				 * standby, ignore this connection.
+				 */
+
 				if (conn->requested_session_type == SESSION_TYPE_PREFER_READ ||
-					conn->requested_session_type == SESSION_TYPE_READ_ONLY)
+					conn->requested_session_type == SESSION_TYPE_READ_ONLY ||
+					conn->requested_session_type == SESSION_TYPE_PREFER_STANDBY ||
+					conn->requested_session_type == SESSION_TYPE_STANDBY)
 				{
 					/*
 					 * The following scenario is possible only for the
-					 * prefer-read mode for the next pass of the list of
-					 * connections as it couldn't find any servers that are
-					 * default read-only.
+					 * prefer-read or prefer-standby mode for the next pass of
+					 * the list of connections as it couldn't find any servers
+					 * that are default read-only or in recovery mode.
 					 */
-					if (conn->read_write_host_index == -2)
-						goto target_accept_connection;
+					if (conn->read_write_or_primary_host_index == -2)
+						goto consume_checked_target_connection;
 
 					/* Close connection politely. */
 					conn->status = CONNECTION_OK;
 					sendTerminateConn(conn);
 
 					/* Record read-write host index */
-					if (conn->requested_session_type == SESSION_TYPE_PREFER_READ &&
-						conn->read_write_host_index == -1)
-						conn->read_write_host_index = conn->whichhost;
+					if (conn->requested_session_type == SESSION_TYPE_PREFER_READ ||
+						conn->requested_session_type == SESSION_TYPE_PREFER_STANDBY)
+					{
+						if (conn->read_write_or_primary_host_index == -1)
+							conn->read_write_or_primary_host_index = conn->whichhost;
+					}
 
 					/*
 					 * Try next host if any, but we don't want to consider
@@ -3722,7 +3772,7 @@ keep_going:						/* We will come back to here until there is
 						 * connections as it couldn't find any servers that
 						 * are default read-only.
 						 */
-						if (conn->read_write_host_index == -2)
+						if (conn->read_write_or_primary_host_index == -2)
 							goto consume_checked_write_connection;
 
 						/* Not a requested type; fail this connection. */
@@ -3757,8 +3807,8 @@ keep_going:						/* We will come back to here until there is
 
 						/* Record read-write host index */
 						if (conn->requested_session_type == SESSION_TYPE_PREFER_READ &&
-							conn->read_write_host_index == -1)
-							conn->read_write_host_index = conn->whichhost;
+							conn->read_write_or_primary_host_index == -1)
+							conn->read_write_or_primary_host_index = conn->whichhost;
 
 						/*
 						 * Try next host if any, but we don't want to consider
@@ -3811,6 +3861,144 @@ keep_going:						/* We will come back to here until there is
 				goto keep_going;
 			}
 
+		case CONNECTION_CHECK_RECOVERY:
+			{
+				const char *displayed_host;
+				const char *displayed_port;
+
+				if (!saveErrorMessage(conn, &savedMessage))
+					goto error_return;
+
+				conn->status = CONNECTION_OK;
+				if (!PQconsumeInput(conn))
+				{
+					restoreErrorMessage(conn, &savedMessage);
+					goto error_return;
+				}
+
+				if (PQisBusy(conn))
+				{
+					conn->status = CONNECTION_CHECK_RECOVERY;
+					restoreErrorMessage(conn, &savedMessage);
+					return PGRES_POLLING_READING;
+				}
+
+				res = PQgetResult(conn);
+				if (res && (PQresultStatus(res) == PGRES_TUPLES_OK) &&
+					PQntuples(res) == 1)
+				{
+					char	   *val;
+					bool		standby_server;
+
+					val = PQgetvalue(res, 0, 0);
+					standby_server = (strncmp(val, "t", 1) == 0);
+
+					/*
+					 * Server is in recovery mode and requested mode is
+					 * primary, ignore it. Server is not in recovery mode and
+					 * requested mode is prefer-standby, record it for the
+					 * first time and try to consume in the next scan (it
+					 * means no standby server is found in the first scan).
+					 */
+					if ((standby_server &&
+						 conn->requested_session_type == SESSION_TYPE_PRIMARY) ||
+						(!standby_server &&
+						 (conn->requested_session_type == SESSION_TYPE_PREFER_STANDBY ||
+						  conn->requested_session_type == SESSION_TYPE_STANDBY)))
+					{
+
+						/*
+						 * The following scenario is possible only for the
+						 * prefer-standby mode for the next pass of the list
+						 * of connections as it couldn't find any servers that
+						 * are in recovery.
+						 */
+						if (conn->read_write_or_primary_host_index == -2)
+							goto consume_checked_recovery_connection;
+
+						/* Not a requested type; fail this connection. */
+						PQclear(res);
+						restoreErrorMessage(conn, &savedMessage);
+
+						/* Append error report to conn->errorMessage. */
+						if (conn->connhost[conn->whichhost].type == CHT_HOST_ADDRESS)
+							displayed_host = conn->connhost[conn->whichhost].hostaddr;
+						else
+							displayed_host = conn->connhost[conn->whichhost].host;
+						displayed_port = conn->connhost[conn->whichhost].port;
+						if (displayed_port == NULL || displayed_port[0] == '\0')
+							displayed_port = DEF_PGPORT_STR;
+
+						if (conn->requested_session_type == SESSION_TYPE_PRIMARY)
+							appendPQExpBuffer(&conn->errorMessage,
+											  libpq_gettext("server is in recovery mode "
+															"\"%s:%s\"\n"),
+											  displayed_host, displayed_port);
+						else
+							appendPQExpBuffer(&conn->errorMessage,
+											  libpq_gettext("server is not in recovery mode "
+															"\"%s:%s\"\n"),
+											  displayed_host, displayed_port);
+
+						/* Close connection politely. */
+						conn->status = CONNECTION_OK;
+						sendTerminateConn(conn);
+
+						/* Record primary host index */
+						if (conn->requested_session_type == SESSION_TYPE_PREFER_STANDBY &&
+							conn->read_write_or_primary_host_index == -1)
+							conn->read_write_or_primary_host_index = conn->whichhost;
+
+						/*
+						 * Try next host if any, but we don't want to consider
+						 * additional addresses for this host.
+						 */
+						conn->try_next_host = true;
+						goto keep_going;
+					}
+
+			consume_checked_recovery_connection:
+					/* Session is requested type, so we're good. */
+					PQclear(res);
+					termPQExpBuffer(&savedMessage);
+
+					/*
+					 * Finish reading any remaining messages before being
+					 * considered as ready.
+					 */
+					conn->status = CONNECTION_CONSUME;
+					goto keep_going;
+				}
+
+				/*
+				 * Something went wrong with "SELECT pg_is_in_recovery()". We
+				 * should try next addresses.
+				 */
+				if (res)
+					PQclear(res);
+				restoreErrorMessage(conn, &savedMessage);
+
+				/* Append error report to conn->errorMessage. */
+				if (conn->connhost[conn->whichhost].type == CHT_HOST_ADDRESS)
+					displayed_host = conn->connhost[conn->whichhost].hostaddr;
+				else
+					displayed_host = conn->connhost[conn->whichhost].host;
+				displayed_port = conn->connhost[conn->whichhost].port;
+				if (displayed_port == NULL || displayed_port[0] == '\0')
+					displayed_port = DEF_PGPORT_STR;
+				appendPQExpBuffer(&conn->errorMessage,
+								  libpq_gettext("test \"SELECT pg_is_in_recovery()\" failed "
+												"on server \"%s:%s\"\n"),
+								  displayed_host, displayed_port);
+
+				/* Close connection politely. */
+				conn->status = CONNECTION_OK;
+				sendTerminateConn(conn);
+
+				/* Try next address */
+				conn->try_next_addr = true;
+				goto keep_going;
+			}
 		default:
 			appendPQExpBuffer(&conn->errorMessage,
 							  libpq_gettext("invalid connection state %d, "
@@ -3956,7 +4144,7 @@ makeEmptyPGconn(void)
 #endif
 
 	conn->requested_session_type = SESSION_TYPE_ANY;
-	conn->read_write_host_index = -1;
+	conn->read_write_or_primary_host_index = -1;
 
 	/*
 	 * We try to send at least 8K at a time, which is the usual size of pipe
diff --git a/src/interfaces/libpq/libpq-fe.h b/src/interfaces/libpq/libpq-fe.h
index fc0178cd5d..30b181aa37 100644
--- a/src/interfaces/libpq/libpq-fe.h
+++ b/src/interfaces/libpq/libpq-fe.h
@@ -68,7 +68,8 @@ typedef enum
 	CONNECTION_CONSUME,			/* Wait for any pending message and consume
 								 * them. */
 	CONNECTION_GSS_STARTUP,		/* Negotiating GSSAPI. */
-	CONNECTION_CHECK_TARGET		/* Check if we have a proper target connection */
+	CONNECTION_CHECK_TARGET,	/* Check if we have a proper target connection */
+	 CONNECTION_CHECK_RECOVERY	/* Check whether server is in recovery */
 } ConnStatusType;
 
 typedef enum
@@ -76,7 +77,10 @@ typedef enum
 	SESSION_TYPE_ANY = 0,		/* Any session (default) */
 	SESSION_TYPE_READ_WRITE,	/* Read-write session */
 	SESSION_TYPE_PREFER_READ,	/* Prefer read only session */
-	SESSION_TYPE_READ_ONLY		/* Read only session */
+	SESSION_TYPE_READ_ONLY,		/* Read only session */
+	SESSION_TYPE_PRIMARY,		/* Primary server */
+	SESSION_TYPE_PREFER_STANDBY,	/* Prefer Standby server */
+	SESSION_TYPE_STANDBY		/* Standby server */
 }			TargetSessionAttrsType;
 
 typedef enum
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index e3c15b2ba8..d9e38558c4 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -367,7 +367,7 @@ struct pg_conn
 
 	/*
 	 * Type of connection to make.  Possible values: any, read-write,
-	 * prefer-read and read-only.
+	 * prefer-read, read-only, primary, prefer-standby and standby.
 	 */
 	char	   *target_session_attrs;
 	TargetSessionAttrsType requested_session_type;
@@ -411,7 +411,7 @@ struct pg_conn
 	 * Initial value is -1, then the index of the first read-write host, -2
 	 * during the second attempt of connection to avoid recursion.
 	 */
-	int			read_write_host_index;
+	int			read_write_or_primary_host_index;
 
 	/* Connection data */
 	pgsocket	sock;			/* FD for socket, PGINVALID_SOCKET if
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index ac1e11e1ab..8fa28dab23 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 37;
+use Test::More tests => 41;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -141,6 +141,22 @@ test_target_session_attrs($node_master, $node_standby_1, $node_standby_1,
 test_target_session_attrs($node_standby_1, $node_master, $node_standby_1,
 	"read-only", 0);
 
+# Connect to master in "primary" mode with standby1,master list.
+test_target_session_attrs($node_standby_1, $node_master, $node_master,
+	"primary", 0);
+
+# Connect to master in "prefer-standby" mode with master,master list.
+test_target_session_attrs($node_master, $node_master, $node_master,
+	"prefer-standby", 0);
+
+# Connect to standby1 in "prefer-standby" mode with master,standby1 list.
+test_target_session_attrs($node_master, $node_standby_1, $node_standby_1,
+	"prefer-standby", 0);
+
+# Connect to standby1 in "standby" mode with master,standby1 list.
+test_target_session_attrs($node_master, $node_standby_1, $node_standby_1,
+	"standby", 0);
+
 # Test for SHOW commands using a WAL sender connection with a replication
 # role.
 note "testing SHOW commands for replication connection";
-- 
2.17.1

>From 599bcd4c876065f06489de5820211d42ab3b6ece Mon Sep 17 00:00:00 2001
From: Hari Babu <kommi.harib...@gmail.com>
Date: Mon, 25 Mar 2019 18:11:18 +1100
Subject: [PATCH v13 7/8] New function to reject the checked write
 connection

After a connection has been checked for writability, if we
decide to reject it based on the result, call the newly added
function to do so.
---
 src/interfaces/libpq/fe-connect.c | 123 ++++++++++++------------------
 1 file changed, 47 insertions(+), 76 deletions(-)

diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index 2e1872795a..f9075d2c10 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -2122,6 +2122,51 @@ restoreErrorMessage(PGconn *conn, PQExpBuffer savedMessage)
 	termPQExpBuffer(savedMessage);
 }
 
+static void
+reject_checked_write_connection(PGconn *conn)
+{
+	/* Not a requested type; fail this connection. */
+	const char *displayed_host;
+	const char *displayed_port;
+
+	/* Append error report to conn->errorMessage. */
+	if (conn->connhost[conn->whichhost].type == CHT_HOST_ADDRESS)
+		displayed_host = conn->connhost[conn->whichhost].hostaddr;
+	else
+		displayed_host = conn->connhost[conn->whichhost].host;
+	displayed_port = conn->connhost[conn->whichhost].port;
+	if (displayed_port == NULL || displayed_port[0] == '\0')
+		displayed_port = DEF_PGPORT_STR;
+
+	if (conn->requested_session_type == SESSION_TYPE_READ_WRITE)
+		appendPQExpBuffer(&conn->errorMessage,
+						  libpq_gettext("could not make a writable "
+										"connection to server "
+										"\"%s:%s\"\n"),
+						  displayed_host, displayed_port);
+	else
+		appendPQExpBuffer(&conn->errorMessage,
+						  libpq_gettext("could not make a readonly "
+										"connection to server "
+										"\"%s:%s\"\n"),
+						  displayed_host, displayed_port);
+
+	/* Close connection politely. */
+	conn->status = CONNECTION_OK;
+	sendTerminateConn(conn);
+
+	/* Record read-write host index */
+	if (conn->requested_session_type == SESSION_TYPE_PREFER_READ &&
+		conn->read_write_or_primary_host_index == -1)
+		conn->read_write_or_primary_host_index = conn->whichhost;
+
+	/*
+	 * Try next host if any, but we don't want to consider additional
+	 * addresses for this host.
+	 */
+	conn->try_next_host = true;
+}
+
 /* ----------------
  *		PQconnectPoll
  *
@@ -3518,10 +3563,6 @@ keep_going:						/* We will come back to here until there is
 							  (conn->requested_session_type == SESSION_TYPE_PREFER_READ ||
 							   conn->requested_session_type == SESSION_TYPE_READ_ONLY)))
 					{
-						/* Not a requested type; fail this connection. */
-						const char *displayed_host;
-						const char *displayed_port;
-
 						/*
 						 * The following scenario is possible only for the
 						 * prefer-read mode for the next pass of the list of
@@ -3531,42 +3572,7 @@ keep_going:						/* We will come back to here until there is
 						if (conn->read_write_or_primary_host_index == -2)
 							goto consume_checked_target_connection;
 
-						/* Append error report to conn->errorMessage. */
-						if (conn->connhost[conn->whichhost].type == CHT_HOST_ADDRESS)
-							displayed_host = conn->connhost[conn->whichhost].hostaddr;
-						else
-							displayed_host = conn->connhost[conn->whichhost].host;
-						displayed_port = conn->connhost[conn->whichhost].port;
-						if (displayed_port == NULL || displayed_port[0] == '\0')
-							displayed_port = DEF_PGPORT_STR;
-
-						if (conn->requested_session_type == SESSION_TYPE_READ_WRITE)
-							appendPQExpBuffer(&conn->errorMessage,
-											  libpq_gettext("could not make a writable "
-															"connection to server "
-															"\"%s:%s\"\n"),
-											  displayed_host, displayed_port);
-						else
-							appendPQExpBuffer(&conn->errorMessage,
-											  libpq_gettext("could not make a readonly "
-															"connection to server "
-															"\"%s:%s\"\n"),
-											  displayed_host, displayed_port);
-
-						/* Close connection politely. */
-						conn->status = CONNECTION_OK;
-						sendTerminateConn(conn);
-
-						/* Record read-write host index */
-						if (conn->requested_session_type == SESSION_TYPE_PREFER_READ &&
-							conn->read_write_or_primary_host_index == -1)
-							conn->read_write_or_primary_host_index = conn->whichhost;
-
-						/*
-						 * Try next host if any, but we don't want to consider
-						 * additional addresses for this host.
-						 */
-						conn->try_next_host = true;
+						reject_checked_write_connection(conn);
 						goto keep_going;
 					}
 
@@ -3779,42 +3785,7 @@ keep_going:						/* We will come back to here until there is
 						PQclear(res);
 						restoreErrorMessage(conn, &savedMessage);
 
-						/* Append error report to conn->errorMessage. */
-						if (conn->connhost[conn->whichhost].type == CHT_HOST_ADDRESS)
-							displayed_host = conn->connhost[conn->whichhost].hostaddr;
-						else
-							displayed_host = conn->connhost[conn->whichhost].host;
-						displayed_port = conn->connhost[conn->whichhost].port;
-						if (displayed_port == NULL || displayed_port[0] == '\0')
-							displayed_port = DEF_PGPORT_STR;
-
-						if (conn->requested_session_type == SESSION_TYPE_READ_WRITE)
-							appendPQExpBuffer(&conn->errorMessage,
-											  libpq_gettext("could not make a writable "
-															"connection to server "
-															"\"%s:%s\"\n"),
-											  displayed_host, displayed_port);
-						else
-							appendPQExpBuffer(&conn->errorMessage,
-											  libpq_gettext("could not make a readonly "
-															"connection to server "
-															"\"%s:%s\"\n"),
-											  displayed_host, displayed_port);
-
-						/* Close connection politely. */
-						conn->status = CONNECTION_OK;
-						sendTerminateConn(conn);
-
-						/* Record read-write host index */
-						if (conn->requested_session_type == SESSION_TYPE_PREFER_READ &&
-							conn->read_write_or_primary_host_index == -1)
-							conn->read_write_or_primary_host_index = conn->whichhost;
-
-						/*
-						 * Try next host if any, but we don't want to consider
-						 * additional addresses for this host.
-						 */
-						conn->try_next_host = true;
+						reject_checked_write_connection(conn);
 						goto keep_going;
 					}
 
-- 
2.17.1

>From 820e747f91a0d57d5018519f59a148fb50249b57 Mon Sep 17 00:00:00 2001
From: Hari Babu <kommi.harib...@gmail.com>
Date: Thu, 28 Mar 2019 15:30:01 +1100
Subject: [PATCH v13 8/8] Server recovery mode handling

A new GUC_REPORT parameter, in_recovery, is added to tell clients
whether the server is in recovery mode. This lets client connections
identify a standby server with a faster check instead of executing
a command.

A new SIGUSR1 interrupt is added to support reporting of
recovery mode exit to all backends and their respective
clients.

Some parts of the code are taken from earlier development by
Elvis Pranskevichus and Tsunakawa Takayuki.
---
 doc/src/sgml/libpq.sgml              |  14 ++-
 doc/src/sgml/protocol.sgml           |   8 +-
 src/backend/access/transam/xlog.c    |   3 +
 src/backend/storage/ipc/procarray.c  |  28 ++++++
 src/backend/storage/ipc/procsignal.c |   3 +
 src/backend/storage/ipc/standby.c    |   9 ++
 src/backend/tcop/postgres.c          |  60 ++++++++++++
 src/backend/utils/init/postinit.c    |   6 +-
 src/backend/utils/misc/check_guc     |   2 +-
 src/backend/utils/misc/guc.c         |  16 ++++
 src/include/storage/procarray.h      |   1 +
 src/include/storage/procsignal.h     |   2 +
 src/include/storage/standby.h        |   1 +
 src/include/tcop/tcopprot.h          |   2 +
 src/interfaces/libpq/fe-connect.c    | 135 +++++++++++++++++----------
 src/interfaces/libpq/fe-exec.c       |   4 +
 src/interfaces/libpq/libpq-int.h     |   1 +
 17 files changed, 235 insertions(+), 60 deletions(-)

diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index e447e8fad7..dc1e89bb2a 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -1702,8 +1702,10 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
 
        <para>
         To find out whether the server is in recovery mode or not, query <literal>SELECT pg_is_in_recovery()</literal>
-        will be sent upon any successful connection; if it returns <literal>t</literal>, means server
-        is in recovery mode.
+        will be sent upon any successful connection if the server is prior to version 12; if it returns
+        <literal>t</literal>, the server is in recovery mode. For server version 12 or later, the value
+        of the <varname>in_recovery</varname> configuration parameter, which the server reports upon
+        successful connection, is used instead.
        </para>
 
       </listitem>
@@ -2016,15 +2018,17 @@ const char *PQparameterStatus(const PGconn *conn, const char *paramName);
        <varname>IntervalStyle</varname>,
        <varname>TimeZone</varname>,
        <varname>integer_datetimes</varname>,
-       <varname>standard_conforming_strings</varname>, and
-       <varname>transaction_read_only</varname>.
+       <varname>standard_conforming_strings</varname>,
+       <varname>transaction_read_only</varname> and
+       <varname>in_recovery</varname>.
        (<varname>server_encoding</varname>, <varname>TimeZone</varname>, and
        <varname>integer_datetimes</varname> were not reported by releases before 8.0;
        <varname>standard_conforming_strings</varname> was not reported by releases
        before 8.1;
        <varname>IntervalStyle</varname> was not reported by releases before 8.4;
        <varname>application_name</varname> was not reported by releases before 9.0;
-       <varname>transaction_read_only</varname> was not reported by release before 12.0.)
+       <varname>transaction_read_only</varname> and <varname>in_recovery</varname>
+       were not reported by releases before 12.0.)
        Note that
        <varname>server_version</varname>,
        <varname>server_encoding</varname> and
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index dbf12fcc46..58142c072d 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -1284,15 +1284,17 @@ SELECT 1/0;
     <varname>IntervalStyle</varname>,
     <varname>TimeZone</varname>,
     <varname>integer_datetimes</varname>,
-    <varname>standard_conforming_strings</varname>, and
-    <varname>transaction_read_only</varname>.
+    <varname>standard_conforming_strings</varname>,
+    <varname>transaction_read_only</varname> and
+    <varname>in_recovery</varname>.
     (<varname>server_encoding</varname>, <varname>TimeZone</varname>, and
     <varname>integer_datetimes</varname> were not reported by releases before 8.0;
     <varname>standard_conforming_strings</varname> was not reported by releases
     before 8.1;
     <varname>IntervalStyle</varname> was not reported by releases before 8.4;
     <varname>application_name</varname> was not reported by releases before 9.0;
-    <varname>transaction_read_only</varname> was not reported by releases before 12.0.)
+    <varname>transaction_read_only</varname> and <varname>in_recovery</varname>
+    were not reported by releases before 12.0.)
     Note that
     <varname>server_version</varname>,
     <varname>server_encoding</varname> and
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 6876537b62..5093fd183a 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7769,6 +7769,9 @@ StartupXLOG(void)
 	XLogCtl->SharedRecoveryInProgress = false;
 	SpinLockRelease(&XLogCtl->info_lck);
 
+	if (standbyState != STANDBY_DISABLED)
+		SendRecoveryExitSignal();
+
 	UpdateControlFile();
 	LWLockRelease(ControlFileLock);
 
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 8abcfdf841..744475cc2c 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -2970,6 +2970,34 @@ CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
 	return true;				/* timed out, still conflicts */
 }
 
+/*
+ * SendSignalToAllBackends --- send a signal to all backends.
+ */
+void
+SendSignalToAllBackends(ProcSignalReason reason)
+{
+	ProcArrayStruct *arrayP = procArray;
+	int			index;
+	pid_t		pid = 0;
+
+	LWLockAcquire(ProcArrayLock, LW_SHARED);
+
+	for (index = 0; index < arrayP->numProcs; index++)
+	{
+		int			pgprocno = arrayP->pgprocnos[index];
+		volatile PGPROC *proc = &allProcs[pgprocno];
+		VirtualTransactionId procvxid;
+
+		GET_VXID_FROM_PGPROC(procvxid, *proc);
+
+		pid = proc->pid;
+		if (pid != 0)
+			(void) SendProcSignal(pid, reason, procvxid.backendId);
+	}
+
+	LWLockRelease(ProcArrayLock);
+}
+
 /*
  * ProcArraySetReplicationSlotXmin
  *
diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c
index 7605b2c367..e4548dc323 100644
--- a/src/backend/storage/ipc/procsignal.c
+++ b/src/backend/storage/ipc/procsignal.c
@@ -292,6 +292,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
 	if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN))
 		RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
 
+	if (CheckProcSignal(PROCSIG_RECOVERY_EXIT))
+		HandleRecoveryExitInterrupt();
+
 	SetLatch(MyLatch);
 
 	latch_sigusr1_handler();
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index 01ddffec40..b0e88ee545 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -138,6 +138,15 @@ ShutdownRecoveryTransactionEnvironment(void)
 	VirtualXactLockTableCleanup();
 }
 
+/*
+ * SendRecoveryExitSignal
+ *		Signal backends that the server has exited recovery mode.
+ */
+void
+SendRecoveryExitSignal(void)
+{
+	SendSignalToAllBackends(PROCSIG_RECOVERY_EXIT);
+}
 
 /*
  * -----------------------------------------------------
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index e8d8e6f828..69ce3ec786 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -167,6 +167,15 @@ static bool RecoveryConflictPending = false;
 static bool RecoveryConflictRetryable = true;
 static ProcSignalReason RecoveryConflictReason;
 
+/*
+ * Inbound recovery exit interrupts are initially processed by
+ * HandleRecoveryExitInterrupt(), called from inside a signal handler.
+ * That just sets the recoveryExitInterruptPending flag and sets the process
+ * latch. ProcessRecoveryExitInterrupt() will then be called whenever it's
+ * safe to actually deal with the interrupt.
+ */
+volatile sig_atomic_t recoveryExitInterruptPending = false;
+
 /* reused buffer to pass to SendRowDescriptionMessage() */
 static MemoryContext row_description_context = NULL;
 static StringInfoData row_description_buf;
@@ -195,6 +204,7 @@ static void drop_unnamed_stmt(void);
 static void log_disconnections(int code, Datum arg);
 static void enable_statement_timeout(void);
 static void disable_statement_timeout(void);
+static void ProcessRecoveryExitInterrupt(void);
 
 
 /* ----------------------------------------------------------------
@@ -543,6 +553,10 @@ ProcessClientReadInterrupt(bool blocked)
 		/* Process notify interrupts, if any */
 		if (notifyInterruptPending)
 			ProcessNotifyInterrupt();
+
+		/* Process recovery exit interrupts that happened while reading */
+		if (recoveryExitInterruptPending)
+			ProcessRecoveryExitInterrupt();
 	}
 	else if (ProcDiePending)
 	{
@@ -2961,6 +2975,52 @@ RecoveryConflictInterrupt(ProcSignalReason reason)
 	errno = save_errno;
 }
 
+/*
+ * HandleRecoveryExitInterrupt
+ *
+ *		Signal handler portion of interrupt handling. Let the backend know
+ *		that the server has exited the recovery mode.
+ */
+void
+HandleRecoveryExitInterrupt(void)
+{
+	/*
+	 * Note: this is called by a SIGNAL HANDLER. You must be very wary what
+	 * you do here.
+	 */
+
+	/* signal that work needs to be done */
+	recoveryExitInterruptPending = true;
+
+	/* make sure the event is processed in due course */
+	SetLatch(MyLatch);
+}
+
+/*
+ * ProcessRecoveryExitInterrupt
+ *
+ *		This is called just after waiting for a frontend command.  If an
+ *		interrupt arrives (via HandleRecoveryExitInterrupt()) while reading,
+ *		the read will be interrupted via the process's latch, and this routine
+ *		will get called.
+ */
+static void
+ProcessRecoveryExitInterrupt(void)
+{
+	recoveryExitInterruptPending = false;
+
+	SetConfigOption("in_recovery",
+					"off",
+					PGC_INTERNAL, PGC_S_OVERRIDE);
+
+	/*
+	 * Flush output buffer so that clients receive the ParameterStatus message
+	 * as soon as possible.
+	 */
+	pq_flush();
+}
+
+
 /*
  * ProcessInterrupts: out-of-line portion of CHECK_FOR_INTERRUPTS() macro
  *
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 29c5ec7b58..59fb4e905b 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -649,7 +649,11 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
 		 * This is handled by calling RecoveryInProgress and ignoring the
 		 * result.
 		 */
-		(void) RecoveryInProgress();
+		if (RecoveryInProgress())
+			SetConfigOption("in_recovery",
+							"on",
+							PGC_INTERNAL, PGC_S_OVERRIDE);
+
 	}
 	else
 	{
diff --git a/src/backend/utils/misc/check_guc b/src/backend/utils/misc/check_guc
index 416a0875b6..a4ebcef614 100755
--- a/src/backend/utils/misc/check_guc
+++ b/src/backend/utils/misc/check_guc
@@ -21,7 +21,7 @@ is_superuser lc_collate lc_ctype lc_messages lc_monetary lc_numeric lc_time \
 pre_auth_delay role seed server_encoding server_version server_version_num \
 session_authorization trace_lock_oidmin trace_lock_table trace_locks trace_lwlocks \
 trace_notify trace_userlocks transaction_isolation transaction_read_only \
-zero_damaged_pages"
+zero_damaged_pages in_recovery"
 
 ### What options are listed in postgresql.conf.sample, but don't appear
 ### in guc.c?
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 3c47473bcd..aacaecc520 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -580,6 +580,7 @@ static char *recovery_target_string;
 static char *recovery_target_xid_string;
 static char *recovery_target_name_string;
 static char *recovery_target_lsn_string;
+static bool in_recovery;
 
 
 /* should be static, but commands/variable.c needs to get at this */
@@ -1769,6 +1770,21 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		/*
+		 * Not for general use --- used to indicate whether the instance is
+		 * in recovery mode
+		 */
+		{"in_recovery", PGC_INTERNAL, UNGROUPED,
+			gettext_noop("Shows whether the instance is in recovery mode."),
+			NULL,
+			GUC_REPORT | GUC_NO_SHOW_ALL | GUC_NO_RESET_ALL | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE
+		},
+		&in_recovery,
+		false,
+		NULL, NULL, NULL
+	},
+
 	{
 		{"allow_system_table_mods", PGC_POSTMASTER, DEVELOPER_OPTIONS,
 			gettext_noop("Allows modifications of the structure of system tables."),
diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h
index da8b672096..86f0c13134 100644
--- a/src/include/storage/procarray.h
+++ b/src/include/storage/procarray.h
@@ -113,6 +113,7 @@ extern void CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conf
 extern int	CountUserBackends(Oid roleid);
 extern bool CountOtherDBBackends(Oid databaseId,
 								 int *nbackends, int *nprepared);
+extern void SendSignalToAllBackends(ProcSignalReason reason);
 
 extern void XidCacheRemoveRunningXids(TransactionId xid,
 									  int nxids, const TransactionId *xids,
diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h
index 05b186a05c..9cf9560b06 100644
--- a/src/include/storage/procsignal.h
+++ b/src/include/storage/procsignal.h
@@ -42,6 +42,8 @@ typedef enum
 	PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
 	PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
 
+	PROCSIG_RECOVERY_EXIT,		/* recovery exit interrupt */
+
 	NUM_PROCSIGNALS				/* Must be last! */
 } ProcSignalReason;
 
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h
index a3f8f82ff3..2c73f0c0a8 100644
--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ -26,6 +26,7 @@ extern int	max_standby_streaming_delay;
 
 extern void InitRecoveryTransactionEnvironment(void);
 extern void ShutdownRecoveryTransactionEnvironment(void);
+extern void SendRecoveryExitSignal(void);
 
 extern void ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid,
 												RelFileNode node);
diff --git a/src/include/tcop/tcopprot.h b/src/include/tcop/tcopprot.h
index ec21f7e45c..ed21a9e2f2 100644
--- a/src/include/tcop/tcopprot.h
+++ b/src/include/tcop/tcopprot.h
@@ -66,6 +66,8 @@ extern void StatementCancelHandler(SIGNAL_ARGS);
 extern void FloatExceptionHandler(SIGNAL_ARGS) pg_attribute_noreturn();
 extern void RecoveryConflictInterrupt(ProcSignalReason reason); /* called from SIGUSR1
 																 * handler */
+/* recovery exit interrupt handling function */
+extern void HandleRecoveryExitInterrupt(void);
 extern void ProcessClientReadInterrupt(bool blocked);
 extern void ProcessClientWriteInterrupt(bool blocked);
 
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index f9075d2c10..585339b537 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -2167,6 +2167,49 @@ reject_checked_write_connection(PGconn *conn)
 	conn->try_next_host = true;
 }
 
+static void
+reject_checked_recovery_connection(PGconn *conn)
+{
+	/* Not a requested type; fail this connection. */
+	const char *displayed_host;
+	const char *displayed_port;
+
+	/* Append error report to conn->errorMessage. */
+	if (conn->connhost[conn->whichhost].type == CHT_HOST_ADDRESS)
+		displayed_host = conn->connhost[conn->whichhost].hostaddr;
+	else
+		displayed_host = conn->connhost[conn->whichhost].host;
+	displayed_port = conn->connhost[conn->whichhost].port;
+	if (displayed_port == NULL || displayed_port[0] == '\0')
+		displayed_port = DEF_PGPORT_STR;
+
+	if (conn->requested_session_type == SESSION_TYPE_PRIMARY)
+		appendPQExpBuffer(&conn->errorMessage,
+						  libpq_gettext("server is in recovery mode "
+										"\"%s:%s\"\n"),
+						  displayed_host, displayed_port);
+	else
+		appendPQExpBuffer(&conn->errorMessage,
+						  libpq_gettext("server is not in recovery mode "
+										"\"%s:%s\"\n"),
+						  displayed_host, displayed_port);
+
+	/* Close connection politely. */
+	conn->status = CONNECTION_OK;
+	sendTerminateConn(conn);
+
+	/* Record primary host index */
+	if (conn->requested_session_type == SESSION_TYPE_PREFER_STANDBY &&
+		conn->read_write_or_primary_host_index == -1)
+		conn->read_write_or_primary_host_index = conn->whichhost;
+
+	/*
+	 * Try next host if any, but we don't want to consider additional
+	 * addresses for this host.
+	 */
+	conn->try_next_host = true;
+}
+
 /* ----------------
  *		PQconnectPoll
  *
@@ -3590,27 +3633,52 @@ keep_going:						/* We will come back to here until there is
 						   conn->requested_session_type == SESSION_TYPE_PREFER_STANDBY ||
 						   conn->requested_session_type == SESSION_TYPE_STANDBY)))
 				{
-					/*
-					 * Save existing error messages across the PQsendQuery
-					 * attempt.  This is necessary because PQsendQuery is
-					 * going to reset conn->errorMessage, so we would lose
-					 * error messages related to previous hosts we have tried
-					 * and failed to connect to.
-					 */
-					if (!saveErrorMessage(conn, &savedMessage))
-						goto error_return;
 
-					conn->status = CONNECTION_OK;
-					if (!PQsendQuery(conn, "SELECT pg_is_in_recovery()"))
+					if (conn->sversion < 120000)
 					{
+						/*
+						 * Save existing error messages across the PQsendQuery
+						 * attempt.  This is necessary because PQsendQuery is
+						 * going to reset conn->errorMessage, so we would lose
+						 * error messages related to previous hosts we have
+						 * tried and failed to connect to.
+						 */
+						if (!saveErrorMessage(conn, &savedMessage))
+							goto error_return;
+
+						conn->status = CONNECTION_OK;
+						if (!PQsendQuery(conn, "SELECT pg_is_in_recovery()"))
+						{
+							restoreErrorMessage(conn, &savedMessage);
+							goto error_return;
+						}
+
+						conn->status = CONNECTION_CHECK_RECOVERY;
+
 						restoreErrorMessage(conn, &savedMessage);
-						goto error_return;
+						return PGRES_POLLING_READING;
+					}
+					else if ((conn->in_recovery &&
+							  conn->requested_session_type == SESSION_TYPE_PRIMARY) ||
+							 (!conn->in_recovery &&
+							  (conn->requested_session_type == SESSION_TYPE_PREFER_STANDBY ||
+							   conn->requested_session_type == SESSION_TYPE_STANDBY)))
+					{
+						/*
+						 * The following scenario is possible only for the
+						 * prefer-standby mode for the next pass of the list
+						 * of connections as it couldn't find any servers that
+						 * are in recovery.
+						 */
+						if (conn->read_write_or_primary_host_index == -2)
+							goto consume_checked_target_connection;
+
+						reject_checked_recovery_connection(conn);
+						goto keep_going;
 					}
 
-					conn->status = CONNECTION_CHECK_RECOVERY;
-
-					restoreErrorMessage(conn, &savedMessage);
-					return PGRES_POLLING_READING;
+					/* obtained the requested type, consume it */
+					goto consume_checked_target_connection;
 				}
 
 				/*
@@ -3891,40 +3959,7 @@ keep_going:						/* We will come back to here until there is
 						PQclear(res);
 						restoreErrorMessage(conn, &savedMessage);
 
-						/* Append error report to conn->errorMessage. */
-						if (conn->connhost[conn->whichhost].type == CHT_HOST_ADDRESS)
-							displayed_host = conn->connhost[conn->whichhost].hostaddr;
-						else
-							displayed_host = conn->connhost[conn->whichhost].host;
-						displayed_port = conn->connhost[conn->whichhost].port;
-						if (displayed_port == NULL || displayed_port[0] == '\0')
-							displayed_port = DEF_PGPORT_STR;
-
-						if (conn->requested_session_type == SESSION_TYPE_PRIMARY)
-							appendPQExpBuffer(&conn->errorMessage,
-											  libpq_gettext("server is in recovery mode "
-															"\"%s:%s\"\n"),
-											  displayed_host, displayed_port);
-						else
-							appendPQExpBuffer(&conn->errorMessage,
-											  libpq_gettext("server is not in recovery mode "
-															"\"%s:%s\"\n"),
-											  displayed_host, displayed_port);
-
-						/* Close connection politely. */
-						conn->status = CONNECTION_OK;
-						sendTerminateConn(conn);
-
-						/* Record primary host index */
-						if (conn->requested_session_type == SESSION_TYPE_PREFER_STANDBY &&
-							conn->read_write_or_primary_host_index == -1)
-							conn->read_write_or_primary_host_index = conn->whichhost;
-
-						/*
-						 * Try next host if any, but we don't want to consider
-						 * additional addresses for this host.
-						 */
-						conn->try_next_host = true;
+						reject_checked_recovery_connection(conn);
 						goto keep_going;
 					}
 
diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c
index 3c17100e05..a964483eee 100644
--- a/src/interfaces/libpq/fe-exec.c
+++ b/src/interfaces/libpq/fe-exec.c
@@ -1117,6 +1117,10 @@ pqSaveParameterStatus(PGconn *conn, const char *name, const char *value)
 	{
 		conn->transaction_read_only = (strcmp(value, "on") == 0);
 	}
+	else if (strcmp(name, "in_recovery") == 0)
+	{
+		conn->in_recovery = (strcmp(value, "on") == 0);
+	}
 }
 
 
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index d9e38558c4..436109b41c 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -444,6 +444,7 @@ struct pg_conn
 	int			client_encoding;	/* encoding id */
 	bool		std_strings;	/* standard_conforming_strings */
 	bool		transaction_read_only;	/* transaction_read_only */
+	bool		in_recovery;	/* in_recovery */
 	PGVerbosity verbosity;		/* error/notice message verbosity */
 	PGContextVisibility show_context;	/* whether to show CONTEXT field */
 	PGlobjfuncs *lobjfuncs;		/* private state for large-object access fns */
-- 
2.17.1

Reply via email to