On Wednesday, May 24, 2017 9:38:36 PM EDT Tsunakawa, Takayuki wrote:
> The clients will know the change of session_read_only when they do
> something that calls RecoveryInProgress(). Currently,
> RecoveryInProgress() seems to be the only place where the sessions
> notice the promotion, so I set session_read_only to the value of
> default_transaction_read_only there. I think that there is room for
> discussion here. It would be ideal for the sessions to notice the
> server promotion promptly and notify the clients of the change. I
> have no idea to do that well.
My original patch did that via the new SendSignalToAllBackends() helper,
which is called whenever the postmaster exits hot stanby.
I incorporated those bits into your patch and rebased in onto master.
Please see attached.
FWIW, I think that mixing the standby status and the default
transaction writability is suboptimal. They are related, yes, but not
the same thing. It is possible to have a master cluster in the
read-only mode, and with this patch it would be impossible to
distinguish from a hot-standby replica without also polling
pg_is_in_recovery(), which defeats the purpose of having to do no
database roundtrips.
Elvis
diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index 096a8be605..4d74740699 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -1474,10 +1474,14 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
<para>
If this parameter is set to <literal>read-write</literal>, only a
connection in which read-write transactions are accepted by default
+ is considered acceptable.
+ If this parameter is set to <literal>read-only</literal>, only a
+ connection in which read-write transactions are rejected by default
is considered acceptable. The query
<literal>SHOW transaction_read_only</literal> will be sent upon any
- successful connection; if it returns <literal>on</>, the connection
- will be closed. If multiple hosts were specified in the connection
+ successful connection if the server is prior to version 10.
+ If the session attribute does not match the requested one, the connection will be closed.
+ If multiple hosts were specified in the connection
string, any remaining servers will be tried just as if the connection
attempt had failed. The default value of this parameter,
<literal>any</>, regards all connections as acceptable.
@@ -1758,6 +1762,7 @@ const char *PQparameterStatus(const PGconn *conn, const char *paramName);
<varname>application_name</>,
<varname>is_superuser</>,
<varname>session_authorization</>,
+ <varname>session_read_only</>,
<varname>DateStyle</>,
<varname>IntervalStyle</>,
<varname>TimeZone</>,
@@ -1768,7 +1773,8 @@ const char *PQparameterStatus(const PGconn *conn, const char *paramName);
<varname>standard_conforming_strings</> was not reported by releases
before 8.1;
<varname>IntervalStyle</> was not reported by releases before 8.4;
- <varname>application_name</> was not reported by releases before 9.0.)
+ <varname>application_name</> was not reported by releases before 9.0;
+ <varname>session_read_only</> was not reported by releases before 10.0.)
Note that
<varname>server_version</>,
<varname>server_encoding</> and
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index 76d1c13cc4..e2a1cd1c03 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -1255,6 +1255,7 @@ SELCT 1/0;
<varname>application_name</>,
<varname>is_superuser</>,
<varname>session_authorization</>,
+ <varname>session_read_only</>,
<varname>DateStyle</>,
<varname>IntervalStyle</>,
<varname>TimeZone</>,
@@ -1265,7 +1266,8 @@ SELCT 1/0;
<varname>standard_conforming_strings</> was not reported by releases
before 8.1;
<varname>IntervalStyle</> was not reported by releases before 8.4;
- <varname>application_name</> was not reported by releases before 9.0.)
+ <varname>application_name</> was not reported by releases before 9.0;
+ <varname>session_read_only</> was not reported by releases before 10.0.)
Note that
<varname>server_version</>,
<varname>server_encoding</> and
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 442341a707..e862d7e993 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7715,8 +7715,10 @@ StartupXLOG(void)
* Shutdown the recovery environment. This must occur after
* RecoverPreparedTransactions(), see notes for lock_twophase_recover()
*/
- if (standbyState != STANDBY_DISABLED)
+ if (standbyState != STANDBY_DISABLED) {
ShutdownRecoveryTransactionEnvironment();
+ SendHotStandbyExitSignal();
+ }
/* Shut down xlogreader */
if (readFile >= 0)
@@ -7921,6 +7923,11 @@ RecoveryInProgress(void)
*/
pg_memory_barrier();
InitXLOGAccess();
+
+ /* Update session read-only status. */
+ SetConfigOption("session_read_only",
+ DefaultXactReadOnly ? "on" : "off",
+ PGC_INTERNAL, PGC_S_OVERRIDE);
}
/*
diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c
index bacc08eb84..9b1646cf9b 100644
--- a/src/backend/commands/async.c
+++ b/src/backend/commands/async.c
@@ -355,6 +355,8 @@ static List *upperPendingNotifies = NIL; /* list of upper-xact lists */
*/
volatile sig_atomic_t notifyInterruptPending = false;
+volatile sig_atomic_t hotStandbyExitInterruptPending = false;
+
/* True if we've registered an on_shmem_exit cleanup */
static bool unlistenExitRegistered = false;
@@ -1733,6 +1735,53 @@ ProcessNotifyInterrupt(void)
}
+/*
+ * HandleHotStandbyExitInterrupt
+ *
+ * Signal handler portion of interrupt handling. Let the backend know
+ * that the server has exited the recovery mode.
+ */
+void
+HandleHotStandbyExitInterrupt(void)
+{
+ /*
+ * Note: this is called by a SIGNAL HANDLER. You must be very wary what
+ * you do here.
+ */
+
+ /* signal that work needs to be done */
+ hotStandbyExitInterruptPending = true;
+
+ /* make sure the event is processed in due course */
+ SetLatch(MyLatch);
+}
+
+
+/*
+ * ProcessHotStandbyExitInterrupt
+ *
+ * This is called just after waiting for a frontend command. If a
+ * interrupt arrives (via HandleHotStandbyExitInterrupt()) while reading,
+ * the read will be interrupted via the process's latch, and this routine
+ * will get called.
+ */
+void
+ProcessHotStandbyExitInterrupt(void)
+{
+ hotStandbyExitInterruptPending = false;
+
+ SetConfigOption("session_read_only",
+ DefaultXactReadOnly ? "on" : "off",
+ PGC_INTERNAL, PGC_S_OVERRIDE);
+
+ /*
+ * Flush output buffer so that clients receive the ParameterStatus
+ * message as soon as possible.
+ */
+ pq_flush();
+}
+
+
/*
* Read all pending notifications from the queue, and deliver appropriate
* ones to my frontend. Stop when we reach queue head or an uncommitted
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index ffa6180eff..d4903b5528 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -2953,6 +2953,36 @@ CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
return true; /* timed out, still conflicts */
}
+/*
+ * SendSignalToAllBackends --- send a signal to all backends.
+ */
+void
+SendSignalToAllBackends(ProcSignalReason reason)
+{
+ ProcArrayStruct *arrayP = procArray;
+ int index;
+ pid_t pid = 0;
+
+ LWLockAcquire(ProcArrayLock, LW_SHARED);
+
+ for (index = 0; index < arrayP->numProcs; index++)
+ {
+ int pgprocno = arrayP->pgprocnos[index];
+ volatile PGPROC *proc = &allProcs[pgprocno];
+ VirtualTransactionId procvxid;
+
+ GET_VXID_FROM_PGPROC(procvxid, *proc);
+
+ pid = proc->pid;
+ if (pid != 0)
+ {
+ (void) SendProcSignal(pid, reason, procvxid.backendId);
+ }
+ }
+
+ LWLockRelease(ProcArrayLock);
+}
+
/*
* ProcArraySetReplicationSlotXmin
*
diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c
index b9302ac630..9bc805fc7c 100644
--- a/src/backend/storage/ipc/procsignal.c
+++ b/src/backend/storage/ipc/procsignal.c
@@ -292,6 +292,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN))
RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
+ if (CheckProcSignal(PROCSIG_HOTSTANDBY_EXIT))
+ HandleHotStandbyExitInterrupt();
+
SetLatch(MyLatch);
latch_sigusr1_handler();
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index d491ece60a..bd57fab328 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -111,6 +111,15 @@ ShutdownRecoveryTransactionEnvironment(void)
VirtualXactLockTableCleanup();
}
+/*
+ * SendHotStandbyExitSignal
+ * Signal backends that the server has exited Hot Standby.
+ */
+void
+SendHotStandbyExitSignal(void)
+{
+ SendSignalToAllBackends(PROCSIG_HOTSTANDBY_EXIT);
+}
/*
* -----------------------------------------------------
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 4eb85720a7..590d42ffc4 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -526,9 +526,13 @@ ProcessClientReadInterrupt(bool blocked)
if (catchupInterruptPending)
ProcessCatchupInterrupt();
- /* Process sinval catchup interrupts that happened while reading */
+ /* Process NOTIFY interrupts that happened while reading */
if (notifyInterruptPending)
ProcessNotifyInterrupt();
+
+ /* Process recovery exit interrupts that happened while reading */
+ if (hotStandbyExitInterruptPending)
+ ProcessHotStandbyExitInterrupt();
}
else if (ProcDiePending && blocked)
{
@@ -3749,6 +3753,14 @@ PostgresMain(int argc, char *argv[],
*/
InitPostgres(dbname, InvalidOid, username, InvalidOid, NULL);
+ /*
+ * Update session read-only status if in recovery.
+ */
+ if (IsUnderPostmaster && !DefaultXactReadOnly &&
+ RecoveryInProgress())
+ SetConfigOption("session_read_only", "on",
+ PGC_INTERNAL, PGC_S_OVERRIDE);
+
/*
* If the PostmasterContext is still around, recycle the space; we don't
* need it anymore after InitPostgres completes. Note this does not trash
diff --git a/src/backend/utils/misc/check_guc b/src/backend/utils/misc/check_guc
index d228bbed68..da96df2298 100755
--- a/src/backend/utils/misc/check_guc
+++ b/src/backend/utils/misc/check_guc
@@ -21,7 +21,7 @@ is_superuser lc_collate lc_ctype lc_messages lc_monetary lc_numeric lc_time \
pre_auth_delay role seed server_encoding server_version server_version_int \
session_authorization trace_lock_oidmin trace_lock_table trace_locks trace_lwlocks \
trace_notify trace_userlocks transaction_isolation transaction_read_only \
-zero_damaged_pages"
+zero_damaged_pages session_read_only"
### What options are listed in postgresql.conf.sample, but don't appear
### in guc.c?
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 969e80f756..0ffa38f1ea 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -147,6 +147,8 @@ static bool call_string_check_hook(struct config_string *conf, char **newval,
static bool call_enum_check_hook(struct config_enum *conf, int *newval,
void **extra, GucSource source, int elevel);
+static void assign_default_transaction_read_only(bool newval, void *extra);
+
static bool check_log_destination(char **newval, void **extra, GucSource source);
static void assign_log_destination(const char *newval, void *extra);
@@ -493,6 +495,7 @@ int huge_pages;
*/
static char *syslog_ident_str;
static bool session_auth_is_superuser;
+static bool session_read_only;
static double phony_random_seed;
static char *client_encoding_string;
static char *datestyle_string;
@@ -932,6 +935,20 @@ static struct config_bool ConfigureNamesBool[] =
false,
NULL, NULL, NULL
},
+ {
+ /*
+ * Not for general use --- used to indicate whether
+ * the session is read-only by default.
+ */
+ {"session_read_only", PGC_INTERNAL, UNGROUPED,
+ gettext_noop("Shows whether the session is read-only by default."),
+ NULL,
+ GUC_REPORT | GUC_NO_SHOW_ALL | GUC_NO_RESET_ALL | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE
+ },
+ &session_read_only,
+ false,
+ NULL, NULL, NULL
+ },
{
{"bonjour", PGC_POSTMASTER, CONN_AUTH_SETTINGS,
gettext_noop("Enables advertising the server via Bonjour."),
@@ -1366,7 +1383,7 @@ static struct config_bool ConfigureNamesBool[] =
},
&DefaultXactReadOnly,
false,
- NULL, NULL, NULL
+ NULL, assign_default_transaction_read_only, NULL
},
{
{"transaction_read_only", PGC_USERSET, CLIENT_CONN_STATEMENT,
@@ -10049,6 +10066,30 @@ assign_wal_consistency_checking(const char *newval, void *extra)
wal_consistency_checking = (bool *) extra;
}
+static void
+assign_default_transaction_read_only(bool newval, void *extra)
+{
+ if (newval == DefaultXactReadOnly)
+ return;
+
+ /*
+ * We clamp manually-set values to at least 1MB. Since
+ * Also set the session read-only parameter. We only need
+ * to set the correct value in processes that have database
+ * sessions, but there's no mechanism to know that there's
+ * a session. Instead, we use the shared memory segment
+ * pointer because the processes with database sessions are
+ * attached to the shared memory. Without this check,
+ * RecoveryInProgress() would crash the processes which
+ * are not attached to the shared memory.
+ */
+ if (IsUnderPostmaster && UsedShmemSegAddr != NULL &&
+ RecoveryInProgress())
+ newval = true;
+ SetConfigOption("session_read_only", newval ? "on" : "off",
+ PGC_INTERNAL, PGC_S_OVERRIDE);
+}
+
static bool
check_log_destination(char **newval, void **extra, GucSource source)
{
diff --git a/src/include/commands/async.h b/src/include/commands/async.h
index 939711d8d9..7d62c59214 100644
--- a/src/include/commands/async.h
+++ b/src/include/commands/async.h
@@ -24,6 +24,7 @@
extern bool Trace_notify;
extern volatile sig_atomic_t notifyInterruptPending;
+extern volatile sig_atomic_t hotStandbyExitInterruptPending;
extern Size AsyncShmemSize(void);
extern void AsyncShmemInit(void);
@@ -54,4 +55,10 @@ extern void HandleNotifyInterrupt(void);
/* process interrupts */
extern void ProcessNotifyInterrupt(void);
+/* signal handler for inbound notifies (PROCSIG_HOTSTANDBY_EXIT) */
+extern void HandleHotStandbyExitInterrupt(void);
+
+/* process recovery exit event */
+extern void ProcessHotStandbyExitInterrupt(void);
+
#endif /* ASYNC_H */
diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h
index 174c537be4..06ec4c7524 100644
--- a/src/include/storage/procarray.h
+++ b/src/include/storage/procarray.h
@@ -114,6 +114,8 @@ extern int CountUserBackends(Oid roleid);
extern bool CountOtherDBBackends(Oid databaseId,
int *nbackends, int *nprepared);
+extern void SendSignalToAllBackends(ProcSignalReason reason);
+
extern void XidCacheRemoveRunningXids(TransactionId xid,
int nxids, const TransactionId *xids,
TransactionId latestXid);
diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h
index 20bb05b177..80d2af79dd 100644
--- a/src/include/storage/procsignal.h
+++ b/src/include/storage/procsignal.h
@@ -42,6 +42,8 @@ typedef enum
PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
+ PROCSIG_HOTSTANDBY_EXIT, /* postmaster has exited hot standby */
+
NUM_PROCSIGNALS /* Must be last! */
} ProcSignalReason;
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h
index f5404b4c1f..429eb61174 100644
--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ -26,6 +26,7 @@ extern int max_standby_streaming_delay;
extern void InitRecoveryTransactionEnvironment(void);
extern void ShutdownRecoveryTransactionEnvironment(void);
+extern void SendHotStandbyExitSignal(void);
extern void ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid,
RelFileNode node);
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index c580d91135..56c4482867 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -1178,7 +1178,8 @@ connectOptions2(PGconn *conn)
if (conn->target_session_attrs)
{
if (strcmp(conn->target_session_attrs, "any") != 0
- && strcmp(conn->target_session_attrs, "read-write") != 0)
+ && strcmp(conn->target_session_attrs, "read-write") != 0
+ && strcmp(conn->target_session_attrs, "read-only") != 0)
{
conn->status = CONNECTION_BAD;
printfPQExpBuffer(&conn->errorMessage,
@@ -2972,10 +2973,12 @@ keep_going: /* We will come back to here until there is
}
/*
- * If a read-write connection is required, see if we have one.
+ * If a specific type of connection is required, see if we have one.
*/
- if (conn->target_session_attrs != NULL &&
- strcmp(conn->target_session_attrs, "read-write") == 0)
+ /* If the server version is before 10.0, issue an SQL query. */
+ if (conn->sversion < 100000 &&
+ conn->target_session_attrs != NULL &&
+ strcmp(conn->target_session_attrs, "any") != 0)
{
/*
* We are yet to make a connection. Save all existing
@@ -3000,6 +3003,34 @@ keep_going: /* We will come back to here until there is
return PGRES_POLLING_READING;
}
+ /* Otherwise, check parameter status sent by backend to avoid round-trip for a query. */
+ if (conn->target_session_attrs != NULL &&
+ ((strcmp(conn->target_session_attrs, "read-write") == 0 && !conn->session_read_only) ||
+ (strcmp(conn->target_session_attrs, "read-only") == 0 && conn->session_read_only)))
+ {
+ /* Not suitable; close connection. */
+ appendPQExpBuffer(&conn->errorMessage,
+ libpq_gettext("could not make a suitable "
+ "connection to server "
+ "\"%s:%s\"\n"),
+ conn->connhost[conn->whichhost].host,
+ conn->connhost[conn->whichhost].port);
+ conn->status = CONNECTION_OK;
+ sendTerminateConn(conn);
+ pqDropConnection(conn, true);
+
+ /* Skip any remaining addresses for this host. */
+ conn->addr_cur = NULL;
+ if (conn->whichhost + 1 < conn->nconnhost)
+ {
+ conn->status = CONNECTION_NEEDED;
+ goto keep_going;
+ }
+
+ /* No more addresses to try. So we fail. */
+ goto error_return;
+ }
+
/* We can release the address lists now. */
release_all_addrinfo(conn);
@@ -3036,10 +3067,10 @@ keep_going: /* We will come back to here until there is
}
/*
- * If a read-write connection is requested check for same.
+ * If a specific type of connection is required, see if we have one.
*/
if (conn->target_session_attrs != NULL &&
- strcmp(conn->target_session_attrs, "read-write") == 0)
+ strcmp(conn->target_session_attrs, "any") != 0)
{
if (!saveErrorMessage(conn, &savedMessage))
goto error_return;
@@ -3118,9 +3149,22 @@ keep_going: /* We will come back to here until there is
PQntuples(res) == 1)
{
char *val;
+ char *expected_val;
+ int expected_len;
+
+ if (strcmp(conn->target_session_attrs, "read-write") == 0)
+ {
+ expected_val = "on";
+ expected_len = 2;
+ }
+ else
+ {
+ expected_val = "off";
+ expected_len = 3;
+ }
val = PQgetvalue(res, 0, 0);
- if (strncmp(val, "on", 2) == 0)
+ if (strncmp(val, expected_val, expected_len) == 0)
{
const char *displayed_host;
const char *displayed_port;
@@ -3136,9 +3180,9 @@ keep_going: /* We will come back to here until there is
PQclear(res);
restoreErrorMessage(conn, &savedMessage);
- /* Not writable; close connection. */
+ /* Not suitable; close connection. */
appendPQExpBuffer(&conn->errorMessage,
- libpq_gettext("could not make a writable "
+ libpq_gettext("could not make a suiteable "
"connection to server "
"\"%s:%s\"\n"),
displayed_host, displayed_port);
@@ -3344,6 +3388,7 @@ makeEmptyPGconn(void)
conn->setenv_state = SETENV_STATE_IDLE;
conn->client_encoding = PG_SQL_ASCII;
conn->std_strings = false; /* unless server says differently */
+ conn->session_read_only = false; /* unless server says differently */
conn->verbosity = PQERRORS_DEFAULT;
conn->show_context = PQSHOW_CONTEXT_ERRORS;
conn->sock = PGINVALID_SOCKET;
diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c
index c24bce62dd..6d3e5705e5 100644
--- a/src/interfaces/libpq/fe-exec.c
+++ b/src/interfaces/libpq/fe-exec.c
@@ -1007,8 +1007,8 @@ pqSaveParameterStatus(PGconn *conn, const char *name, const char *value)
}
/*
- * Special hacks: remember client_encoding and
- * standard_conforming_strings, and convert server version to a numeric
+ * Special hacks: remember client_encoding/
+ * standard_conforming_strings/session_read_only, and convert server version to a numeric
* form. We keep the first two of these in static variables as well, so
* that PQescapeString and PQescapeBytea can behave somewhat sanely (at
* least in single-connection-using programs).
@@ -1026,6 +1026,8 @@ pqSaveParameterStatus(PGconn *conn, const char *name, const char *value)
conn->std_strings = (strcmp(value, "on") == 0);
static_std_strings = conn->std_strings;
}
+ else if (strcmp(name, "session_read_only") == 0)
+ conn->session_read_only = (strcmp(value, "on") == 0);
else if (strcmp(name, "server_version") == 0)
{
int cnt;
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index 42913604e3..b62cc9fab4 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -361,7 +361,7 @@ struct pg_conn
char *krbsrvname; /* Kerberos service name */
#endif
- /* Type of connection to make. Possible values: any, read-write. */
+ /* Type of connection to make. Possible values: any, read-write, read-only. */
char *target_session_attrs;
/* Optional file to write trace info to */
@@ -421,6 +421,7 @@ struct pg_conn
pgParameterStatus *pstatus; /* ParameterStatus data */
int client_encoding; /* encoding id */
bool std_strings; /* standard_conforming_strings */
+ bool session_read_only; /* session_read_only */
PGVerbosity verbosity; /* error/notice message verbosity */
PGContextVisibility show_context; /* whether to show CONTEXT field */
PGlobjfuncs *lobjfuncs; /* private state for large-object access fns */
--
2.14.1
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers