Michael Paquier wrote:
> + /* wait for up to a minute for promotion */
> + for (i = 0; i < WAITS_PER_SECOND * WAIT_SECONDS; ++i)
> + {
> + if (!RecoveryInProgress())
> + PG_RETURN_BOOL(true);
> +
> + pg_usleep(1000000L / WAITS_PER_SECOND);
> + }
> I would recommend to avoid pg_usleep and instead use a WaitLatch() or
> similar to generate a wait event. The wait can then also be seen in
> pg_stat_activity, which is useful for monitoring purposes. Using
> RecoveryInProgress is indeed doable, and that's more simple than what I
> thought first.
Agreed, done.
I have introduced a new wait event, because I couldn't find one that fit.
> Something I missed to mention in the previous review: the timeout should
> be manually enforceable, with a default at 60s.
Ok, added as a new parameter "wait_seconds".
> Is the function marked as strict? Per the code it should be, I am not
> able to test now though.
Yes, it is.
> You are missing REVOKE EXECUTE ON FUNCTION pg_promote() in
> system_views.sql, or any users could trigger a promotion, no?
You are right *blush*.
Fixed.
Yours,
Laurenz Albe
From 08951fea7c526450d9a632ef0e6e246dd9dba307 Mon Sep 17 00:00:00 2001
From: Laurenz Albe <[email protected]>
Date: Fri, 19 Oct 2018 13:24:29 +0200
Subject: [PATCH] Add pg_promote() to promote standby servers
---
doc/src/sgml/func.sgml | 21 ++++++
doc/src/sgml/high-availability.sgml | 2 +-
doc/src/sgml/recovery-config.sgml | 3 +-
src/backend/access/transam/xlog.c | 6 --
src/backend/access/transam/xlogfuncs.c | 82 ++++++++++++++++++++++
src/backend/catalog/system_views.sql | 8 +++
src/backend/postmaster/pgstat.c | 3 +
src/include/access/xlog.h | 6 ++
src/include/catalog/pg_proc.dat | 4 ++
src/include/pgstat.h | 3 +-
src/test/recovery/t/004_timeline_switch.pl | 6 +-
src/test/recovery/t/009_twophase.pl | 6 +-
12 files changed, 138 insertions(+), 12 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 5193df3366..88121cdc66 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -18731,6 +18731,9 @@ SELECT set_config('log_statement_stats', 'off', false);
<indexterm>
<primary>pg_terminate_backend</primary>
</indexterm>
+ <indexterm>
+ <primary>pg_promote</primary>
+ </indexterm>
<indexterm>
<primary>signal</primary>
@@ -18790,6 +18793,16 @@ SELECT set_config('log_statement_stats', 'off', false);
however only superusers can terminate superuser backends.
</entry>
</row>
+ <row>
+ <entry>
+ <literal><function>pg_promote(<parameter>wait</parameter> <type>boolean</type> DEFAULT true, <parameter>wait_seconds</parameter> <type>integer</type> DEFAULT 60)</function></literal>
+ </entry>
+ <entry><type>boolean</type></entry>
+ <entry>Promote a physical standby server. This function is restricted to
+ superusers by default, but other users can be granted EXECUTE to run
+ the function.
+ </entry>
+ </row>
</tbody>
</tgroup>
</table>
@@ -18827,6 +18840,14 @@ SELECT set_config('log_statement_stats', 'off', false);
subprocess.
</para>
+ <para>
+ <function>pg_promote</function> can only be called on standby servers.
+ If the argument <parameter>wait</parameter> is <literal>true</literal>,
+ the function waits until promotion is complete or <parameter>wait_seconds</parameter>
+ seconds have passed, otherwise the function returns immediately after sending
+ the promotion signal to the postmaster.
+ </para>
+
</sect2>
<sect2 id="functions-admin-backup">
diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml
index ebcb3daaed..f8e036965c 100644
--- a/doc/src/sgml/high-availability.sgml
+++ b/doc/src/sgml/high-availability.sgml
@@ -1472,7 +1472,7 @@ synchronous_standby_names = 'ANY 2 (s1, s2, s3)'
<para>
To trigger failover of a log-shipping standby server,
- run <command>pg_ctl promote</command> or create a trigger
+ run <command>pg_ctl promote</command>, call <function>pg_promote()</function>, or create a trigger
file with the file name and path specified by the <varname>trigger_file</varname>
setting in <filename>recovery.conf</filename>. If you're planning to use
<command>pg_ctl promote</command> to fail over, <varname>trigger_file</varname> is
diff --git a/doc/src/sgml/recovery-config.sgml b/doc/src/sgml/recovery-config.sgml
index 92825fdf19..d06cd0b08e 100644
--- a/doc/src/sgml/recovery-config.sgml
+++ b/doc/src/sgml/recovery-config.sgml
@@ -439,7 +439,8 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows
<para>
Specifies a trigger file whose presence ends recovery in the
standby. Even if this value is not set, you can still promote
- the standby using <command>pg_ctl promote</command>.
+ the standby using <command>pg_ctl promote</command> or calling
+ <function>pg_promote()</function>.
This setting has no effect if <varname>standby_mode</varname> is <literal>off</literal>.
</para>
</listitem>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7375a78ffc..62fc418893 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -78,12 +78,6 @@
extern uint32 bootstrap_data_checksum_version;
-/* File path names (all relative to $PGDATA) */
-#define RECOVERY_COMMAND_FILE "recovery.conf"
-#define RECOVERY_COMMAND_DONE "recovery.done"
-#define PROMOTE_SIGNAL_FILE "promote"
-#define FALLBACK_PROMOTE_SIGNAL_FILE "fallback_promote"
-
/* User-settable parameters */
int max_wal_size_mb = 1024; /* 1 GB */
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 9731742978..35f817786b 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -23,6 +23,7 @@
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "replication/walreceiver.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
@@ -35,6 +36,7 @@
#include "storage/fd.h"
#include "storage/ipc.h"
+#include <unistd.h>
/*
* Store label file and tablespace map during non-exclusive backups.
@@ -697,3 +699,83 @@ pg_backup_start_time(PG_FUNCTION_ARGS)
PG_RETURN_DATUM(xtime);
}
+
+/*
+ * Promote a standby server.
+ *
+ * Returns "false" if promotion could not be requested or did not finish in
+ * time.  With waiting disabled, "true" only means that promotion has been
+ * requested; otherwise it means that recovery has ended.
+ */
+Datum
+pg_promote(PG_FUNCTION_ARGS)
+{
+	bool		wait = PG_GETARG_BOOL(0);
+	int			wait_seconds = PG_GETARG_INT32(1);
+	FILE	   *promote_file;
+	int64		i;
+
+	if (!RecoveryInProgress())
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("recovery is not in progress"),
+				 errhint("Only a server that is in recovery can be promoted.")));
+
+	if (wait_seconds < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+				 errmsg("\"wait_seconds\" cannot be negative")));
+
+	/* create the promote signal file */
+	promote_file = AllocateFile(PROMOTE_SIGNAL_FILE, "w");
+	if (!promote_file)
+	{
+		ereport(WARNING,
+				(errmsg("could not create file \"%s\": %m", PROMOTE_SIGNAL_FILE)));
+		PG_RETURN_BOOL(false);
+	}
+
+	if (FreeFile(promote_file))
+	{
+		/* probably unreachable, but it is better to be safe */
+		ereport(WARNING,
+				(errmsg("could not write to file \"%s\": %m", PROMOTE_SIGNAL_FILE)));
+		PG_RETURN_BOOL(false);
+	}
+
+	/* signal the postmaster */
+	if (kill(PostmasterPid, SIGUSR1) != 0)
+	{
+		ereport(WARNING,
+				(errmsg("failed to send signal to postmaster: %m")));
+		(void) unlink(PROMOTE_SIGNAL_FILE);
+		PG_RETURN_BOOL(false);
+	}
+
+	/* return immediately if waiting was not requested */
+	if (!wait || wait_seconds == 0)
+		PG_RETURN_BOOL(true);
+
+	/* poll for up to "wait_seconds" seconds; 64-bit count cannot overflow */
+#define WAITS_PER_SECOND 10
+	for (i = 0; i < (int64) WAITS_PER_SECOND * wait_seconds; ++i)
+	{
+		int			rc;
+
+		if (!RecoveryInProgress())
+			PG_RETURN_BOOL(true);
+		CHECK_FOR_INTERRUPTS();
+		rc = WaitLatch(MyLatch,
+					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+					   1000L / WAITS_PER_SECOND,
+					   WAIT_EVENT_PROMOTE);
+		ResetLatch(MyLatch);
+		/* a set latch is no failure; give up only if the postmaster died */
+		if (rc & WL_POSTMASTER_DEATH)
+			PG_RETURN_BOOL(false);
+	}
+
+	ereport(WARNING,
+			(errmsg("server did not promote within %d seconds", wait_seconds)));
+	PG_RETURN_BOOL(false);
+}
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index a03b005f73..2857d80984 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1119,6 +1119,13 @@ LANGUAGE INTERNAL
STRICT IMMUTABLE PARALLEL SAFE
AS 'jsonb_insert';
+CREATE OR REPLACE FUNCTION
+ pg_promote(wait boolean DEFAULT true, wait_seconds integer DEFAULT 60)
+RETURNS boolean
+LANGUAGE INTERNAL
+STRICT VOLATILE
+AS 'pg_promote';
+
--
-- The default permissions for functions mean that anyone can execute them.
-- A number of functions shouldn't be executable by just anyone, but rather
@@ -1138,6 +1145,7 @@ REVOKE EXECUTE ON FUNCTION pg_rotate_logfile() FROM public;
REVOKE EXECUTE ON FUNCTION pg_reload_conf() FROM public;
REVOKE EXECUTE ON FUNCTION pg_current_logfile() FROM public;
REVOKE EXECUTE ON FUNCTION pg_current_logfile(text) FROM public;
+REVOKE EXECUTE ON FUNCTION pg_promote(boolean, integer) FROM public;
REVOKE EXECUTE ON FUNCTION pg_stat_reset() FROM public;
REVOKE EXECUTE ON FUNCTION pg_stat_reset_shared(text) FROM public;
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 8a5b2b3b42..bec84c8b55 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3668,6 +3668,9 @@ pgstat_get_wait_ipc(WaitEventIPC w)
case WAIT_EVENT_PROCARRAY_GROUP_UPDATE:
event_name = "ProcArrayGroupUpdate";
break;
+ case WAIT_EVENT_PROMOTE:
+ event_name = "Promote";
+ break;
case WAIT_EVENT_CLOG_GROUP_UPDATE:
event_name = "ClogGroupUpdate";
break;
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 421ba6d775..e01d12eb7c 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -319,10 +319,16 @@ extern void do_pg_abort_backup(void);
extern SessionBackupState get_backup_status(void);
/* File path names (all relative to $PGDATA) */
+#define RECOVERY_COMMAND_FILE "recovery.conf"
+#define RECOVERY_COMMAND_DONE "recovery.done"
#define BACKUP_LABEL_FILE "backup_label"
#define BACKUP_LABEL_OLD "backup_label.old"
#define TABLESPACE_MAP "tablespace_map"
#define TABLESPACE_MAP_OLD "tablespace_map.old"
+/* files to signal promotion to primary */
+#define PROMOTE_SIGNAL_FILE "promote"
+#define FALLBACK_PROMOTE_SIGNAL_FILE "fallback_promote"
+
#endif /* XLOG_H */
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index cff58ed2d8..25f1303a83 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5824,6 +5824,10 @@
proname => 'pg_backup_start_time', provolatile => 's',
prorettype => 'timestamptz', proargtypes => '',
prosrc => 'pg_backup_start_time' },
+{ oid => '3436', descr => 'promote standby server',
+  proname => 'pg_promote', provolatile => 'v', prorettype => 'bool',
+  proargtypes => 'bool int4', proargnames => '{wait,wait_seconds}',
+  prosrc => 'pg_promote' },
{ oid => '2848', descr => 'switch to new wal file',
proname => 'pg_switch_wal', provolatile => 'v', prorettype => 'pg_lsn',
proargtypes => '', prosrc => 'pg_switch_wal' },
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index d59c24ae23..d8d73fec21 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -832,7 +832,8 @@ typedef enum
WAIT_EVENT_REPLICATION_ORIGIN_DROP,
WAIT_EVENT_REPLICATION_SLOT_DROP,
WAIT_EVENT_SAFE_SNAPSHOT,
- WAIT_EVENT_SYNC_REP
+ WAIT_EVENT_SYNC_REP,
+ WAIT_EVENT_PROMOTE
} WaitEventIPC;
/* ----------
diff --git a/src/test/recovery/t/004_timeline_switch.pl b/src/test/recovery/t/004_timeline_switch.pl
index 34ee335129..fd440ca9dd 100644
--- a/src/test/recovery/t/004_timeline_switch.pl
+++ b/src/test/recovery/t/004_timeline_switch.pl
@@ -37,9 +37,11 @@ $node_master->safe_psql('postgres',
$node_master->wait_for_catchup($node_standby_1, 'replay',
$node_master->lsn('write'));
-# Stop and remove master, and promote standby 1, switching it to a new timeline
+# Stop and remove master
$node_master->teardown_node;
-$node_standby_1->promote;
+
+# promote standby 1 using "pg_promote", switching it to a new timeline
+$node_standby_1->safe_psql('postgres', "SELECT pg_promote()");
# Switch standby 2 to replay from standby 1
rmtree($node_standby_2->data_dir . '/recovery.conf');
diff --git a/src/test/recovery/t/009_twophase.pl b/src/test/recovery/t/009_twophase.pl
index 9ea3bd65fc..a6044179bc 100644
--- a/src/test/recovery/t/009_twophase.pl
+++ b/src/test/recovery/t/009_twophase.pl
@@ -214,7 +214,11 @@ $cur_master->psql(
INSERT INTO t_009_tbl VALUES (22, 'issued to ${cur_master_name}');
PREPARE TRANSACTION 'xact_009_10';");
$cur_master->teardown_node;
-$cur_standby->promote;
+
+# promote standby using "pg_promote" and wait until it is promoted
+$cur_standby->safe_psql('postgres', 'SELECT pg_promote(FALSE)');
+$cur_standby->poll_query_until('postgres', "SELECT NOT pg_is_in_recovery()")
+ or die "standby never exited recovery";
# change roles
note "Now paris is master and london is standby";
--
2.17.2