On 8/7/16 9:44 PM, Michael Paquier wrote: >>> This is not a good >>> >> idea, and the idea of putting a wait argument in get_controlfile does >>> >> not seem a good interface to me. I'd rather see get_controlfile be >>> >> extended with a flag saying no_error_on_failure and keep the wait >>> >> logic within pg_ctl. >> > >> > I guess we could write a wrapper function in pg_ctl that encapsulated >> > the wait logic. > That's what I would do.
New patches, incorporating your suggestions. I moved some of the error handling out of get_controlfile() and back into the callers, because it was getting too weird that that function knew so much about the callers' intentions. That way we don't actually have to change the signature. -- Peter Eisentraut http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From 04a724675769b2412b739c408abc136d17d52794 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut <pete...@gmx.net> Date: Tue, 26 Jul 2016 11:39:43 -0400 Subject: [PATCH v2 1/5] Make command_like output more compact Consistently print the test name, not the full command, which can be quite lenghty and include temporary directory names and other distracting details. --- src/test/perl/TestLib.pm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/perl/TestLib.pm b/src/test/perl/TestLib.pm index 649fd82..c7b3262 100644 --- a/src/test/perl/TestLib.pm +++ b/src/test/perl/TestLib.pm @@ -276,8 +276,8 @@ sub command_like my ($stdout, $stderr); print("# Running: " . join(" ", @{$cmd}) . "\n"); my $result = IPC::Run::run $cmd, '>', \$stdout, '2>', \$stderr; - ok($result, "@$cmd exit code 0"); - is($stderr, '', "@$cmd no stderr"); + ok($result, "$test_name: exit code 0"); + is($stderr, '', "$test_name: no stderr"); like($stdout, $expected_stdout, "$test_name: matches"); } -- 2.9.2
From b7c04b3f6cbe7a37359509363da0701c84063113 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut <pete...@gmx.net> Date: Tue, 26 Jul 2016 10:48:43 -0400 Subject: [PATCH v2 2/5] pg_ctl: Add tests for promote action --- src/bin/pg_ctl/t/003_promote.pl | 39 +++++++++++++++++++++++++++++++++++++++ src/test/perl/TestLib.pm | 11 +++++++++++ 2 files changed, 50 insertions(+) create mode 100644 src/bin/pg_ctl/t/003_promote.pl diff --git a/src/bin/pg_ctl/t/003_promote.pl b/src/bin/pg_ctl/t/003_promote.pl new file mode 100644 index 0000000..1461234 --- /dev/null +++ b/src/bin/pg_ctl/t/003_promote.pl @@ -0,0 +1,39 @@ +use strict; +use warnings; + +use PostgresNode; +use TestLib; +use Test::More tests => 9; + +my $tempdir = TestLib::tempdir; + +command_fails_like([ 'pg_ctl', '-D', "$tempdir/nonexistent", 'promote' ], + qr/directory .* does not exist/, + 'pg_ctl promote with nonexistent directory'); + +my $node_primary = get_new_node('primary'); +$node_primary->init(allows_streaming => 1); + +command_fails_like([ 'pg_ctl', '-D', $node_primary->data_dir, 'promote' ], + qr/PID file .* does not exist/, + 'pg_ctl promote of not running instance fails'); + +$node_primary->start; + +command_fails_like([ 'pg_ctl', '-D', $node_primary->data_dir, 'promote' ], + qr/not in standby mode/, + 'pg_ctl promote of primary instance fails'); + +my $node_standby = get_new_node('standby'); +$node_primary->backup('my_backup'); +$node_standby->init_from_backup($node_primary, 'my_backup', has_streaming => 1); +$node_standby->start; + +is($node_standby->safe_psql('postgres', 'SELECT pg_is_in_recovery()'), + 't', 'standby is in recovery'); + +command_ok([ 'pg_ctl', '-D', $node_standby->data_dir, 'promote' ], + 'pg_ctl promote of standby runs'); + +ok($node_standby->poll_query_until('postgres', 'SELECT NOT pg_is_in_recovery()'), + 'promoted standby is not in recovery'); diff --git a/src/test/perl/TestLib.pm b/src/test/perl/TestLib.pm index c7b3262..51b533e 100644 --- a/src/test/perl/TestLib.pm +++ b/src/test/perl/TestLib.pm @@ -34,6 +34,7 @@ our @EXPORT = qw( program_version_ok program_options_handling_ok command_like + command_fails_like $windows_os ); @@ -281,4 +282,14 @@ sub command_like like($stdout, $expected_stdout, "$test_name: matches"); } +sub command_fails_like +{ + my ($cmd, $expected_stderr, $test_name) = @_; + my ($stdout, $stderr); + print("# Running: " . join(" ", @{$cmd}) . "\n"); + my $result = IPC::Run::run $cmd, '>', \$stdout, '2>', \$stderr; + ok(!$result, "$test_name: exit code not 0"); + like($stderr, $expected_stderr, "$test_name: matches"); +} + 1; -- 2.9.2
From cf77e8419bae21a84a2a30d260a70bac2c19498a Mon Sep 17 00:00:00 2001 From: Peter Eisentraut <pete...@gmx.net> Date: Tue, 26 Jul 2016 11:23:43 -0400 Subject: [PATCH v2 3/5] pg_ctl: Detect current standby state from pg_control pg_ctl used to determine whether a server was in standby mode by looking for a recovery.conf file. With this change, it instead looks into pg_control, which is potentially more accurate. There are also occasional discussions about removing recovery.conf, so this removes one dependency. Add a wait_seconds argument to get_controlfile() so that pg_ctl can wait a bit for a consistent pg_control file. Otherwise, pg_ctl operations might fail on rare occasions when the server is updating the file at the same time. --- src/backend/utils/misc/pg_controldata.c | 12 +++++++++ src/bin/pg_controldata/pg_controldata.c | 4 +++ src/bin/pg_ctl/pg_ctl.c | 47 ++++++++++++++++++++++++++------- src/common/controldata_utils.c | 15 +++++------ src/include/common/controldata_utils.h | 2 +- 5 files changed, 62 insertions(+), 18 deletions(-) diff --git a/src/backend/utils/misc/pg_controldata.c b/src/backend/utils/misc/pg_controldata.c index 34ee76a..4f9de83 100644 --- a/src/backend/utils/misc/pg_controldata.c +++ b/src/backend/utils/misc/pg_controldata.c @@ -52,6 +52,9 @@ pg_control_system(PG_FUNCTION_ARGS) /* read the control file */ ControlFile = get_controlfile(DataDir, NULL); + if (!ControlFile) + ereport(ERROR, + (errmsg("calculated CRC checksum does not match value stored in file"))); values[0] = Int32GetDatum(ControlFile->pg_control_version); nulls[0] = false; @@ -128,6 +131,9 @@ pg_control_checkpoint(PG_FUNCTION_ARGS) /* Read the control file. */ ControlFile = get_controlfile(DataDir, NULL); + if (!ControlFile) + ereport(ERROR, + (errmsg("calculated CRC checksum does not match value stored in file"))); /* * Calculate name of the WAL file containing the latest checkpoint's REDO @@ -230,6 +236,9 @@ pg_control_recovery(PG_FUNCTION_ARGS) /* read the control file */ ControlFile = get_controlfile(DataDir, NULL); + if (!ControlFile) + ereport(ERROR, + (errmsg("calculated CRC checksum does not match value stored in file"))); values[0] = LSNGetDatum(ControlFile->minRecoveryPoint); nulls[0] = false; @@ -295,6 +304,9 @@ pg_control_init(PG_FUNCTION_ARGS) /* read the control file */ ControlFile = get_controlfile(DataDir, NULL); + if (!ControlFile) + ereport(ERROR, + (errmsg("calculated CRC checksum does not match value stored in file"))); values[0] = Int32GetDatum(ControlFile->maxAlign); nulls[0] = false; diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index 96619a2..e92feab 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -156,6 +156,10 @@ main(int argc, char *argv[]) /* get a copy of the control file */ ControlFile = get_controlfile(DataDir, progname); + if (!ControlFile) + printf(_("WARNING: Calculated CRC checksum does not match value stored in file.\n" + "Either the file is corrupt, or it has a different layout than this program\n" + "is expecting. The results below are untrustworthy.\n\n")); /* * This slightly-chintzy coding will work as long as the control file diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c index efc0729..85f6a92 100644 --- a/src/bin/pg_ctl/pg_ctl.c +++ b/src/bin/pg_ctl/pg_ctl.c @@ -19,6 +19,8 @@ #include "postgres_fe.h" +#include "catalog/pg_control.h" +#include "common/controldata_utils.h" #include "libpq-fe.h" #include "pqexpbuffer.h" @@ -96,7 +98,6 @@ static char postopts_file[MAXPGPATH]; static char version_file[MAXPGPATH]; static char pid_file[MAXPGPATH]; static char backup_file[MAXPGPATH]; -static char recovery_file[MAXPGPATH]; static char promote_file[MAXPGPATH]; #ifdef WIN32 @@ -158,6 +159,8 @@ static bool postmaster_is_alive(pid_t pid); static void unlimit_core_size(void); #endif +static DBState get_control_dbstate(void); + #ifdef WIN32 static void @@ -988,12 +991,12 @@ do_stop(void) /* * If backup_label exists, an online backup is running. Warn the user * that smart shutdown will wait for it to finish. However, if - * recovery.conf is also present, we're recovering from an online + * the server is in archive recovery, we're recovering from an online * backup instead of performing one. */ if (shutdown_mode == SMART_MODE && stat(backup_file, &statbuf) == 0 && - stat(recovery_file, &statbuf) != 0) + get_control_dbstate() != DB_IN_ARCHIVE_RECOVERY) { print_msg(_("WARNING: online backup mode is active\n" "Shutdown will not complete until pg_stop_backup() is called.\n\n")); @@ -1076,12 +1079,12 @@ do_restart(void) /* * If backup_label exists, an online backup is running. Warn the user * that smart shutdown will wait for it to finish. However, if - * recovery.conf is also present, we're recovering from an online + * the server is in archive recovery, we're recovering from an online * backup instead of performing one. */ if (shutdown_mode == SMART_MODE && stat(backup_file, &statbuf) == 0 && - stat(recovery_file, &statbuf) != 0) + get_control_dbstate() != DB_IN_ARCHIVE_RECOVERY) { print_msg(_("WARNING: online backup mode is active\n" "Shutdown will not complete until pg_stop_backup() is called.\n\n")); @@ -1168,7 +1171,6 @@ do_promote(void) { FILE *prmfile; pgpid_t pid; - struct stat statbuf; pid = get_pgpid(false); @@ -1187,8 +1189,7 @@ do_promote(void) exit(1); } - /* If recovery.conf doesn't exist, the server is not in standby mode */ - if (stat(recovery_file, &statbuf) != 0) + if (get_control_dbstate() != DB_IN_ARCHIVE_RECOVERY) { write_stderr(_("%s: cannot promote server; " "server is not in standby mode\n"), @@ -2115,6 +2116,35 @@ adjust_data_dir(void) } +static DBState +get_control_dbstate(void) +{ + DBState ret; + + for (;;) + { + ControlFileData *control_file_data = get_controlfile(pg_data, progname); + + if (control_file_data) + { + ret = control_file_data->state; + pfree(control_file_data); + return ret; + } + + if (wait_seconds > 0) + { + sleep(1); + wait_seconds--; + continue; + } + + write_stderr(_("%s: control file appears to be corrupt\n"), progname); + exit(1); + } +} + + int main(int argc, char **argv) { @@ -2401,7 +2431,6 @@ main(int argc, char **argv) snprintf(version_file, MAXPGPATH, "%s/PG_VERSION", pg_data); snprintf(pid_file, MAXPGPATH, "%s/postmaster.pid", pg_data); snprintf(backup_file, MAXPGPATH, "%s/backup_label", pg_data); - snprintf(recovery_file, MAXPGPATH, "%s/recovery.conf", pg_data); } switch (ctl_command) diff --git a/src/common/controldata_utils.c b/src/common/controldata_utils.c index 5592fe7..f218d25 100644 --- a/src/common/controldata_utils.c +++ b/src/common/controldata_utils.c @@ -33,9 +33,11 @@ * * Get controlfile values. The caller is responsible * for pfreeing the result. + * + * Returns NULL if the CRC did not match. */ ControlFileData * -get_controlfile(char *DataDir, const char *progname) +get_controlfile(const char *DataDir, const char *progname) { ControlFileData *ControlFile; int fd; @@ -82,13 +84,10 @@ get_controlfile(char *DataDir, const char *progname) FIN_CRC32C(crc); if (!EQ_CRC32C(crc, ControlFile->crc)) -#ifndef FRONTEND - elog(ERROR, _("calculated CRC checksum does not match value stored in file")); -#else - printf(_("WARNING: Calculated CRC checksum does not match value stored in file.\n" - "Either the file is corrupt, or it has a different layout than this program\n" - "is expecting. The results below are untrustworthy.\n\n")); -#endif + { + pfree(ControlFile); + return NULL; + } /* Make sure the control file is valid byte order. */ if (ControlFile->pg_control_version % 65536 == 0 && diff --git a/src/include/common/controldata_utils.h b/src/include/common/controldata_utils.h index a355d22..f834624 100644 --- a/src/include/common/controldata_utils.h +++ b/src/include/common/controldata_utils.h @@ -12,6 +12,6 @@ #include "catalog/pg_control.h" -extern ControlFileData *get_controlfile(char *DataDir, const char *progname); +extern ControlFileData *get_controlfile(const char *DataDir, const char *progname); #endif /* COMMON_CONTROLDATA_UTILS_H */ -- 2.9.2
From 4938adf0f28ecf83ca5b0d6332b83383d916866e Mon Sep 17 00:00:00 2001 From: Peter Eisentraut <pete...@gmx.net> Date: Tue, 26 Jul 2016 12:08:49 -0400 Subject: [PATCH v2 4/5] Delay updating control file to "in production" Move the updating of the control file to "in production" status until the point where WAL writes are allowed. Before, there could be a significant gap between the control file update and write transactions actually being allowed. This makes it more reliable to use the control status to verify the end of a promotion. Michael Paquier --- src/backend/access/transam/xlog.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index f13f9c1..1f8d054 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7349,12 +7349,6 @@ StartupXLOG(void) */ InRecovery = false; - LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); - ControlFile->state = DB_IN_PRODUCTION; - ControlFile->time = (pg_time_t) time(NULL); - UpdateControlFile(); - LWLockRelease(ControlFileLock); - /* start the archive_timeout timer running */ XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL); @@ -7412,15 +7406,32 @@ StartupXLOG(void) CompleteCommitTsInitialization(); /* - * All done. Allow backends to write WAL. (Although the bool flag is - * probably atomic in itself, we use the info_lck here to ensure that - * there are no race conditions concerning visibility of other recent - * updates to shared memory.) + * All done with end-of-recovery actions. + * + * Now allow backends to write WAL and update the control file status in + * consequence. The boolean flag allowing backends to write WAL is + * updated while holding ControlFileLock to prevent other backends to look + * at an inconsistent state of the control file in shared memory. There + * is still a small window during which backends can write WAL and the + * control file is still referring to a system not in DB_IN_PRODUCTION + * state while looking at the on-disk control file. + * + * Also, although the boolean flag to allow WAL is probably atomic in + * itself, we use the info_lck here to ensure that there are no race + * conditions concerning visibility of other recent updates to shared + * memory. */ + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); + ControlFile->state = DB_IN_PRODUCTION; + ControlFile->time = (pg_time_t) time(NULL); + SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->SharedRecoveryInProgress = false; SpinLockRelease(&XLogCtl->info_lck); + UpdateControlFile(); + LWLockRelease(ControlFileLock); + /* * If there were cascading standby servers connected to us, nudge any wal * sender processes to notice that we've been promoted. -- 2.9.2
From 88d426cfb25f02aa990fe24b71a55fb4c316daff Mon Sep 17 00:00:00 2001 From: Peter Eisentraut <pete...@gmx.net> Date: Fri, 5 Aug 2016 21:35:19 -0400 Subject: [PATCH v2 5/5] pg_ctl: Add wait option to promote action When waiting is selected for the promote action, look into pg_control until the state changes, then use the PQping-based waiting until the server is reachable. --- doc/src/sgml/ref/pg_ctl-ref.sgml | 29 ++++++++++++++++++++------ src/bin/pg_ctl/pg_ctl.c | 45 ++++++++++++++++++++++++++++------------ src/bin/pg_ctl/t/003_promote.pl | 18 +++++++++++++++- 3 files changed, 72 insertions(+), 20 deletions(-) diff --git a/doc/src/sgml/ref/pg_ctl-ref.sgml b/doc/src/sgml/ref/pg_ctl-ref.sgml index 6ceb781..a00c355 100644 --- a/doc/src/sgml/ref/pg_ctl-ref.sgml +++ b/doc/src/sgml/ref/pg_ctl-ref.sgml @@ -91,6 +91,8 @@ <cmdsynopsis> <command>pg_ctl</command> <arg choice="plain"><option>promote</option></arg> + <arg choice="opt"><option>-w</option></arg> + <arg choice="opt"><option>-t</option> <replaceable>seconds</replaceable></arg> <arg choice="opt"><option>-s</option></arg> <arg choice="opt"><option>-D</option> <replaceable>datadir</replaceable></arg> </cmdsynopsis> @@ -361,8 +363,8 @@ <title>Options</title> <term><option>--timeout</option></term> <listitem> <para> - The maximum number of seconds to wait when waiting for startup or - shutdown to complete. Defaults to the value of the + The maximum number of seconds to wait when waiting for an operation + to complete (see option <option>-w</option>). Defaults to the value of the <envar>PGCTLTIMEOUT</> environment variable or, if not set, to 60 seconds. </para> @@ -383,8 +385,23 @@ <title>Options</title> <term><option>-w</option></term> <listitem> <para> - Wait for the startup or shutdown to complete. - Waiting is the default option for shutdowns, but not startups. + Wait for an operation to complete. This is supported for the + modes <literal>start</literal>, <literal>stop</literal>, + <literal>restart</literal>, <literal>promote</literal>, + and <literal>register</literal>. + </para> + + <para> + Waiting is the default option for shutdowns, but not startups, + restarts, or promotions. This is mainly for historical reasons; the + waiting option is almost always preferable. If waiting is not + selected, the requested action is triggered, but there is no feedback + about its success. In that case, the server log file or an external + monitoring system would have to be used to check the progress and + success of the operation. + </para> + + <para> When waiting for startup, <command>pg_ctl</command> repeatedly attempts to connect to the server. When waiting for shutdown, <command>pg_ctl</command> waits for @@ -400,8 +417,8 @@ <title>Options</title> <term><option>-W</option></term> <listitem> <para> - Do not wait for startup or shutdown to complete. This is the - default for start and restart modes. + Do not wait for an operation to complete. This is the opposite of the + option <option>-w</option>. </para> </listitem> </varlistentry> diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c index 85f6a92..82840bb 100644 --- a/src/bin/pg_ctl/pg_ctl.c +++ b/src/bin/pg_ctl/pg_ctl.c @@ -1228,7 +1228,34 @@ do_promote(void) exit(1); } - print_msg(_("server promoting\n")); + if (do_wait) + { + DBState state = DB_STARTUP; + + print_msg(_("waiting for server to promote...")); + while (wait_seconds > 0) + { + state = get_control_dbstate(); + if (state == DB_IN_PRODUCTION) + break; + + print_msg("."); + pg_usleep(1000000); /* 1 sec */ + wait_seconds--; + } + if (state == DB_IN_PRODUCTION) + { + print_msg(_(" done\n")); + print_msg(_("server promoted\n")); + } + else + { + print_msg(_(" stopped waiting\n")); + print_msg(_("server is still promoting\n")); + } + } + else + print_msg(_("server promoting\n")); } @@ -2405,18 +2432,10 @@ main(int argc, char **argv) if (!wait_set) { - switch (ctl_command) - { - case RESTART_COMMAND: - case START_COMMAND: - do_wait = false; - break; - case STOP_COMMAND: - do_wait = true; - break; - default: - break; - } + if (ctl_command == STOP_COMMAND) + do_wait = true; + else + do_wait = false; } if (ctl_command == RELOAD_COMMAND) diff --git a/src/bin/pg_ctl/t/003_promote.pl b/src/bin/pg_ctl/t/003_promote.pl index 1461234..0b6090b 100644 --- a/src/bin/pg_ctl/t/003_promote.pl +++ b/src/bin/pg_ctl/t/003_promote.pl @@ -3,7 +3,7 @@ use PostgresNode; use TestLib; -use Test::More tests => 9; +use Test::More tests => 12; my $tempdir = TestLib::tempdir; @@ -37,3 +37,19 @@ ok($node_standby->poll_query_until('postgres', 'SELECT NOT pg_is_in_recovery()'), 'promoted standby is not in recovery'); + +# same again with wait option +$node_standby = get_new_node('standby2'); +$node_standby->init_from_backup($node_primary, 'my_backup', has_streaming => 1); +$node_standby->start; + +is($node_standby->safe_psql('postgres', 'SELECT pg_is_in_recovery()'), + 't', 'standby is in recovery'); + +command_ok([ 'pg_ctl', '-D', $node_standby->data_dir, '-w', 'promote' ], + 'pg_ctl -w promote of standby runs'); + +# no wait here + +is($node_standby->safe_psql('postgres', 'SELECT pg_is_in_recovery()'), + 'f', 'promoted standby is not in recovery'); -- 2.9.2
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers