On 5/1/17 12:19, Peter Eisentraut wrote:
> On 4/27/17 08:41, Michael Paquier wrote:
>> +$node_slave->promote;
>> +$node_slave->poll_query_until('postgres',
>> +  "SELECT NOT pg_is_in_recovery()")
>> +  or die "Timed out while waiting for promotion of standby";
>>
>> This reminds me that we should really switch PostgresNode::promote to
>> use the wait mode of pg_ctl promote, and remove all those polling
>> queries...
>
> I was going to say: This should all be obsolete already, because pg_ctl
> promote waits by default.
>
> However: Failure to complete promotion within the waiting time does not
> lead to an error exit, so you will not get a failure if the promotion
> does not finish.  This is probably a mistake.  Looking around pg_ctl, I
> found that this was handled seemingly inconsistently in do_start(), but
> do_stop() errors when it does not complete.
>
> Possible patches for this attached.
>
> Perhaps we need a separate exit code in pg_ctl to distinguish general
> errors from did not finish within timeout?

I was going to hold this back for PG11, but since we're now doing some
other tweaks in pg_ctl, it might be useful to add this too.  Thoughts?

--
Peter Eisentraut              http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

From 67707d541a2d9e088109385c8fa1eced8af83d54 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut <pete...@gmx.net> Date: Mon, 1 May 2017 12:10:17 -0400 Subject: [PATCH v2 1/2] pg_ctl: Make failure to complete operation a nonzero exit If an operation being waited for does not complete within the timeout, then exit with a nonzero exit status. This was previously handled inconsistently. --- doc/src/sgml/ref/pg_ctl-ref.sgml | 7 +++++++ src/bin/pg_ctl/pg_ctl.c | 8 ++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/doc/src/sgml/ref/pg_ctl-ref.sgml b/doc/src/sgml/ref/pg_ctl-ref.sgml index 71e52c4c35..12fa011c4e 100644 --- a/doc/src/sgml/ref/pg_ctl-ref.sgml +++ b/doc/src/sgml/ref/pg_ctl-ref.sgml @@ -412,6 +412,13 @@ <title>Options</title> <command>pg_ctl</command> returns an exit code based on the success of the startup or shutdown. </para> + + <para> + If the operation does not complete within the timeout (see + option <option>-t</option>), then <command>pg_ctl</command> exits with + a nonzero exit status. But note that the operation might continue in + the background and eventually succeed. + </para> </listitem> </varlistentry> diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c index 0c65196bda..4e02c4cea1 100644 --- a/src/bin/pg_ctl/pg_ctl.c +++ b/src/bin/pg_ctl/pg_ctl.c @@ -840,7 +840,9 @@ do_start(void) break; case POSTMASTER_STILL_STARTING: print_msg(_(" stopped waiting\n")); - print_msg(_("server is still starting up\n")); + write_stderr(_("%s: server did not start in time\n"), + progname); + exit(1); break; case POSTMASTER_FAILED: print_msg(_(" stopped waiting\n")); @@ -1166,7 +1168,9 @@ do_promote(void) else { print_msg(_(" stopped waiting\n")); - print_msg(_("server is still promoting\n")); + write_stderr(_("%s: server did not promote in time\n"), + progname); + exit(1); } } else -- 2.13.1
From b30b7d96161a2e27d80cc96073b44c5266c2b751 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut <pete...@gmx.net> Date: Mon, 1 May 2017 12:11:25 -0400 Subject: [PATCH v2 2/2] Remove unnecessary pg_is_in_recovery calls in tests Since pg_ctl promote already waits for recovery to end, these calls are obsolete. --- src/test/modules/commit_ts/t/003_standby_2.pl | 1 - src/test/recovery/t/008_fsm_truncation.pl | 2 -- src/test/recovery/t/009_twophase.pl | 6 ------ src/test/recovery/t/010_logical_decoding_timelines.pl | 3 --- src/test/recovery/t/012_subtransactions.pl | 6 ------ 5 files changed, 18 deletions(-) diff --git a/src/test/modules/commit_ts/t/003_standby_2.pl b/src/test/modules/commit_ts/t/003_standby_2.pl index 2fd561115c..c3000f5b4c 100644 --- a/src/test/modules/commit_ts/t/003_standby_2.pl +++ b/src/test/modules/commit_ts/t/003_standby_2.pl @@ -55,7 +55,6 @@ $master->restart; system_or_bail('pg_ctl', '-D', $standby->data_dir, 'promote'); -$standby->poll_query_until('postgres', "SELECT pg_is_in_recovery() <> true"); $standby->safe_psql('postgres', "create table t11()"); my $standby_ts = $standby->safe_psql('postgres', diff --git a/src/test/recovery/t/008_fsm_truncation.pl b/src/test/recovery/t/008_fsm_truncation.pl index 56eecf722c..ddab464a97 100644 --- a/src/test/recovery/t/008_fsm_truncation.pl +++ b/src/test/recovery/t/008_fsm_truncation.pl @@ -83,8 +83,6 @@ # Promote the standby $node_standby->promote; -$node_standby->poll_query_until('postgres', "SELECT NOT pg_is_in_recovery()") - or die "Timed out while waiting for promotion of standby"; $node_standby->psql('postgres', 'checkpoint'); # Restart to discard in-memory copy of FSM diff --git a/src/test/recovery/t/009_twophase.pl b/src/test/recovery/t/009_twophase.pl index 13b4a04205..376cb09a25 100644 --- a/src/test/recovery/t/009_twophase.pl +++ b/src/test/recovery/t/009_twophase.pl @@ -195,8 +195,6 @@ PREPARE TRANSACTION 'xact_009_1';"); $node_master->teardown_node; $node_slave->promote; 
-$node_slave->poll_query_until('postgres', "SELECT NOT pg_is_in_recovery()") - or die "Timed out while waiting for promotion of standby"; $psql_rc = $node_slave->psql('postgres', "COMMIT PREPARED 'xact_009_1'"); is($psql_rc, '0', "Restore of prepared transaction on promoted slave"); @@ -227,8 +225,6 @@ $node_master->stop; $node_slave->restart; $node_slave->promote; -$node_slave->poll_query_until('postgres', "SELECT NOT pg_is_in_recovery()") - or die "Timed out while waiting for promotion of standby"; $node_slave->psql( 'postgres', @@ -264,8 +260,6 @@ $node_slave->teardown_node; $node_slave->start; $node_slave->promote; -$node_slave->poll_query_until('postgres', "SELECT NOT pg_is_in_recovery()") - or die "Timed out while waiting for promotion of standby"; $node_slave->psql( 'postgres', diff --git a/src/test/recovery/t/010_logical_decoding_timelines.pl b/src/test/recovery/t/010_logical_decoding_timelines.pl index 65f6ba2fca..98418128d2 100644 --- a/src/test/recovery/t/010_logical_decoding_timelines.pl +++ b/src/test/recovery/t/010_logical_decoding_timelines.pl @@ -135,9 +135,6 @@ $node_master->stop('immediate'); $node_replica->promote; -print "waiting for replica to come up\n"; -$node_replica->poll_query_until('postgres', - "SELECT NOT pg_is_in_recovery();"); $node_replica->safe_psql('postgres', "INSERT INTO decoding(blah) VALUES ('after failover');"); diff --git a/src/test/recovery/t/012_subtransactions.pl b/src/test/recovery/t/012_subtransactions.pl index 30677e1675..c99733cad7 100644 --- a/src/test/recovery/t/012_subtransactions.pl +++ b/src/test/recovery/t/012_subtransactions.pl @@ -109,8 +109,6 @@ is($psql_out, '8128', "Visible"); $node_master->stop; $node_slave->promote; -$node_slave->poll_query_until('postgres', "SELECT NOT pg_is_in_recovery()") - or die "Timed out while waiting for promotion of standby"; $node_slave->psql( 'postgres', @@ -162,8 +160,6 @@ is($psql_out, '-1', "Not visible"); $node_master->stop; $node_slave->promote; 
-$node_slave->poll_query_until('postgres', "SELECT NOT pg_is_in_recovery()") - or die "Timed out while waiting for promotion of standby"; $node_slave->psql( 'postgres', @@ -205,8 +201,6 @@ is($psql_out, '-1', "Not visible"); $node_master->stop; $node_slave->promote; -$node_slave->poll_query_until('postgres', "SELECT NOT pg_is_in_recovery()") - or die "Timed out while waiting for promotion of standby"; $node_slave->psql( 'postgres', -- 2.13.1

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers