On 20.08.21 20:47, Tom Lane wrote:
I think you should drop the overly-cute bit with a SIGALRM handler,
and instead have a loop-with-delay around an attempt to read the
psql.pid file, after launching the psql run without an immediate
wait for termination.  That gets rid of the first problem (though
you still want the loop to timeout eventually, it could wait up
to say 180 seconds, as we do elsewhere).  Then the second problem
is easy to solve by making the pg_sleep delay twice as much.

Here is a proposal. It waits separately for the pid file to appear and also checks for the sleep query to be registered by the backend, so it doesn't have any more dependencies on things happening "fast enough". And it's also faster in the normal case now. Thoughts?
From 4fba5b1e1b6f61acb3508b90825c668a8c6bf70c Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <[email protected]>
Date: Tue, 24 Aug 2021 18:24:17 +0200
Subject: [PATCH] Make psql cancel test more timing robust

---
 src/bin/psql/t/020_cancel.pl | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/src/bin/psql/t/020_cancel.pl b/src/bin/psql/t/020_cancel.pl
index 7713fff8e4..0319f6b3f1 100644
--- a/src/bin/psql/t/020_cancel.pl
+++ b/src/bin/psql/t/020_cancel.pl
@@ -7,6 +7,7 @@
 use PostgresNode;
 use TestLib;
 use Test::More tests => 2;
+use Time::HiRes qw(usleep);
 
 my $tempdir = TestLib::tempdir;
 
@@ -28,19 +29,33 @@
        my ($stdin, $stdout, $stderr);
 
        # Test whether shell supports $PPID.  It's part of POSIX, but some
-       # pre-/non-POSIX shells don't support it (e.g., NetBSD, Solaris).
+       # pre-/non-POSIX shells don't support it (e.g., NetBSD).
        $stdin = "\\! echo \$PPID";
        IPC::Run::run(['psql', '-X', '-v', 'ON_ERROR_STOP=1'], '<', \$stdin, 
'>', \$stdout, '2>', \$stderr);
        $stdout =~ /^\d+$/ or skip "shell apparently does not support \$PPID", 
2;
 
-       local $SIG{ALRM} = sub {
-               my $psql_pid = TestLib::slurp_file("$tempdir/psql.pid");
-               kill 'INT', $psql_pid;
-       };
-       alarm 1;
+       # Now start the real test
+       my $h = IPC::Run::start(['psql', '-X', '-v', 'ON_ERROR_STOP=1'], 
\$stdin, \$stdout, \$stderr);
 
-       $stdin = "\\! echo \$PPID >$tempdir/psql.pid\nselect pg_sleep(3);";
-       my $result = IPC::Run::run(['psql', '-X', '-v', 'ON_ERROR_STOP=1'], 
'<', \$stdin, '>', \$stdout, '2>', \$stderr);
+       # Get the PID
+       $stdout = '';
+       $stderr = '';
+       $stdin = "\\! echo \$PPID >$tempdir/psql.pid\n";
+       pump $h while length $stdin;
+       my $count;
+       usleep(10_000) until -s "$tempdir/psql.pid" or ($count++ > 180 * 100 
and die 'pid file did not appear');
+       my $psql_pid = TestLib::slurp_file("$tempdir/psql.pid");
+
+       # Send sleep command and wait until the server has registered it
+       $stdin = "select pg_sleep(180);\n";
+       pump $h while length $stdin;
+       $node->poll_query_until('postgres', q{SELECT (SELECT count(*) FROM 
pg_stat_activity WHERE query ~ 'pg_sleep') > 0;})
+         or die "timed out";
+
+       # Send cancel request
+       kill 'INT', $psql_pid;
+
+       my $result = finish $h;
 
        ok(!$result, 'query failed as expected');
        like($stderr, qr/canceling statement due to user request/, 'query was 
canceled');
-- 
2.33.0

Reply via email to