On Mon, Mar 15, 2021 at 10:38 AM Fujii Masao
<[email protected]> wrote:
> On 2021/03/15 12:27, Bharath Rupireddy wrote:
> > On Sun, Mar 7, 2021 at 2:39 PM Bharath Rupireddy
> > <[email protected]> wrote:
> >> Attaching v7 patch for further review.
> >
> > Attaching v8 patch after rebasing on to the latest master.
>
> Thanks for rebasing the patch!
Thanks for reviewing.
> - WAIT_EVENT_XACT_GROUP_UPDATE
> + WAIT_EVENT_XACT_GROUP_UPDATE,
> + WAIT_EVENT_BACKEND_TERMINATION
>
> These should be listed in alphabetical order.
Done.
> In pg_wait_until_termination's do-while loop, ResetLatch() should be called.
> Otherwise, it would enter busy-loop after any signal arrives. Because the
> latch is kept set and WaitLatch() always exits immediately in that case.
Done.
> + /*
> + * Wait in steps of waittime milliseconds until this function exits or
> + * timeout.
> + */
> + int64 waittime = 10;
>
> 10 ms per cycle seems too frequent?
Increased it to 100msec.
> + ereport(WARNING,
> + (errmsg("timeout cannot be negative
> or zero: %lld",
> + (long long int)
> timeout)));
> +
> + result = false;
>
> IMO the parameter should be verified before doing the actual thing.
Done.
> Why is WARNING thrown in this case? Isn't it better to throw ERROR like
> pg_promote() does?
Done.
Attaching v9 patch for further review.
With Regards,
Bharath Rupireddy.
EnterpriseDB: http://www.enterprisedb.com
From ddcf3f3f4a3dbabdf558b828c5728dfbbdc13a31 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <[email protected]>
Date: Tue, 16 Mar 2021 14:46:41 +0530
Subject: [PATCH v9] pg_terminate_backend with wait and timeout
This patch adds extra default arguments wait and timeout
to pg_terminate_backend which will be used to terminate
and wait timeout milliseconds for the backend's termination.
This patch also adds a new function
pg_wait_for_backend_termination(pid, timeout) which checks
for the existence of the backend with given PID and waits
timeout milliseconds for its termination.
---
doc/src/sgml/func.sgml | 31 +++++-
doc/src/sgml/monitoring.sgml | 4 +
src/backend/catalog/system_views.sql | 10 ++
src/backend/postmaster/pgstat.c | 3 +
src/backend/storage/ipc/signalfuncs.c | 143 +++++++++++++++++++++++++-
src/include/catalog/pg_proc.dat | 9 +-
src/include/pgstat.h | 3 +-
7 files changed, 194 insertions(+), 9 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 9492a3c6b9..0a417a0b2b 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -24821,7 +24821,7 @@ SELECT collation for ('foo' COLLATE "de_DE");
<indexterm>
<primary>pg_terminate_backend</primary>
</indexterm>
- <function>pg_terminate_backend</function> ( <parameter>pid</parameter> <type>integer</type> )
+ <function>pg_terminate_backend</function> ( <parameter>pid</parameter> <type>integer</type>, <parameter>wait</parameter> <type>boolean</type> <literal>DEFAULT</literal> <literal>false</literal>, <parameter>timeout</parameter> <type>bigint</type> <literal>DEFAULT</literal> <literal>100</literal> )
<returnvalue>boolean</returnvalue>
</para>
<para>
@@ -24829,7 +24829,34 @@ SELECT collation for ('foo' COLLATE "de_DE");
specified process ID. This is also allowed if the calling role
is a member of the role whose backend is being terminated or the
calling role has been granted <literal>pg_signal_backend</literal>,
- however only superusers can terminate superuser backends.
+ however only superusers can terminate superuser backends. When no
+ <parameter>wait</parameter> and <parameter>timeout</parameter> are
+ specified, this function sends SIGTERM to the backend with the given
+ process ID without waiting for its actual termination and returns <literal>true</literal>.
+ Sometimes the backend process may still exist. With <parameter>wait</parameter>
+ specified as <literal>true</literal>, the function ensures that the
+ backend process is actually terminated. It keeps checking for the
+ existence of the backend process either until the given <parameter>timeout</parameter>
+ or default 100 milliseconds and returns <literal>true</literal>. On
+ timeout a warning is emitted and <literal>false</literal> is returned.
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>pg_wait_for_backend_termination</primary>
+ </indexterm>
+ <function>pg_wait_for_backend_termination</function> ( <parameter>pid</parameter> <type>integer</type>, <parameter>timeout</parameter> <type>bigint</type> <literal>DEFAULT</literal> <literal>100</literal> )
+ <returnvalue>boolean</returnvalue>
+ </para>
+ <para>
+ Checks the existence of backend process with specified process ID. It
+ waits until the given <parameter>timeout</parameter> or the default 100
+ milliseconds. <literal>true</literal> is returned when the backend is
+ found to be not existing within the <parameter>timeout</parameter> milliseconds.
+ On timeout, a warning is emitted and <literal>false</literal> is
+ returned.
</para></entry>
</row>
</tbody>
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index db4b4e460c..f362184d5e 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -1569,6 +1569,10 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
</thead>
<tbody>
+ <row>
+ <entry><literal>BackendTermination</literal></entry>
+ <entry>Waiting for the termination of a backend.</entry>
+ </row>
<row>
<entry><literal>BackupWaitWalArchive</literal></entry>
<entry>Waiting for WAL files required for a backup to be successfully
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 0dca65dc7b..85521546e3 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1277,6 +1277,16 @@ CREATE OR REPLACE FUNCTION
RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_promote'
PARALLEL SAFE;
+CREATE OR REPLACE FUNCTION
+ pg_terminate_backend(pid integer, wait boolean DEFAULT false, timeout int8 DEFAULT 100)
+ RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_terminate_backend'
+ PARALLEL SAFE;
+
+CREATE OR REPLACE FUNCTION
+ pg_wait_for_backend_termination(pid integer, timeout int8 DEFAULT 100)
+ RETURNS boolean STRICT VOLATILE LANGUAGE INTERNAL AS 'pg_wait_for_backend_termination'
+ PARALLEL SAFE;
+
-- legacy definition for compatibility with 9.3
CREATE OR REPLACE FUNCTION
json_populate_record(base anyelement, from_json json, use_json_as_text boolean DEFAULT false)
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index b1e2d94951..2f72241d2f 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -4130,6 +4130,9 @@ pgstat_get_wait_ipc(WaitEventIPC w)
case WAIT_EVENT_XACT_GROUP_UPDATE:
event_name = "XactGroupUpdate";
break;
+ case WAIT_EVENT_BACKEND_TERMINATION:
+ event_name = "BackendTermination";
+ break;
/* no default case, so that compiler will warn */
}
diff --git a/src/backend/storage/ipc/signalfuncs.c b/src/backend/storage/ipc/signalfuncs.c
index 69fe23a256..8d879af47c 100644
--- a/src/backend/storage/ipc/signalfuncs.c
+++ b/src/backend/storage/ipc/signalfuncs.c
@@ -18,6 +18,7 @@
#include "catalog/pg_authid.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "postmaster/syslogger.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
@@ -126,15 +127,103 @@ pg_cancel_backend(PG_FUNCTION_ARGS)
}
/*
- * Signal to terminate a backend process. This is allowed if you are a member
- * of the role whose process is being terminated.
+ * Wait until there is no backend process with the given PID and return true.
+ * On timeout, a warning is emitted and false is returned.
+ */
+static bool
+pg_wait_until_termination(int pid, int64 timeout)
+{
+ /*
+ * Wait in steps of waittime milliseconds until this function exits or
+ * timeout.
+ */
+ int64 waittime = 100;
+ /*
+ * Initially remaining time is the entire timeout specified by the user.
+ */
+ int64 remainingtime = timeout;
+
+ /*
+ * Check existence of the backend. If it doesn't exist, then check for the
+ * errno ESRCH that confirms it and return true. If the backend still
+ * exists, then wait for waittime milliseconds, again check for the
+ * existence. Repeat this until timeout or an error occurs or a pending
+ * interrupt such as query cancel gets processed.
+ */
+ do
+ {
+ if (remainingtime < waittime)
+ waittime = remainingtime;
+
+ if (kill(pid, 0) == -1)
+ {
+ if (errno == ESRCH)
+ return true;
+ else
+ {
+ ereport(WARNING,
+ (errmsg("could not check the existence of the backend with PID %d: %m",
+ pid)));
+ return false;
+ }
+ }
+
+ /* Process interrupts, if any, before going for wait. */
+ CHECK_FOR_INTERRUPTS();
+
+ (void) WaitLatch(MyLatch,
+ WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
+ waittime,
+ WAIT_EVENT_BACKEND_TERMINATION);
+
+ ResetLatch(MyLatch);
+
+ remainingtime -= waittime;
+ } while (remainingtime > 0);
+
+ ereport(WARNING,
+ (errmsg("could not wait for the termination of the backend with PID %d within %lld milliseconds",
+ pid, (long long int) timeout)));
+
+ return false;
+}
+
+/*
+ * Signal to terminate a backend process. This is allowed if you are a member
+ * of the role whose process is being terminated. If wait input argument is
+ * true, then wait until given timeout (default 100) milliseconds or no backend
+ * has the given PID. On timeout, a warning is emitted. Self termination is
+ * allowed but waiting is not, because once the backend is self-terminated
+ * there is no point in waiting for it to go away. If wait input argument is
+ * false (which is default), then this function just signals the backend and
+ * exits.
*
* Note that only superusers can signal superuser-owned processes.
*/
Datum
pg_terminate_backend(PG_FUNCTION_ARGS)
{
- int r = pg_signal_backend(PG_GETARG_INT32(0), SIGTERM);
+ int pid;
+ int r;
+ bool wait;
+ int timeout;
+ bool result;
+
+ pid = PG_GETARG_INT32(0);
+ wait = PG_GETARG_BOOL(1);
+
+ /* If asked to wait, check whether the timeout value is valid or not. */
+ if (wait && pid != MyProcPid)
+ {
+ timeout = PG_GETARG_INT64(2);
+
+ if (timeout <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("\"timeout\" must not be negative or zero")));
+ }
+
+ r = pg_signal_backend(pid, SIGTERM);
if (r == SIGNAL_BACKEND_NOSUPERUSER)
ereport(ERROR,
@@ -146,7 +235,53 @@ pg_terminate_backend(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be a member of the role whose process is being terminated or member of pg_signal_backend")));
- PG_RETURN_BOOL(r == SIGNAL_BACKEND_SUCCESS);
+ result = (r == SIGNAL_BACKEND_SUCCESS);
+
+ /*
+ * Wait only if requested and the termination is successful. Self
+ * termination is allowed but waiting is not.
+ */
+ if (wait && pid != MyProcPid && result)
+ result = pg_wait_until_termination(pid, timeout);
+
+ PG_RETURN_BOOL(result);
+}
+
+/*
+ * This function waits for a backend with the given PID to be terminated or
+ * until the timeout milliseconds occurs. If the PID is of the backend which
+ * issued this function, either timeout can occur or the function can succeed
+ * if the backend gets terminated by some other process, say postmaster.
+ */
+Datum
+pg_wait_for_backend_termination(PG_FUNCTION_ARGS)
+{
+ int pid;
+ int64 timeout;
+ PGPROC *proc = NULL;
+ bool result = false;
+
+ pid = PG_GETARG_INT32(0);
+ timeout = PG_GETARG_INT64(1);
+
+ if (timeout <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("\"timeout\" must not be negative or zero")));
+
+ proc = BackendPidGetProc(pid);
+
+ if (proc == NULL)
+ {
+ ereport(WARNING,
+ (errmsg("PID %d is not a PostgreSQL server process", pid)));
+
+ PG_RETURN_BOOL(result);
+ }
+
+ result = pg_wait_until_termination(pid, timeout);
+
+ PG_RETURN_BOOL(result);
}
/*
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 93393fcfd4..3f158de1dc 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -6143,9 +6143,14 @@
{ oid => '2171', descr => 'cancel a server process\' current query',
proname => 'pg_cancel_backend', provolatile => 'v', prorettype => 'bool',
proargtypes => 'int4', prosrc => 'pg_cancel_backend' },
-{ oid => '2096', descr => 'terminate a server process',
+{ oid => '2096', descr => 'terminate a backend process and if specified, wait for its exit or until timeout occurs',
proname => 'pg_terminate_backend', provolatile => 'v', prorettype => 'bool',
- proargtypes => 'int4', prosrc => 'pg_terminate_backend' },
+ proargtypes => 'int4 bool int8', proargnames => '{pid,wait,timeout}',
+ prosrc => 'pg_terminate_backend' },
+{ oid => '16386', descr => 'wait for a backend process exit or timeout occurs',
+ proname => 'pg_wait_for_backend_termination', provolatile => 'v', prorettype => 'bool',
+ proargtypes => 'int4 int8', proargnames => '{pid,timeout}',
+ prosrc => 'pg_wait_for_backend_termination' },
{ oid => '2172', descr => 'prepare for taking an online backup',
proname => 'pg_start_backup', provolatile => 'v', proparallel => 'r',
prorettype => 'pg_lsn', proargtypes => 'text bool bool',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index be43c04802..bbfe2f2f04 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -967,7 +967,8 @@ typedef enum
*/
typedef enum
{
- WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE = PG_WAIT_IPC,
+ WAIT_EVENT_BACKEND_TERMINATION = PG_WAIT_IPC,
+ WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE,
WAIT_EVENT_BGWORKER_SHUTDOWN,
WAIT_EVENT_BGWORKER_STARTUP,
WAIT_EVENT_BTREE_PAGE,
--
2.25.1