Here is a new version of the stopgap/back-branch fix for restore_command. This is more or less a rebased version of v4 with an added stderr message as Andres suggested upthread.
-- Nathan Bossart Amazon Web Services: https://aws.amazon.com
>From 20edca59834c7755bfddb070fb9db3f59dc6ff96 Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nathandboss...@gmail.com> Date: Tue, 14 Feb 2023 09:44:53 -0800 Subject: [PATCH v5 1/1] stopgap fix for restore_command --- src/backend/access/transam/xlogarchive.c | 15 +++++++++++---- src/backend/postmaster/startup.c | 20 +++++++++++++++++++- src/backend/storage/ipc/ipc.c | 3 +++ src/backend/storage/lmgr/proc.c | 2 ++ 4 files changed, 35 insertions(+), 5 deletions(-) diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c index fcc87ff44f..41684418b6 100644 --- a/src/backend/access/transam/xlogarchive.c +++ b/src/backend/access/transam/xlogarchive.c @@ -159,20 +159,27 @@ RestoreArchivedFile(char *path, const char *xlogfname, (errmsg_internal("executing restore command \"%s\"", xlogRestoreCmd))); + fflush(NULL); + pgstat_report_wait_start(WAIT_EVENT_RESTORE_COMMAND); + /* - * Check signals before restore command and reset afterwards. + * PreRestoreCommand() informs the SIGTERM handler for the startup process + * that it should proc_exit() right away. This is done for the duration of + * the system() call because there isn't a good way to break out while it + * is executing. Since we might call proc_exit() in a signal handler, it + * is best to put any additional logic before or after the + * PreRestoreCommand()/PostRestoreCommand() section. */ PreRestoreCommand(); /* * Copy xlog from archival storage to XLOGDIR */ - fflush(NULL); - pgstat_report_wait_start(WAIT_EVENT_RESTORE_COMMAND); rc = system(xlogRestoreCmd); - pgstat_report_wait_end(); PostRestoreCommand(); + + pgstat_report_wait_end(); pfree(xlogRestoreCmd); if (rc == 0) diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c index efc2580536..de2b56c2fa 100644 --- a/src/backend/postmaster/startup.c +++ b/src/backend/postmaster/startup.c @@ -19,6 +19,8 @@ */ #include "postgres.h" +#include <unistd.h> + #include "access/xlog.h" #include "access/xlogrecovery.h" #include "access/xlogutils.h" @@ -121,7 +123,23 @@ StartupProcShutdownHandler(SIGNAL_ARGS) int save_errno = errno; if (in_restore_command) - proc_exit(1); + { + /* + * If we are in a child process (e.g., forked by system() in + * RestoreArchivedFile()), we don't want to call any exit callbacks. + * The parent will take care of that. + */ + if (MyProcPid == (int) getpid()) + proc_exit(1); + else + { + const char msg[] = "StartupProcShutdownHandler() called in child process"; + int rc pg_attribute_unused(); + + rc = write(STDERR_FILENO, msg, sizeof(msg)); + _exit(1); + } + } else shutdown_requested = true; WakeupRecovery(); diff --git a/src/backend/storage/ipc/ipc.c b/src/backend/storage/ipc/ipc.c index 1904d21795..6796cabc3e 100644 --- a/src/backend/storage/ipc/ipc.c +++ b/src/backend/storage/ipc/ipc.c @@ -103,6 +103,9 @@ static int on_proc_exit_index, void proc_exit(int code) { + /* proc_exit() is not safe in forked processes from system(), etc. */ + Assert(MyProcPid == getpid()); + /* Clean up everything that must be cleaned up */ proc_exit_prepare(code); diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 22b4278610..ae845e8249 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -805,6 +805,7 @@ ProcKill(int code, Datum arg) dlist_head *procgloballist; Assert(MyProc != NULL); + Assert(MyProcPid == getpid()); /* not safe if forked by system(), etc. */ /* Make sure we're out of the sync rep lists */ SyncRepCleanupAtProcExit(); @@ -925,6 +926,7 @@ AuxiliaryProcKill(int code, Datum arg) PGPROC *proc; Assert(proctype >= 0 && proctype < NUM_AUXILIARY_PROCS); + Assert(MyProcPid == getpid()); /* not safe if forked by system(), etc. */ auxproc = &AuxiliaryProcs[proctype]; -- 2.25.1