On Thu, Feb 26, 2026 at 4:09 AM Chao Li <[email protected]> wrote:
Hi Chao,
> I just reviewed v4 again and got a few more comments:
>
> 1. This patch only set the global proc_die_sender_pid/uid to 0 at startup,
> then assign values to them upon receiving SIGTERM, and never reset them,
> which assumes a process must die upon SIGTERM. Is the assumption true? I
> guess not. If a process receives SIGTERM and not die immediately, then die
> for other reason, then it may report a misleading PID and UID.
Hmm, I'm not sure I follow. If we receive SIGTERM and do not die immediately
(for whatever reason), then two scenarios can happen as far as I'm concerned:
* another SIGTERM comes in from the same or different uid/pid and it will be
reported properly
* different SIGKILL, but in this case we won't report UID/PID at all
Am I missing something, or do you have any particular scenario in mind?
The flow will be wrapper_handler()->die()->SetLatch()->..->directly to
err reporting facilities.
> 2.
> syncrep.c uses errhint to print PID and UID, and postgres.c uses errdetail.
> We should keep consistency, maybe all use errhint.
Right, let's make it that way.
> 3.
> ```
> @@ -319,7 +323,11 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
> QueryCancelPending = false;
> ereport(WARNING,
> (errmsg("canceling wait for
> synchronous replication due to user request"),
> - errdetail("The transaction has
> already committed locally, but might not have been replicated to the
> standby.")));
> + errdetail("The transaction has
> already committed locally, but might not have been replicated to the
> standby."),
> + proc_die_sender_pid == 0 ? 0 :
> + errhint("Signal sent
> by PID %d, UID %d.",
> +
> proc_die_sender_pid, proc_die_sender_uid)
> + ));
> SyncRepCancelWait();
> break;
> }
> ```
>
> I don’t think the query cancel case relates to SIGTERM, so we don’t need to
> log PID and UID here.
Right, it was superfluous.
v5 attached.
-J.
From fa2b26d736a8bd6a830789be3dafed1201be9a5f Mon Sep 17 00:00:00 2001
From: Jakub Wartak <[email protected]>
Date: Tue, 17 Feb 2026 12:41:01 +0100
Subject: [PATCH v5] Add errhint() with PID and UID about source of
termination signal.
On Linux and FreeBSD we can use SA_SIGINFO to fetch additional information
about sender of the signal, which can aid troubleshooting. Sample log:
FATAL: terminating connection due to administrator command
HINT: Signal sent by PID 508477, UID 1000.
Author: Jakub Wartak <[email protected]>
Reviewed-by: Jim Jones <[email protected]>
Reviewed-by: Chao Li <[email protected]>
Discussion: https://www.postgresql.org/message-id/CAKZiRmyrOWovZSdixpLd3PGMQXuQL_zw2Ght5XhHCkQ1uDsxjw%40mail.gmail.com
---
configure | 42 +++++++++++++++++++++++++++++++
configure.ac | 18 +++++++++++++
meson.build | 4 +++
src/backend/replication/syncrep.c | 6 ++++-
src/backend/tcop/postgres.c | 42 +++++++++++++++++++++++++------
src/backend/utils/init/globals.c | 2 ++
src/bin/psql/t/001_basic.pl | 7 +++---
src/include/miscadmin.h | 2 ++
src/include/pg_config.h.in | 3 +++
src/port/pqsignal.c | 35 +++++++++++++++++++++++---
10 files changed, 146 insertions(+), 15 deletions(-)
diff --git a/configure b/configure
index a285a6ec3d7..704c88aee3d 100755
--- a/configure
+++ b/configure
@@ -15693,6 +15693,48 @@ if test "$ac_cv_sizeof_off_t" -lt 8; then
fi
fi
+# Check for SA_SIGINFO extended signal handler availability
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SA_SIGINFO" >&5
+$as_echo_n "checking for SA_SIGINFO... " >&6; }
+if ${ac_cv_have_sa_siginfo+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+ #include <signal.h>
+ #include <stddef.h>
+
+int
+main ()
+{
+
+ struct sigaction sa;
+ sa.sa_flags = SA_SIGINFO;
+
+ ;
+ return 0;
+}
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_have_sa_siginfo=yes
+else
+ ac_cv_have_sa_siginfo=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sa_siginfo" >&5
+$as_echo "$ac_cv_have_sa_siginfo" >&6; }
+
+if test "x$ac_cv_have_sa_siginfo" = "xyes"; then
+
+$as_echo "#define HAVE_SA_SIGINFO 1" >>confdefs.h
+
+fi
##
## Functions, global variables
diff --git a/configure.ac b/configure.ac
index 476a76c7991..183bc992126 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1822,6 +1822,24 @@ if test "$ac_cv_sizeof_off_t" -lt 8; then
fi
fi
+# Check for SA_SIGINFO extended signal handler availability
+AC_CACHE_CHECK([for SA_SIGINFO], [ac_cv_have_sa_siginfo], [
+ AC_COMPILE_IFELSE([
+ AC_LANG_PROGRAM([[
+ #include <signal.h>
+ #include <stddef.h>
+ ]], [[
+ struct sigaction sa;
+ sa.sa_flags = SA_SIGINFO;
+ ]])
+ ],
+ [ac_cv_have_sa_siginfo=yes],
+ [ac_cv_have_sa_siginfo=no])
+])
+
+if test "x$ac_cv_have_sa_siginfo" = "xyes"; then
+ AC_DEFINE([HAVE_SA_SIGINFO], 1, [Define to 1 if you have SA_SIGINFO available.])
+fi
##
## Functions, global variables
diff --git a/meson.build b/meson.build
index 5122706477d..3aa675e7ebb 100644
--- a/meson.build
+++ b/meson.build
@@ -2879,6 +2879,10 @@ if cc.has_member('struct sockaddr', 'sa_len',
cdata.set('HAVE_STRUCT_SOCKADDR_SA_LEN', 1)
endif
+if cc.has_header_symbol('signal.h', 'SA_SIGINFO')
+ cdata.set('HAVE_SA_SIGINFO', 1)
+endif
+
if cc.has_member('struct tm', 'tm_zone',
args: test_c_args, include_directories: postgres_inc,
prefix: '''
diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c
index d1582a5d711..fd5fb1e01f9 100644
--- a/src/backend/replication/syncrep.c
+++ b/src/backend/replication/syncrep.c
@@ -302,7 +302,11 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
ereport(WARNING,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"),
- errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
+ errdetail("The transaction has already committed locally, but might not have been replicated to the standby."),
+ proc_die_sender_pid == 0 ? 0 :
+ errhint("Signal sent by PID %d, UID %d.",
+ proc_die_sender_pid, proc_die_sender_uid)
+ ));
whereToSendOutput = DestNone;
SyncRepCancelWait();
break;
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index d01a09dd0c4..e1fad1e895b 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -3357,15 +3357,27 @@ ProcessInterrupts(void)
else if (AmAutoVacuumWorkerProcess())
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
- errmsg("terminating autovacuum process due to administrator command")));
+ errmsg("terminating autovacuum process due to administrator command"),
+ proc_die_sender_pid == 0 ? 0 :
+ errhint("Signal sent by PID %d, UID %d.",
+ proc_die_sender_pid, proc_die_sender_uid)
+ ));
else if (IsLogicalWorker())
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
- errmsg("terminating logical replication worker due to administrator command")));
+ errmsg("terminating logical replication worker due to administrator command"),
+ proc_die_sender_pid == 0 ? 0 :
+ errhint("Signal sent by PID %d, UID %d.",
+ proc_die_sender_pid, proc_die_sender_uid)
+ ));
else if (IsLogicalLauncher())
{
ereport(DEBUG1,
- (errmsg_internal("logical replication launcher shutting down")));
+ (errmsg_internal("logical replication launcher shutting down"),
+ proc_die_sender_pid == 0 ? 0 :
+ errhint("Signal sent by PID %d, UID %d.",
+ proc_die_sender_pid, proc_die_sender_uid)
+ ));
/*
* The logical replication launcher can be stopped at any time.
@@ -3376,23 +3388,39 @@ ProcessInterrupts(void)
else if (AmWalReceiverProcess())
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
- errmsg("terminating walreceiver process due to administrator command")));
+ errmsg("terminating walreceiver process due to administrator command"),
+ proc_die_sender_pid == 0 ? 0 :
+ errhint("Signal sent by PID %d, UID %d.",
+ proc_die_sender_pid, proc_die_sender_uid)
+ ));
else if (AmBackgroundWorkerProcess())
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating background worker \"%s\" due to administrator command",
- MyBgworkerEntry->bgw_type)));
+ MyBgworkerEntry->bgw_type),
+ proc_die_sender_pid == 0 ? 0 :
+ errhint("Signal sent by PID %d, UID %d.",
+ proc_die_sender_pid, proc_die_sender_uid)
+ ));
else if (AmIoWorkerProcess())
{
ereport(DEBUG1,
- (errmsg_internal("io worker shutting down due to administrator command")));
+ (errmsg_internal("io worker shutting down due to administrator command"),
+ proc_die_sender_pid == 0 ? 0 :
+ errhint("Signal sent by PID %d, UID %d.",
+ proc_die_sender_pid, proc_die_sender_uid)
+ ));
proc_exit(0);
}
else
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
- errmsg("terminating connection due to administrator command")));
+ errmsg("terminating connection due to administrator command"),
+ proc_die_sender_pid == 0 ? 0 :
+ errhint("Signal sent by PID %d, UID %d.",
+ proc_die_sender_pid, proc_die_sender_uid)
+ ));
}
if (CheckClientConnectionPending)
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index 36ad708b360..80ad00fbe0a 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -43,6 +43,8 @@ volatile sig_atomic_t IdleStatsUpdateTimeoutPending = false;
volatile uint32 InterruptHoldoffCount = 0;
volatile uint32 QueryCancelHoldoffCount = 0;
volatile uint32 CritSectionCount = 0;
+volatile uint32 proc_die_sender_pid = 0;
+volatile uint32 proc_die_sender_uid = 0;
int MyProcPid;
pg_time_t MyStartTime;
diff --git a/src/bin/psql/t/001_basic.pl b/src/bin/psql/t/001_basic.pl
index 6839f27cbe5..9f45c33b5f9 100644
--- a/src/bin/psql/t/001_basic.pl
+++ b/src/bin/psql/t/001_basic.pl
@@ -142,12 +142,11 @@ my ($ret, $out, $err) = $node->psql('postgres',
is($ret, 2, 'server crash: psql exit code');
like($out, qr/before/, 'server crash: output before crash');
unlike($out, qr/AFTER/, 'server crash: no output after crash');
-is( $err,
- 'psql:<stdin>:2: FATAL: terminating connection due to administrator command
-psql:<stdin>:2: server closed the connection unexpectedly
+like( $err, qr/psql:<stdin>:2: FATAL: terminating connection due to administrator command
+(?:HINT: Signal sent by PID \d+, UID \d+.\n)?psql:<stdin>:2: server closed the connection unexpectedly
This probably means the server terminated abnormally
before or while processing the request.
-psql:<stdin>:2: error: connection to server was lost',
+psql:<stdin>:2: error: connection to server was lost/,
'server crash: error message');
# test \errverbose
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index f16f35659b9..38bae8bb809 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -90,6 +90,8 @@
extern PGDLLIMPORT volatile sig_atomic_t InterruptPending;
extern PGDLLIMPORT volatile sig_atomic_t QueryCancelPending;
extern PGDLLIMPORT volatile sig_atomic_t ProcDiePending;
+extern PGDLLIMPORT volatile uint32 proc_die_sender_pid;
+extern PGDLLIMPORT volatile uint32 proc_die_sender_uid;
extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending;
extern PGDLLIMPORT volatile sig_atomic_t TransactionTimeoutPending;
extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending;
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index a0bd84376e7..1fbb2ce380a 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -340,6 +340,9 @@
/* Define to 1 if you have the `rl_variable_bind' function. */
#undef HAVE_RL_VARIABLE_BIND
+/* Define to 1 if you have SA_SIGINFO available. */
+#undef HAVE_SA_SIGINFO
+
/* Define to 1 if you have the <security/pam_appl.h> header file. */
#undef HAVE_SECURITY_PAM_APPL_H
diff --git a/src/port/pqsignal.c b/src/port/pqsignal.c
index fbdf9341c2f..5e61739fdc3 100644
--- a/src/port/pqsignal.c
+++ b/src/port/pqsignal.c
@@ -82,10 +82,19 @@ static volatile pqsigfunc pqsignal_handlers[PG_NSIG];
*
* This wrapper also handles restoring the value of errno.
*/
+#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
+static void
+wrapper_handler(int signo, siginfo_t *info, void *context)
+#else
static void
wrapper_handler(SIGNAL_ARGS)
+#endif
{
int save_errno = errno;
+#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
+ /* SIGNAL_ARGS defines postgres_signal_arg */
+ int postgres_signal_arg = signo;
+#endif
Assert(postgres_signal_arg > 0);
Assert(postgres_signal_arg < PG_NSIG);
@@ -105,6 +114,14 @@ wrapper_handler(SIGNAL_ARGS)
raise(postgres_signal_arg);
return;
}
+
+#ifdef HAVE_SA_SIGINFO
+ if (signo == SIGTERM && info)
+ {
+ proc_die_sender_pid = info->si_pid;
+ proc_die_sender_uid = info->si_uid;
+ }
+#endif
#endif
(*pqsignal_handlers[postgres_signal_arg]) (postgres_signal_arg);
@@ -125,6 +142,7 @@ pqsignal(int signo, pqsigfunc func)
#if !(defined(WIN32) && defined(FRONTEND))
struct sigaction act;
#endif
+ bool use_wrapper = false;
Assert(signo > 0);
Assert(signo < PG_NSIG);
@@ -132,13 +150,24 @@ pqsignal(int signo, pqsigfunc func)
if (func != SIG_IGN && func != SIG_DFL)
{
pqsignal_handlers[signo] = func; /* assumed atomic */
- func = wrapper_handler;
+ use_wrapper = true;
}
#if !(defined(WIN32) && defined(FRONTEND))
- act.sa_handler = func;
sigemptyset(&act.sa_mask);
act.sa_flags = SA_RESTART;
+#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
+ if (use_wrapper)
+ {
+ act.sa_sigaction = wrapper_handler;
+ act.sa_flags |= SA_SIGINFO;
+ }
+ else
+ act.sa_handler = func;
+#else
+ act.sa_handler = use_wrapper ? wrapper_handler : func;
+#endif
+
#ifdef SA_NOCLDSTOP
if (signo == SIGCHLD)
act.sa_flags |= SA_NOCLDSTOP;
@@ -147,7 +176,7 @@ pqsignal(int signo, pqsigfunc func)
Assert(false); /* probably indicates coding error */
#else
/* Forward to Windows native signal system. */
- if (signal(signo, func) == SIG_ERR)
+ if (signal(signo, use_wrapper ? wrapper_handler : func) == SIG_ERR)
Assert(false); /* probably indicates coding error */
#endif
}
--
2.43.0