On Wed, Feb 25, 2026 at 10:15 AM Chao Li <[email protected]> wrote:
Hi Chao, thanks for review.
> A few comments for v3:
>
> 1 - syncrep.c
[..]
> + proc_die_sender_pid == 0 ? 0 :
> + errdetail("Signal
> sent by PID %lld, UID %lld.",
> + (long
> long)proc_die_sender_pid, (long long)proc_die_sender_uid)
> + ));
> ```
>
> Here errdetail is used twice. I guess the second conditional one should be
> errhint.
>
> 2 - syncrep.c
[..]
> Same as comment 1.
You are right, apparently I copy/pasted code from src/backend/tcop/postgres.c
way too fast... fixed.
> 3
> ```
> +volatile uint32 proc_die_sender_pid = 0;
> +volatile uint32 proc_die_sender_uid = 0;
> ```
>
> These two globals are only written in the signal handler, I think they should
> be sig_atomic_t to ensure atomic writes.
Well the problem that sig_atomic_t is int and we need at least uint32 and
I couldn't find better way. I think that 4 bytes writes will be mostly
always atomic (for 64-bits it would depend on
PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY)
Yet I moved those little below, so it's more aligned to the other uses.
> 4
> ```
> - errmsg("terminating walreceiver
> process due to administrator command")));
> + errmsg("terminating walreceiver
> process due to administrator command"),
> + proc_die_sender_pid == 0 ? 0 :
> + errdetail("Signal sent by PID
> %lld, UID %lld.",
> + (long
> long)proc_die_sender_pid, (long long)proc_die_sender_uid)
> + ));
> ```
>
> Why do we need to format pid and uid in “long long” format? I searched over
> the source tree, the current postmaster.c just formats pid as int (%d):
> ```
> /* in parent, successful fork */
> ereport(DEBUG2,
> (errmsg_internal("forked new %s, pid=%d socket=%d",
>
> GetBackendTypeDesc(bn->bkend_type),
> (int) pid, (int)
> client_sock->sock)));
> ```
Yes, I think I was kind of lost when thinking about it (v1 had sig_atomic_t,
later had pid_t, I read somewhere about 64-bit pids, and so on) vs
platform-agnostic hell of putting that into printf). Possible I was
overthinking it
and I have reverted it to just using %d with that uint32. BTW I've also found:
elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
v4 attached. Thanks again for the review.
-J.
From 5b9b093bad3ab159cb93e838ff640b37f72588b2 Mon Sep 17 00:00:00 2001
From: Jakub Wartak <[email protected]>
Date: Tue, 17 Feb 2026 12:41:01 +0100
Subject: [PATCH v4] Add errdetail() with PID and UID about source of
termination signal.
On Linux and FreeBSD we can use SA_SIGINFO to fetch additional information
about sender of the signal, which can aid troubleshooting. Sample log:
FATAL: terminating connection due to administrator command
DETAIL: Signal sent by PID 508477, UID 1000.
Author: Jakub Wartak <[email protected]>
Reviewed-by: Jim Jones <[email protected]>
Reviewed-by: Chao Li <[email protected]>
Discussion: https://www.postgresql.org/message-id/CAKZiRmyrOWovZSdixpLd3PGMQXuQL_zw2Ght5XhHCkQ1uDsxjw%40mail.gmail.com
---
configure | 42 +++++++++++++++++++++++++++++++
configure.ac | 18 +++++++++++++
meson.build | 4 +++
src/backend/replication/syncrep.c | 12 +++++++--
src/backend/tcop/postgres.c | 42 +++++++++++++++++++++++++------
src/backend/utils/init/globals.c | 2 ++
src/bin/psql/t/001_basic.pl | 7 +++---
src/include/miscadmin.h | 2 ++
src/include/pg_config.h.in | 3 +++
src/port/pqsignal.c | 35 +++++++++++++++++++++++---
10 files changed, 151 insertions(+), 16 deletions(-)
diff --git a/configure b/configure
index a285a6ec3d7..704c88aee3d 100755
--- a/configure
+++ b/configure
@@ -15693,6 +15693,48 @@ if test "$ac_cv_sizeof_off_t" -lt 8; then
fi
fi
+# Check for SA_SIGINFO extended signal handler availability
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SA_SIGINFO" >&5
+$as_echo_n "checking for SA_SIGINFO... " >&6; }
+if ${ac_cv_have_sa_siginfo+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+ #include <signal.h>
+ #include <stddef.h>
+
+int
+main ()
+{
+
+ struct sigaction sa;
+ sa.sa_flags = SA_SIGINFO;
+
+ ;
+ return 0;
+}
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_have_sa_siginfo=yes
+else
+ ac_cv_have_sa_siginfo=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sa_siginfo" >&5
+$as_echo "$ac_cv_have_sa_siginfo" >&6; }
+
+if test "x$ac_cv_have_sa_siginfo" = "xyes"; then
+
+$as_echo "#define HAVE_SA_SIGINFO 1" >>confdefs.h
+
+fi
##
## Functions, global variables
diff --git a/configure.ac b/configure.ac
index 476a76c7991..183bc992126 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1822,6 +1822,24 @@ if test "$ac_cv_sizeof_off_t" -lt 8; then
fi
fi
+# Check for SA_SIGINFO extended signal handler availability
+AC_CACHE_CHECK([for SA_SIGINFO], [ac_cv_have_sa_siginfo], [
+ AC_COMPILE_IFELSE([
+ AC_LANG_PROGRAM([[
+ #include <signal.h>
+ #include <stddef.h>
+ ]], [[
+ struct sigaction sa;
+ sa.sa_flags = SA_SIGINFO;
+ ]])
+ ],
+ [ac_cv_have_sa_siginfo=yes],
+ [ac_cv_have_sa_siginfo=no])
+])
+
+if test "x$ac_cv_have_sa_siginfo" = "xyes"; then
+ AC_DEFINE([HAVE_SA_SIGINFO], 1, [Define to 1 if you have SA_SIGINFO available.])
+fi
##
## Functions, global variables
diff --git a/meson.build b/meson.build
index 5122706477d..3aa675e7ebb 100644
--- a/meson.build
+++ b/meson.build
@@ -2879,6 +2879,10 @@ if cc.has_member('struct sockaddr', 'sa_len',
cdata.set('HAVE_STRUCT_SOCKADDR_SA_LEN', 1)
endif
+if cc.has_header_symbol('signal.h', 'SA_SIGINFO')
+ cdata.set('HAVE_SA_SIGINFO', 1)
+endif
+
if cc.has_member('struct tm', 'tm_zone',
args: test_c_args, include_directories: postgres_inc,
prefix: '''
diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c
index d1582a5d711..9b0e899ac1a 100644
--- a/src/backend/replication/syncrep.c
+++ b/src/backend/replication/syncrep.c
@@ -302,7 +302,11 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
ereport(WARNING,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"),
- errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
+ errdetail("The transaction has already committed locally, but might not have been replicated to the standby."),
+ proc_die_sender_pid == 0 ? 0 :
+ errhint("Signal sent by PID %d, UID %d.",
+ proc_die_sender_pid, proc_die_sender_uid)
+ ));
whereToSendOutput = DestNone;
SyncRepCancelWait();
break;
@@ -319,7 +323,11 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
QueryCancelPending = false;
ereport(WARNING,
(errmsg("canceling wait for synchronous replication due to user request"),
- errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
+ errdetail("The transaction has already committed locally, but might not have been replicated to the standby."),
+ proc_die_sender_pid == 0 ? 0 :
+ errhint("Signal sent by PID %d, UID %d.",
+ proc_die_sender_pid, proc_die_sender_uid)
+ ));
SyncRepCancelWait();
break;
}
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index d01a09dd0c4..5735d93605c 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -3357,15 +3357,27 @@ ProcessInterrupts(void)
else if (AmAutoVacuumWorkerProcess())
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
- errmsg("terminating autovacuum process due to administrator command")));
+ errmsg("terminating autovacuum process due to administrator command"),
+ proc_die_sender_pid == 0 ? 0 :
+ errdetail("Signal sent by PID %d, UID %d.",
+ proc_die_sender_pid, proc_die_sender_uid)
+ ));
else if (IsLogicalWorker())
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
- errmsg("terminating logical replication worker due to administrator command")));
+ errmsg("terminating logical replication worker due to administrator command"),
+ proc_die_sender_pid == 0 ? 0 :
+ errdetail("Signal sent by PID %d, UID %d.",
+ proc_die_sender_pid, proc_die_sender_uid)
+ ));
else if (IsLogicalLauncher())
{
ereport(DEBUG1,
- (errmsg_internal("logical replication launcher shutting down")));
+ (errmsg_internal("logical replication launcher shutting down"),
+ proc_die_sender_pid == 0 ? 0 :
+ errdetail("Signal sent by PID %d, UID %d.",
+ proc_die_sender_pid, proc_die_sender_uid)
+ ));
/*
* The logical replication launcher can be stopped at any time.
@@ -3376,23 +3388,39 @@ ProcessInterrupts(void)
else if (AmWalReceiverProcess())
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
- errmsg("terminating walreceiver process due to administrator command")));
+ errmsg("terminating walreceiver process due to administrator command"),
+ proc_die_sender_pid == 0 ? 0 :
+ errdetail("Signal sent by PID %d, UID %d.",
+ proc_die_sender_pid, proc_die_sender_uid)
+ ));
else if (AmBackgroundWorkerProcess())
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating background worker \"%s\" due to administrator command",
- MyBgworkerEntry->bgw_type)));
+ MyBgworkerEntry->bgw_type),
+ proc_die_sender_pid == 0 ? 0 :
+ errdetail("Signal sent by PID %d, UID %d.",
+ proc_die_sender_pid, proc_die_sender_uid)
+ ));
else if (AmIoWorkerProcess())
{
ereport(DEBUG1,
- (errmsg_internal("io worker shutting down due to administrator command")));
+ (errmsg_internal("io worker shutting down due to administrator command"),
+ proc_die_sender_pid == 0 ? 0 :
+ errdetail("Signal sent by PID %d, UID %d.",
+ proc_die_sender_pid, proc_die_sender_uid)
+ ));
proc_exit(0);
}
else
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
- errmsg("terminating connection due to administrator command")));
+ errmsg("terminating connection due to administrator command"),
+ proc_die_sender_pid == 0 ? 0 :
+ errdetail("Signal sent by PID %d, UID %d.",
+ proc_die_sender_pid, proc_die_sender_uid)
+ ));
}
if (CheckClientConnectionPending)
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index 36ad708b360..80ad00fbe0a 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -43,6 +43,8 @@ volatile sig_atomic_t IdleStatsUpdateTimeoutPending = false;
volatile uint32 InterruptHoldoffCount = 0;
volatile uint32 QueryCancelHoldoffCount = 0;
volatile uint32 CritSectionCount = 0;
+volatile uint32 proc_die_sender_pid = 0;
+volatile uint32 proc_die_sender_uid = 0;
int MyProcPid;
pg_time_t MyStartTime;
diff --git a/src/bin/psql/t/001_basic.pl b/src/bin/psql/t/001_basic.pl
index 6839f27cbe5..7bd585d40e3 100644
--- a/src/bin/psql/t/001_basic.pl
+++ b/src/bin/psql/t/001_basic.pl
@@ -142,12 +142,11 @@ my ($ret, $out, $err) = $node->psql('postgres',
is($ret, 2, 'server crash: psql exit code');
like($out, qr/before/, 'server crash: output before crash');
unlike($out, qr/AFTER/, 'server crash: no output after crash');
-is( $err,
- 'psql:<stdin>:2: FATAL: terminating connection due to administrator command
-psql:<stdin>:2: server closed the connection unexpectedly
+like( $err, qr/psql:<stdin>:2: FATAL: terminating connection due to administrator command
+(?:DETAIL: Signal sent by PID \d+, UID \d+.\n)?psql:<stdin>:2: server closed the connection unexpectedly
This probably means the server terminated abnormally
before or while processing the request.
-psql:<stdin>:2: error: connection to server was lost',
+psql:<stdin>:2: error: connection to server was lost/,
'server crash: error message');
# test \errverbose
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index f16f35659b9..38bae8bb809 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -90,6 +90,8 @@
extern PGDLLIMPORT volatile sig_atomic_t InterruptPending;
extern PGDLLIMPORT volatile sig_atomic_t QueryCancelPending;
extern PGDLLIMPORT volatile sig_atomic_t ProcDiePending;
+extern PGDLLIMPORT volatile uint32 proc_die_sender_pid;
+extern PGDLLIMPORT volatile uint32 proc_die_sender_uid;
extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending;
extern PGDLLIMPORT volatile sig_atomic_t TransactionTimeoutPending;
extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending;
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index a0bd84376e7..1fbb2ce380a 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -340,6 +340,9 @@
/* Define to 1 if you have the `rl_variable_bind' function. */
#undef HAVE_RL_VARIABLE_BIND
+/* Define to 1 if you have SA_SIGINFO available. */
+#undef HAVE_SA_SIGINFO
+
/* Define to 1 if you have the <security/pam_appl.h> header file. */
#undef HAVE_SECURITY_PAM_APPL_H
diff --git a/src/port/pqsignal.c b/src/port/pqsignal.c
index fbdf9341c2f..5e61739fdc3 100644
--- a/src/port/pqsignal.c
+++ b/src/port/pqsignal.c
@@ -82,10 +82,19 @@ static volatile pqsigfunc pqsignal_handlers[PG_NSIG];
*
* This wrapper also handles restoring the value of errno.
*/
+#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
+static void
+wrapper_handler(int signo, siginfo_t *info, void *context)
+#else
static void
wrapper_handler(SIGNAL_ARGS)
+#endif
{
int save_errno = errno;
+#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
+ /* SIGNAL_ARGS defines postgres_signal_arg */
+ int postgres_signal_arg = signo;
+#endif
Assert(postgres_signal_arg > 0);
Assert(postgres_signal_arg < PG_NSIG);
@@ -105,6 +114,14 @@ wrapper_handler(SIGNAL_ARGS)
raise(postgres_signal_arg);
return;
}
+
+#ifdef HAVE_SA_SIGINFO
+ if (signo == SIGTERM && info)
+ {
+ proc_die_sender_pid = info->si_pid;
+ proc_die_sender_uid = info->si_uid;
+ }
+#endif
#endif
(*pqsignal_handlers[postgres_signal_arg]) (postgres_signal_arg);
@@ -125,6 +142,7 @@ pqsignal(int signo, pqsigfunc func)
#if !(defined(WIN32) && defined(FRONTEND))
struct sigaction act;
#endif
+ bool use_wrapper = false;
Assert(signo > 0);
Assert(signo < PG_NSIG);
@@ -132,13 +150,24 @@ pqsignal(int signo, pqsigfunc func)
if (func != SIG_IGN && func != SIG_DFL)
{
pqsignal_handlers[signo] = func; /* assumed atomic */
- func = wrapper_handler;
+ use_wrapper = true;
}
#if !(defined(WIN32) && defined(FRONTEND))
- act.sa_handler = func;
sigemptyset(&act.sa_mask);
act.sa_flags = SA_RESTART;
+#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
+ if (use_wrapper)
+ {
+ act.sa_sigaction = wrapper_handler;
+ act.sa_flags |= SA_SIGINFO;
+ }
+ else
+ act.sa_handler = func;
+#else
+ act.sa_handler = use_wrapper ? wrapper_handler : func;
+#endif
+
#ifdef SA_NOCLDSTOP
if (signo == SIGCHLD)
act.sa_flags |= SA_NOCLDSTOP;
@@ -147,7 +176,7 @@ pqsignal(int signo, pqsigfunc func)
Assert(false); /* probably indicates coding error */
#else
/* Forward to Windows native signal system. */
- if (signal(signo, func) == SIG_ERR)
+ if (signal(signo, use_wrapper ? wrapper_handler : func) == SIG_ERR)
Assert(false); /* probably indicates coding error */
#endif
}
--
2.43.0