Source: collectd Source-Version: 5.5.1-1 Severity: wishlist Tags: patch upstream Forwarded: https://github.com/collectd/collectd/pull/1644
Hi! We have noticed in some cases that collectd gets stuck in its process loop, and does not process the loop variable set by the SIGTERM handler. While we'll try to track that down eventually, being unable to stop or restart the daemon is a very annoying issue for us. Attached are two patches that changes the start-stop-daemon invocation to use its scheduling option, so that it eventually sends a SIGKILL if the SIGTERM has had no effect. In addition when using collectdmon we need to send some other signal to it because SIGKILL is uncatchable. The upstream pull request implements that second attached patch. Thanks, Guillem
From ecab4c5814699d9fd104e3b6d316056acbf03bda Mon Sep 17 00:00:00 2001 From: Guillem Jover <[email protected]> Date: Fri, 1 Apr 2016 16:47:49 +0200 Subject: [PATCH] collectdmon: Add support for sending SIGKILL to collectd Sometimes we might want to force termination of the collectd daemon, but we do not have visibility over the PID of the collectd daemon. So we need to request collectdmon to terminate the process for us, but we cannot send SIGKILL directly as it's an uncatchable signal. Instead we use SIGQUIT, catch that and then send SIGKILL to our child. --- src/collectdmon.c | 28 ++++++++++++++++++++++++---- src/collectdmon.pod | 5 +++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/collectdmon.c b/src/collectdmon.c index 65271dd..096b368 100644 --- a/src/collectdmon.c +++ b/src/collectdmon.c @@ -64,6 +64,7 @@ #endif /* ! WCOREDUMP */ static int loop = 0; +static int quit = 0; static int restart = 0; static char *pidfile = NULL; @@ -199,12 +200,12 @@ static int collectd_start (char **argv) exit (-1); } /* collectd_start */ -static int collectd_stop (void) +static int collectd_stop (int signo) { if (0 == collectd_pid) return 0; - if (0 != kill (collectd_pid, SIGTERM)) { + if (0 != kill (collectd_pid, signo)) { syslog (LOG_ERR, "Error: kill() failed: %s", strerror (errno)); return -1; } @@ -217,6 +218,13 @@ static void sig_int_term_handler (int __attribute__((unused)) signo) return; } /* sig_int_term_handler */ +static void sig_quit_handler (int __attribute__((unused)) signo) +{ + ++quit; + ++loop; + return; +} /* sig_quit_handler */ + static void sig_hup_handler (int __attribute__((unused)) signo) { ++restart; @@ -343,6 +351,14 @@ int main (int argc, char **argv) return 1; } + sa.sa_handler = sig_quit_handler; + + if (0 != sigaction (SIGQUIT, &sa, NULL)) { + syslog (LOG_ERR, "Error: sigaction() failed: %s", strerror (errno)); + free (collectd_argv); + return 1; + } + sa.sa_handler = sig_hup_handler; if (0 != sigaction (SIGHUP, &sa, NULL)) { @@ -362,8 +378,12 @@ int main (int argc, char **argv) assert (0 < collectd_pid); while ((collectd_pid != waitpid (collectd_pid, &status, 0)) && (EINTR == errno)) - if ((0 != loop) || (0 != restart)) - collectd_stop (); + if ((0 != loop) || (0 != restart)) { + if (quit) + collectd_stop (SIGKILL); + else + collectd_stop (SIGTERM); + } collectd_pid = 0; diff --git a/src/collectdmon.pod b/src/collectdmon.pod index 8fa62f3..6cd05e2 100644 --- a/src/collectdmon.pod +++ b/src/collectdmon.pod @@ -57,6 +57,11 @@ termination and then shut down. This signal causes B<collectdmon> to terminate B<collectd>, wait for its termination and then restart it. +=item B<SIGQUIT> + +This signal causes B<collectdmon> to terminate B<collectd> with B<SIGKILL>, +wait for its termination and then shut down. + =back =head1 SEE ALSO -- 2.8.0.rc3.226.g39d4020
From a4e763799acb86573a5712dd86376f0a982d3c17 Mon Sep 17 00:00:00 2001 From: Guillem Jover <[email protected]> Date: Fri, 1 Apr 2016 17:08:13 +0200 Subject: [PATCH] debian: Use start-stop-daemon to schedule a SIGKILL in case SIGTERM has no effect When using the collectdmon daemon we send instead a SIGQUIT which collectdmon will catch and forward to collectd as a SIGKILL. --- debian/collectd-core.collectd.init.d | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/debian/collectd-core.collectd.init.d b/debian/collectd-core.collectd.init.d index f18b994..7778d6f 100755 --- a/debian/collectd-core.collectd.init.d +++ b/debian/collectd-core.collectd.init.d @@ -112,31 +112,19 @@ the disk. You can adjust the waiting time in /etc/default/collectd." # 1 if the daemon was already stopped # 2 if daemon could not be stopped d_stop() { - PID=$( cat "$PIDFILE" 2> /dev/null ) || true - - start-stop-daemon --stop --quiet --oknodo --pidfile "$PIDFILE" + if test "$USE_COLLECTDMON" == 1; then + start-stop-daemon --stop --quiet --oknodo --pidfile "$PIDFILE" \ + --retry TERM/$MAXWAIT/QUIT/5 + else + start-stop-daemon --stop --quiet --oknodo --pidfile "$PIDFILE" \ + --retry $MAXWAIT + fi rc="$?" if test "$rc" -eq 2; then - return 2 + log_progress_msg "$still_running_warning" fi - sleep 1 - if test -n "$PID" && kill -0 $PID 2> /dev/null; then - i=0 - while kill -0 $PID 2> /dev/null; do - i=$(( $i + 2 )) - echo -n " ." - - if test $i -gt $MAXWAIT; then - log_progress_msg "$still_running_warning" - return 2 - fi - - sleep 2 - done - return "$rc" - fi return "$rc" } -- 2.8.0.rc3.226.g39d4020

