This re-raises the signal in the crash handler so that the default signal
handler can dump a core file.

Signed-off-by: MORITA Kazutaka <[email protected]>
---
 collie/collie.c | 10 +++++++++-
 include/util.h  |  1 +
 lib/logger.c    | 14 ++++++++++++--
 lib/util.c      | 28 +++++++++++++++++++++++++++-
 sheep/sheep.c   |  2 ++
 5 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/collie/collie.c b/collie/collie.c
index 2394350..045d175 100644
--- a/collie/collie.c
+++ b/collie/collie.c
@@ -320,7 +320,15 @@ static void crash_handler(int signo)
 
        sd_backtrace();
 
-       exit(EXIT_SYSFAIL);
+       /*
+        * OOM raises SIGABRT in xmalloc but the administrator expects
+        * that collie exits with EXIT_SYSFAIL.  We have to give up
+        * dumping a core file in this case.
+        */
+       if (signo == SIGABRT)
+               exit(EXIT_SYSFAIL);
+
+       reraise_crash_signal(signo, EXIT_SYSFAIL);
 }
 
 int main(int argc, char **argv)
diff --git a/include/util.h b/include/util.h
index 8ba6710..38efb8b 100644
--- a/include/util.h
+++ b/include/util.h
@@ -89,6 +89,7 @@ int purge_directory(char *dir_path);
 bool is_numeric(const char *p);
 int install_sighandler(int signum, void (*handler)(int), bool once);
 int install_crash_handler(void (*handler)(int));
+void reraise_crash_signal(int signo, int status);
 pid_t gettid(void);
 bool is_xattr_enabled(const char *path);
 
diff --git a/lib/logger.c b/lib/logger.c
index a31cb1f..d359b4d 100644
--- a/lib/logger.c
+++ b/lib/logger.c
@@ -475,9 +475,14 @@ static notrace void log_flush(void)
        }
 }
 
+static bool is_sheep_dead(int signo)
+{
+       return signo == SIGHUP;
+}
+
 static notrace void crash_handler(int signo)
 {
-       if (signo == SIGHUP)
+       if (is_sheep_dead(signo))
                sd_printf(SDOG_ERR, "sheep pid %d exited unexpectedly.",
                          sheep_pid);
        else {
@@ -489,7 +494,12 @@ static notrace void crash_handler(int signo)
        log_flush();
        closelog();
        free_logarea();
-       exit(1);
+
+       /* If the signal isn't caused by the logger crash, we simply exit. */
+       if (is_sheep_dead(signo))
+               exit(1);
+
+       reraise_crash_signal(signo, 1);
 }
 
 static notrace void logger(char *log_dir, char *outfile)
diff --git a/lib/util.c b/lib/util.c
index 39779a5..aac0fa9 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -374,13 +374,17 @@ bool is_numeric(const char *s)
        return false;
 }
 
+/*
+ * If 'once' is true, the signal will be restored to the default state
+ * after 'handler' is called.
+ */
 int install_sighandler(int signum, void (*handler)(int), bool once)
 {
        struct sigaction sa = {};
 
        sa.sa_handler = handler;
        if (once)
-               sa.sa_flags = SA_RESETHAND;
+               sa.sa_flags = SA_RESETHAND | SA_NODEFER;
        sigemptyset(&sa.sa_mask);
 
        return sigaction(signum, &sa, NULL);
@@ -395,6 +399,28 @@ int install_crash_handler(void (*handler)(int))
                install_sighandler(SIGFPE, handler, true);
 }
 
+/*
+ * Re-raise the signal 'signo' for the default signal handler to dump
+ * a core file, and exit with 'status' if the default handler cannot
+ * terminate the process.  This function is expected to be called in
+ * the installed signal handlers with install_crash_handler().
+ */
+void reraise_crash_signal(int signo, int status)
+{
+       int ret = raise(signo);
+
+       /* We won't get here normally. */
+       if (ret != 0)
+               sd_printf(SDOG_EMERG, "failed to re-raise signal %d (%s).",
+                         signo, strsignal(signo));
+       else
+               sd_printf(SDOG_EMERG, "default handler for the re-raised "
+                         "signal %d (%s) didn't work expectedly", signo,
+                         strsignal(signo));
+
+       exit(status);
+}
+
 pid_t gettid(void)
 {
        return syscall(SYS_gettid);
diff --git a/sheep/sheep.c b/sheep/sheep.c
index cda8493..b1e04a1 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -182,6 +182,8 @@ static void crash_handler(int signo)
 
        sd_backtrace();
        sd_dump_variable(__sys);
+
+       reraise_crash_signal(signo, 1);
 }
 
 static struct cluster_info __sys;
-- 
1.8.1.3.566.gaa39828

-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to