This re-raises the signal in the crash handler so that the default signal handlers can dump a core file.
Signed-off-by: MORITA Kazutaka <[email protected]> --- collie/collie.c | 10 +++++++++- include/util.h | 1 + lib/logger.c | 14 ++++++++++++-- lib/util.c | 28 +++++++++++++++++++++++++++- sheep/sheep.c | 2 ++ 5 files changed, 51 insertions(+), 4 deletions(-) diff --git a/collie/collie.c b/collie/collie.c index 2394350..045d175 100644 --- a/collie/collie.c +++ b/collie/collie.c @@ -320,7 +320,15 @@ static void crash_handler(int signo) sd_backtrace(); - exit(EXIT_SYSFAIL); + /* + * OOM raises SIGABRT in xmalloc but the administrator expects + * that collie exits with EXIT_SYSFAIL. We have to give up + * dumping a core file in this case. + */ + if (signo == SIGABRT) + exit(EXIT_SYSFAIL); + + reraise_crash_signal(signo, EXIT_SYSFAIL); } int main(int argc, char **argv) diff --git a/include/util.h b/include/util.h index 8ba6710..38efb8b 100644 --- a/include/util.h +++ b/include/util.h @@ -89,6 +89,7 @@ int purge_directory(char *dir_path); bool is_numeric(const char *p); int install_sighandler(int signum, void (*handler)(int), bool once); int install_crash_handler(void (*handler)(int)); +void reraise_crash_signal(int signo, int status); pid_t gettid(void); bool is_xattr_enabled(const char *path); diff --git a/lib/logger.c b/lib/logger.c index a31cb1f..d359b4d 100644 --- a/lib/logger.c +++ b/lib/logger.c @@ -475,9 +475,14 @@ static notrace void log_flush(void) } } +static bool is_sheep_dead(int signo) +{ + return signo == SIGHUP; +} + static notrace void crash_handler(int signo) { - if (signo == SIGHUP) + if (is_sheep_dead(signo)) sd_printf(SDOG_ERR, "sheep pid %d exited unexpectedly.", sheep_pid); else { @@ -489,7 +494,12 @@ static notrace void crash_handler(int signo) log_flush(); closelog(); free_logarea(); - exit(1); + + /* If the signal isn't caused by the logger crash, we simply exit. 
*/ + if (is_sheep_dead(signo)) + exit(1); + + reraise_crash_signal(signo, 1); } static notrace void logger(char *log_dir, char *outfile) diff --git a/lib/util.c b/lib/util.c index 39779a5..aac0fa9 100644 --- a/lib/util.c +++ b/lib/util.c @@ -374,13 +374,17 @@ bool is_numeric(const char *s) return false; } +/* + * If 'once' is true, the signal will be restored to the default state + * after 'handler' is called. + */ int install_sighandler(int signum, void (*handler)(int), bool once) { struct sigaction sa = {}; sa.sa_handler = handler; if (once) - sa.sa_flags = SA_RESETHAND; + sa.sa_flags = SA_RESETHAND | SA_NODEFER; sigemptyset(&sa.sa_mask); return sigaction(signum, &sa, NULL); @@ -395,6 +399,28 @@ int install_crash_handler(void (*handler)(int)) install_sighandler(SIGFPE, handler, true); } +/* + * Re-raise the signal 'signo' for the default signal handler to dump + * a core file, and exit with 'status' if the default handler cannot + * terminate the process. This function is expected to be called in + * the installed signal handlers with install_crash_handler(). + */ +void reraise_crash_signal(int signo, int status) +{ + int ret = raise(signo); + + /* We won't get here normally. */ + if (ret != 0) + sd_printf(SDOG_EMERG, "failed to re-raise signal %d (%s).", + signo, strsignal(signo)); + else + sd_printf(SDOG_EMERG, "default handler for the re-raised " + "signal %d (%s) didn't work expectedly", signo, + strsignal(signo)); + + exit(status); +} + pid_t gettid(void) { return syscall(SYS_gettid); diff --git a/sheep/sheep.c b/sheep/sheep.c index cda8493..b1e04a1 100644 --- a/sheep/sheep.c +++ b/sheep/sheep.c @@ -182,6 +182,8 @@ static void crash_handler(int signo) sd_backtrace(); sd_dump_variable(__sys); + + reraise_crash_signal(signo, 1); } static struct cluster_info __sys; -- 1.8.1.3.566.gaa39828 -- sheepdog mailing list [email protected] http://lists.wpkg.org/mailman/listinfo/sheepdog
