Repository: impala Updated Branches: refs/heads/master 1dbb3c1be -> 93ee538c5
IMPALA-7714: remove unsafe code from signal handlers IMPALA-6271 added LOG statements to some signal handlers and an exit() call to a different signal handler. These functions are not async-signal safe. The fixes are: * Use the write system call directly. I tried using glog's RAW_LOG functionality but had major issues getting it to work. * Call _exit() directly instead of exit() so that static destructors are not run. This is the same default behaviour as SIGTERM. This wans't necessary to prevent this specific crash. Testing: Could reproduce the crash by looping tests/custom_cluster/test_local_catalog.py until a minidump was produced. After this fix it did not reproduce after looping for a few hours. Ran exhaustive build. Change-Id: I52037d6510b9b34ec33d3a8b5492226aee1b9d92 Reviewed-on: http://gerrit.cloudera.org:8080/11777 Tested-by: Impala Public Jenkins <[email protected]> Reviewed-by: Tim Armstrong <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/93ee538c Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/93ee538c Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/93ee538c Branch: refs/heads/master Commit: 93ee538c5403509804cb18411e4407352f5b242b Parents: 1dbb3c1 Author: Tim Armstrong <[email protected]> Authored: Wed Oct 24 17:06:33 2018 -0700 Committer: Tim Armstrong <[email protected]> Committed: Fri Oct 26 01:50:16 2018 +0000 ---------------------------------------------------------------------- be/src/common/init.cc | 9 ++++++--- be/src/util/minidump.cc | 4 ++-- tests/custom_cluster/test_breakpad.py | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/93ee538c/be/src/common/init.cc ---------------------------------------------------------------------- diff --git a/be/src/common/init.cc b/be/src/common/init.cc index b064485..fcd6971 100644 --- a/be/src/common/init.cc +++ b/be/src/common/init.cc @@ -20,6 +20,7 @@ #include <csignal> #include <gperftools/heap-profiler.h> #include <gperftools/malloc_extension.h> +#include <third_party/lss/linux_syscall_support.h> #include "common/global-flags.h" #include "common/logging.h" @@ -187,9 +188,11 @@ static void PauseMonitorLoop() { // Signal handler for SIGTERM, that prints the message before doing an exit. [[noreturn]] static void HandleSigTerm(int signum, siginfo_t* info, void* context) { - LOG(INFO) << "Caught signal: SIGTERM. Daemon will exit. Sender UID: " << info->si_uid - << ", PID: " << info->si_pid; - exit(0); + const char* msg = "Caught signal: SIGTERM. Daemon will exit.\n"; + sys_write(STDOUT_FILENO, msg, strlen(msg)); + // _exit() is async signal safe and is equivalent to the behaviour of the default + // SIGTERM handler. exit() can run arbitrary code and is *not* safe to use here. + _exit(0); } void impala::InitCommonRuntime(int argc, char** argv, bool init_jvm, http://git-wip-us.apache.org/repos/asf/impala/blob/93ee538c/be/src/util/minidump.cc ---------------------------------------------------------------------- diff --git a/be/src/util/minidump.cc b/be/src/util/minidump.cc index 99d4b92..f09439f 100644 --- a/be/src/util/minidump.cc +++ b/be/src/util/minidump.cc @@ -98,8 +98,8 @@ static bool DumpCallback(const google_breakpad::MinidumpDescriptor& descriptor, /// Signal handler to write a minidump file outside of crashes. static void HandleSignal(int signum, siginfo_t* info, void* context) { - LOG(INFO) << "Caught signal: SIGUSR1. Sender UID: " << info->si_uid - << ", PID: " << info->si_pid; + const char* msg = "Caught signal: SIGUSR1\n"; + sys_write(STDOUT_FILENO, msg, strlen(msg)); minidump_exception_handler->WriteMinidump(FLAGS_minidump_path, DumpCallback, NULL); } http://git-wip-us.apache.org/repos/asf/impala/blob/93ee538c/tests/custom_cluster/test_breakpad.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_breakpad.py b/tests/custom_cluster/test_breakpad.py index 661b6a4..ed3e427 100644 --- a/tests/custom_cluster/test_breakpad.py +++ b/tests/custom_cluster/test_breakpad.py @@ -250,7 +250,7 @@ class TestBreakpadExhaustive(TestBreakpadBase): uid = os.getuid() # There should be a SIGTERM message in the log now # since we raised one above. - log_str = 'Caught signal: SIGTERM. Daemon will exit. Sender UID: ' + str(uid) + log_str = 'Caught signal: SIGTERM. Daemon will exit.' self.assert_impalad_log_contains('INFO', log_str, expected_count=1) @pytest.mark.execute_serially
