IMPALA-3490: Add flag to reduce minidump size IMPALA-2686 added the breakpad library to all impala daemons, thus enabling them to write minidump files. This change introduces a flag 'minidump_size_limit_hint_kb', which causes breakpad to reduce the amount of thread stack memory it includes in a minidump, aiming to reduce the minidump size during crashes with a lot of threads. Once a minidump is expected to exceed the configured value, breakpad will include the full stack memory for the first 20 threads, and afterwards capture only 2KB of stack memory for each additional thread.
Change-Id: I2f3aa0df51be9f0bf0755fb288702911cdb88052 Reviewed-on: http://gerrit.cloudera.org:8080/2990 Reviewed-by: Lars Volker <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/df8bf3a9 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/df8bf3a9 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/df8bf3a9 Branch: refs/heads/master Commit: df8bf3a96551b4844ec8152aa5d4c16c5d6b6bde Parents: eaa3926 Author: Lars Volker <[email protected]> Authored: Fri May 6 23:29:07 2016 +0200 Committer: Tim Armstrong <[email protected]> Committed: Thu May 12 14:18:04 2016 -0700 ---------------------------------------------------------------------- be/src/common/global-flags.cc | 5 +++ be/src/util/minidump.cc | 10 +++++- tests/custom_cluster/test_breakpad.py | 56 ++++++++++++++++++++++++++---- 3 files changed, 63 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df8bf3a9/be/src/common/global-flags.cc ---------------------------------------------------------------------- diff --git a/be/src/common/global-flags.cc b/be/src/common/global-flags.cc index b3f83d9..82263e0 100644 --- a/be/src/common/global-flags.cc +++ b/be/src/common/global-flags.cc @@ -76,6 +76,11 @@ DEFINE_string(minidump_path, "/tmp/impala-minidumps", "Directory to write minidu DEFINE_int32(max_minidumps, 9, "Maximum number of minidump files to keep per daemon. " "Older files are removed first. Set to 0 to keep all minidump files."); +DEFINE_int32(minidump_size_limit_hint_kb, 20480, "Size limit hint for minidump files in " + "KB. If a minidump exceeds this value, then breakpad will reduce the stack memory it " + "collects for each thread from 8KB to 2KB. However it will always include the full " + "stack memory for the first 20 threads, including the thread that crashed."); + DEFINE_bool(load_auth_to_local_rules, false, "If true, load auth_to_local configuration " "from hdfs' core-site.xml. When enabled, impalad reads the rules from the property " "hadoop.security.auth_to_local and applies them to translate the Kerberos principal " http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df8bf3a9/be/src/util/minidump.cc ---------------------------------------------------------------------- diff --git a/be/src/util/minidump.cc b/be/src/util/minidump.cc index 713777a..861ed3c 100644 --- a/be/src/util/minidump.cc +++ b/be/src/util/minidump.cc @@ -39,8 +39,9 @@ using boost::filesystem::path; using boost::filesystem::remove; using boost::system::error_code; -DECLARE_int32(max_minidumps); DECLARE_string(minidump_path); +DECLARE_int32(max_minidumps); +DECLARE_int32(minidump_size_limit_hint_kb); #define MINIDUMP_LOG_BUF_SIZE 256 @@ -186,6 +187,13 @@ Status RegisterMinidump(const char* cmd_line_path) { google_breakpad::MinidumpDescriptor desc(FLAGS_minidump_path.c_str()); + // Limit filesize if configured. + if (FLAGS_minidump_size_limit_hint_kb > 0) { + size_t size_limit = 1024 * static_cast<int64_t>(FLAGS_minidump_size_limit_hint_kb); + LOG(INFO) << "Setting minidump size limit to " << size_limit << "."; + desc.set_size_limit(size_limit); + } + // Intentionally leaked. We want this to have the lifetime of the process. google_breakpad::ExceptionHandler* eh = new google_breakpad::ExceptionHandler(desc, NULL, DumpCallback, NULL, true, -1); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df8bf3a9/tests/custom_cluster/test_breakpad.py ---------------------------------------------------------------------- diff --git a/tests/custom_cluster/test_breakpad.py b/tests/custom_cluster/test_breakpad.py index 595bbff..4345ee9 100644 --- a/tests/custom_cluster/test_breakpad.py +++ b/tests/custom_cluster/test_breakpad.py @@ -19,7 +19,7 @@ import shutil import tempfile import time -from signal import SIGSEGV +from signal import SIGSEGV, SIGKILL from tests.common.custom_cluster_test_suite import CustomClusterTestSuite @@ -56,17 +56,21 @@ class TestBreakpad(CustomClusterTestSuite): # Start default cluster for subsequent tests (verify_metrics). cls._start_impala_cluster([]) - def start_cluster(self): - cluster_options = ["""--%s='-minidump_path=%s -max_minidumps=2'""" - % (arg, self.tmp_dir) for arg in DAEMON_ARGS] + def start_cluster_with_args(self, **kwargs): + cluster_options = [] + for daemon_arg in DAEMON_ARGS: + daemon_options = " ".join("-%s=%s" % i for i in kwargs.iteritems()) + cluster_options.append("""--%s='%s'""" % (daemon_arg, daemon_options)) self._start_impala_cluster(cluster_options) + def start_cluster(self): + self.start_cluster_with_args(minidump_path=self.tmp_dir, max_minidumps=2) + def start_cluster_without_minidumps(self): - cluster_options = ["""--%s='-minidump_path= -max_minidumps=2'""" - % arg for arg in DAEMON_ARGS] - self._start_impala_cluster(cluster_options) + self.start_cluster_with_args(minidump_path='', max_minidumps=2) def kill_cluster(self, signal): + self.cluster.refresh() cluster = self.cluster for impalad in cluster.impalads: impalad.kill(signal) @@ -129,3 +133,41 @@ class TestBreakpad(CustomClusterTestSuite): self.start_cluster_without_minidumps() self.kill_cluster(SIGSEGV) self.assert_num_logfile_entries(0) + + def trigger_single_minidump_and_get_size(self): + """Kill a single impalad with SIGSEGV to make it write a minidump. Kill the rest of + the cluster. Clean up the single minidump file and return its size. + """ + assert len(self.cluster.impalads) > 0 + # Make one impalad write a minidump. + self.cluster.impalads[0].kill(SIGSEGV) + # Wait for the minidump to be written before killing the rest of the cluster. + time.sleep(1) + # Kill the rest of the cluster. + self.kill_cluster(SIGKILL) + assert self.count_minidumps('impalad') == 1 + # Get file size of that miniump. + path = os.path.join(self.tmp_dir, 'impalad') + minidump_file = glob.glob("%s/*.dmp" % path)[0] + minidump_size = os.path.getsize(minidump_file) + os.remove(minidump_file) + assert self.count_all_minidumps() == 0 + return minidump_size + + @pytest.mark.execute_serially + def test_limit_minidump_size(self): + """Check that setting the 'minidump_size_limit_hint_kb' to a small value will reduce + the minidump file size. + """ + assert self.count_all_minidumps() == 0 + # Generate minidump with default settings. + self.start_cluster() + full_minidump_size = self.trigger_single_minidump_and_get_size() + # Start cluster with limited minidump file size, we use a very small value, to ensure + # the resulting minidump will be as small as possible. + self.start_cluster_with_args(minidump_path=self.tmp_dir, + minidump_size_limit_hint_kb=1) + reduced_minidump_size = self.trigger_single_minidump_and_get_size() + # Check that the minidump file size has been reduced. + assert reduced_minidump_size < full_minidump_size +
