Repository: incubator-impala Updated Branches: refs/heads/master 4e7172f6f -> ca62ce65e
IMPALA-3684, IMPALA-3693: Disable core files for breakpad tests

The breakpad tests were writing core files when triggering minidump
writes. This was actually not needed and interfered with test execution
and artifact collection. Most notably processes would take a long time
to terminate while writing core files (IMPALA-3684). The core files
would also be wrongly collected by Jenkins (IMPALA-3693).

This change adds code to stop test clusters reliably, making
test_breakpad independent from calling setup-impala-cluster.py via
os.system. It also disables core dumps for the duration of the test and
re-enables them afterwards.

Change-Id: If592339632aa662b59be09d911229566d5772321
Reviewed-on: http://gerrit.cloudera.org:8080/3339
Reviewed-by: Michael Brown <[email protected]>
Reviewed-by: Silvius Rus <[email protected]>
Tested-by: Lars Volker <[email protected]>

Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ca62ce65
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ca62ce65
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ca62ce65

Branch: refs/heads/master
Commit: ca62ce65e9c652eecc9cb883842bd43594627be8
Parents: abd6ad3
Author: Lars Volker <[email protected]>
Authored: Tue Jun 7 18:24:14 2016 +0200
Committer: Tim Armstrong <[email protected]>
Committed: Thu Jun 9 17:31:00 2016 -0700

----------------------------------------------------------------------
 tests/custom_cluster/test_breakpad.py | 47 +++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 13 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ca62ce65/tests/custom_cluster/test_breakpad.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_breakpad.py b/tests/custom_cluster/test_breakpad.py
index 4abd34b..4b6aa2d 100644
---
a/tests/custom_cluster/test_breakpad.py +++ b/tests/custom_cluster/test_breakpad.py @@ -15,10 +15,12 @@ import glob import os import pytest +import psutil import shutil import tempfile import time +from resource import setrlimit, RLIMIT_CORE, RLIM_INFINITY from signal import SIGSEGV, SIGKILL from tests.common.custom_cluster_test_suite import CustomClusterTestSuite @@ -45,14 +47,21 @@ class TestBreakpad(CustomClusterTestSuite): def teardown_method(self, method): # Override parent # Stop the cluster to prevent future accesses to self.tmp_dir. - self._stop_impala_cluster() + self.kill_cluster(SIGKILL) assert self.tmp_dir shutil.rmtree(self.tmp_dir) @classmethod - def teardown_class(cls): + def setup_class(cls): if cls.exploration_strategy() != 'exhaustive': - return + pytest.skip('breakpad tests only run in exhaustive') + # Disable core dumps for this test + setrlimit(RLIMIT_CORE, (0, RLIM_INFINITY)) + + @classmethod + def teardown_class(cls): + # Re-enable core dumps + setrlimit(RLIMIT_CORE, (RLIM_INFINITY, RLIM_INFINITY)) # Start default cluster for subsequent tests (verify_metrics). 
cls._start_impala_cluster([]) @@ -71,15 +80,28 @@ class TestBreakpad(CustomClusterTestSuite): def kill_cluster(self, signal): self.cluster.refresh() - cluster = self.cluster - for impalad in cluster.impalads: - impalad.kill(signal) - cluster.statestored.kill(signal) - cluster.catalogd.kill(signal) - # Wait for daemons to finish writing minidumps - time.sleep(1) + processes = self.cluster.impalads + [self.cluster.catalogd, self.cluster.statestored] + processes = filter(None, processes) + self.kill_processes(processes, signal) self.assert_all_processes_killed() + def kill_processes(self, processes, signal): + for process in processes: + process.kill(signal) + self.wait_for_all_processes_dead(processes) + + def wait_for_all_processes_dead(self, processes, timeout=300): + for process in processes: + try: + pid = process.get_pid() + if not pid: + continue + psutil_process = psutil.Process(pid) + psutil_process.wait(timeout) + except psutil.TimeoutExpired: + raise RuntimeError("Unable to kill %s (pid %d) after %d seconds." % + (psutil_process.name, psutil_process.pid, timeout)) + def assert_all_processes_killed(self): self.cluster.refresh() assert not self.cluster.impalads @@ -158,11 +180,10 @@ class TestBreakpad(CustomClusterTestSuite): """Kill a single impalad with SIGSEGV to make it write a minidump. Kill the rest of the cluster. Clean up the single minidump file and return its size. """ + self.cluster.refresh() assert len(self.cluster.impalads) > 0 # Make one impalad write a minidump. - self.cluster.impalads[0].kill(SIGSEGV) - # Wait for the minidump to be written before killing the rest of the cluster. - time.sleep(1) + self.kill_processes(self.cluster.impalads[:1], SIGSEGV) # Kill the rest of the cluster. self.kill_cluster(SIGKILL) assert self.count_minidumps('impalad') == 1
