Repository: aurora Updated Branches: refs/heads/master 5410c229f -> 84bde070f
Fix regression in 5410c22. The hard dependency on `prctl` broke thermos unit tests both on Apache Jenkins and OS X. This adopts serb's suggestion https://reviews.apache.org/r/53418/ and wraps the `prcl(2)` call in a try except block. This also exposed some flakyness in `TestRunnerKillProcessGroup.test_pg_is_killed`. Marked the test as flaky and filed AURORA-1809. Testing Done: ./pants test.pytest --junit-xml-dir="$PWD/dist/test-results" src/{main,test}/python:: -- -v src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh Reviewed at https://reviews.apache.org/r/53508/ Project: http://git-wip-us.apache.org/repos/asf/aurora/repo Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/84bde070 Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/84bde070 Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/84bde070 Branch: refs/heads/master Commit: 84bde070f593f149c9e35b24e5781960b82dec80 Parents: 5410c22 Author: Zameer Manji <[email protected]> Authored: Fri Nov 4 18:11:46 2016 -0700 Committer: Zameer Manji <[email protected]> Committed: Fri Nov 4 18:11:46 2016 -0700 ---------------------------------------------------------------------- RELEASE-NOTES.md | 1 - .../apache/thermos/common/process_util.py | 27 ++++++++++++-------- src/main/python/apache/thermos/core/process.py | 12 +-------- .../apache/thermos/core/test_staged_kill.py | 1 + 4 files changed, 18 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/aurora/blob/84bde070/RELEASE-NOTES.md ---------------------------------------------------------------------- diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 94224be..374bee6 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -11,7 +11,6 @@ Containerizer if the scheduler is running with the `-allow_container_volumes` flag. * The executor will send SIGTERM to processes that self daemonize via double forking. -* The executor now requires Linux kernel 3.4 or later. ### Deprecations and removals: http://git-wip-us.apache.org/repos/asf/aurora/blob/84bde070/src/main/python/apache/thermos/common/process_util.py ---------------------------------------------------------------------- diff --git a/src/main/python/apache/thermos/common/process_util.py b/src/main/python/apache/thermos/common/process_util.py index c63b9af..637b025 100644 --- a/src/main/python/apache/thermos/common/process_util.py +++ b/src/main/python/apache/thermos/common/process_util.py @@ -57,18 +57,23 @@ def setup_child_subreaping(): and here: https://lwn.net/Articles/474787/ Callers should reap terminal children to prevent zombies. - - raises OSError if the underlying prctl call fails. - raises RuntimeError if libc cannot be found. """ log.debug("Calling prctl(2) with PR_SET_CHILD_SUBREAPER") # This constant is taken from prctl.h PR_SET_CHILD_SUBREAPER = 36 - library_name = ctypes.util.find_library('c') - if library_name is None: - raise RuntimeError("libc not found") - libc = ctypes.CDLL(library_name, use_errno=True) - ret = libc.prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) - if ret != 0: - errno = ctypes.get_errno() - raise OSError(errno, os.strerror(errno)) + try: + library_name = ctypes.util.find_library('c') + if library_name is None: + log.warning("libc is not found. Unable to call prctl!") + log.warning("Children subreaping is disabled!") + return + libc = ctypes.CDLL(library_name, use_errno=True) + # If we are on a system where prctl doesn't exist, this will throw an + # attribute error. + ret = libc.prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) + if ret != 0: + errno = ctypes.get_errno() + raise OSError(errno, os.strerror(errno)) + except Exception as e: + log.error("Unable to call prctl %s" % e) + log.error("Children subreaping is disabled!") http://git-wip-us.apache.org/repos/asf/aurora/blob/84bde070/src/main/python/apache/thermos/core/process.py ---------------------------------------------------------------------- diff --git a/src/main/python/apache/thermos/core/process.py b/src/main/python/apache/thermos/core/process.py index 13f9ad5..496b540 100644 --- a/src/main/python/apache/thermos/core/process.py +++ b/src/main/python/apache/thermos/core/process.py @@ -94,7 +94,6 @@ class ProcessBase(object): class CheckpointError(Error): pass class UnspecifiedSandbox(Error): pass class PermissionError(Error): pass - class ForkError(Error): pass CONTROL_WAIT_CHECK_INTERVAL = Amount(100, Time.MILLISECONDS) MAXIMUM_CONTROL_WAIT = Amount(1, Time.MINUTES) @@ -285,16 +284,7 @@ class ProcessBase(object): # calls _getpwuid which can raise: # UnknownUserError # PermissionError - try: - self._pid = self._platform.fork() # calls setup_child_subreaping which can - # raise OSError or RuntimeError - except (OSError, RuntimeError) as e: - # Reraise the exceptions possible from the fork as Process.Error - # Note only Python 3 has nice exception chaining, so we do our best here - # by logging the original exception and raising ForkError - msg = 'Error trying to fork process %s'.format(self._name) - self._log(msg, exc_info=True) - raise self.ForkError(msg) + self._pid = self._platform.fork() if self._pid == 0: self._pid = self._platform.getpid() self._wait_for_control() # can raise CheckpointError http://git-wip-us.apache.org/repos/asf/aurora/blob/84bde070/src/test/python/apache/thermos/core/test_staged_kill.py ---------------------------------------------------------------------- diff --git a/src/test/python/apache/thermos/core/test_staged_kill.py b/src/test/python/apache/thermos/core/test_staged_kill.py index 9167e60..4de735f 100644 --- a/src/test/python/apache/thermos/core/test_staged_kill.py +++ b/src/test/python/apache/thermos/core/test_staged_kill.py @@ -248,6 +248,7 @@ class TestRunnerKillProcessGroup(RunnerBase): task = Task(name="task", processes=[Process(name="process", cmdline=SIMPLEFORK_SCRIPT)]) return task.interpolate()[0] + @pytest.mark.skipif('True', reason='Flaky test (AURORA-1809)') def test_pg_is_killed(self): runner = self.start_runner() tm = TaskMonitor(runner.tempdir, runner.task_id)
