Repository: ambari Updated Branches: refs/heads/trunk 5c2a40e3e -> dd7424f34
AMBARI-18286. Processes children are not killed on timeout (aonishuk) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/dd7424f3 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/dd7424f3 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/dd7424f3 Branch: refs/heads/trunk Commit: dd7424f34e37987f4dedaa7de753c7ead10de53e Parents: 5c2a40e Author: Andrew Onishuk <[email protected]> Authored: Thu Sep 1 12:37:47 2016 +0300 Committer: Andrew Onishuk <[email protected]> Committed: Thu Sep 1 12:37:47 2016 +0300 ---------------------------------------------------------------------- .../resource_management/TestGroupResource.py | 11 +++---- .../resource_management/TestUserResource.py | 23 ++++++++------- .../core/providers/system.py | 14 +-------- .../core/resources/system.py | 1 - .../python/resource_management/core/shell.py | 25 +++++++++------- .../python/resource_management/core/utils.py | 31 +++++++++++++++++++- 6 files changed, 63 insertions(+), 42 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/dd7424f3/ambari-agent/src/test/python/resource_management/TestGroupResource.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/resource_management/TestGroupResource.py b/ambari-agent/src/test/python/resource_management/TestGroupResource.py index 8ebac39..81bb7f9 100644 --- a/ambari-agent/src/test/python/resource_management/TestGroupResource.py +++ b/ambari-agent/src/test/python/resource_management/TestGroupResource.py @@ -25,6 +25,7 @@ from ambari_commons.os_check import OSCheck from resource_management.core import Environment, Fail from resource_management.core.resources import Group from resource_management.core.system import System +from resource_management.core.shell import preexec_fn import os import select @@ -62,7 +63,7 @@ class TestGroupResource(TestCase): self.assertEqual(popen_mock.call_count, 1) - popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E groupadd -p secure hadoop"], shell=False, preexec_fn=None, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) + popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E groupadd -p secure hadoop"], shell=False, preexec_fn=preexec_fn, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) getgrnam_mock.assert_called_with('hadoop') @@ -84,7 +85,7 @@ class TestGroupResource(TestCase): self.assertEqual(popen_mock.call_count, 1) - popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E groupmod -p secure -g 2 mapred"], shell=False, preexec_fn=None, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) + popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E groupmod -p secure -g 2 mapred"], shell=False, preexec_fn=preexec_fn, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) getgrnam_mock.assert_called_with('mapred') @@ -109,7 +110,7 @@ class TestGroupResource(TestCase): except Fail: pass self.assertEqual(popen_mock.call_count, 1) - popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E groupmod -p secure -g 2 mapred"], shell=False, preexec_fn=None, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) + popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E groupmod -p secure -g 2 mapred"], shell=False, preexec_fn=preexec_fn, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) getgrnam_mock.assert_called_with('mapred') @@ -130,7 +131,7 @@ class TestGroupResource(TestCase): self.assertEqual(popen_mock.call_count, 1) - popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', 'ambari-sudo.sh PATH=/bin -H -E groupdel mapred'], shell=False, preexec_fn=None, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) + popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', 'ambari-sudo.sh PATH=/bin -H -E groupdel mapred'], shell=False, preexec_fn=preexec_fn, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) getgrnam_mock.assert_called_with('mapred') @@ -155,7 +156,7 @@ class TestGroupResource(TestCase): pass self.assertEqual(popen_mock.call_count, 1) - popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', 'ambari-sudo.sh PATH=/bin -H -E groupdel mapred'], shell=False, preexec_fn=None, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) + popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', 'ambari-sudo.sh PATH=/bin -H -E groupdel mapred'], shell=False, preexec_fn=preexec_fn, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) getgrnam_mock.assert_called_with('mapred') def _get_group(): http://git-wip-us.apache.org/repos/asf/ambari/blob/dd7424f3/ambari-agent/src/test/python/resource_management/TestUserResource.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/resource_management/TestUserResource.py b/ambari-agent/src/test/python/resource_management/TestUserResource.py index 2a97676..da94521 100644 --- a/ambari-agent/src/test/python/resource_management/TestUserResource.py +++ b/ambari-agent/src/test/python/resource_management/TestUserResource.py @@ -26,6 +26,7 @@ from ambari_commons.os_check import OSCheck from resource_management.core import Environment, Fail from resource_management.core.system import System from resource_management.core.resources import User +from resource_management.core.shell import preexec_fn import subprocess import os import select @@ -56,7 +57,7 @@ class TestUserResource(TestCase): with Environment('/') as env: user = User("mapred", action = "create", shell = "/bin/bash") - popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E useradd -m -s /bin/bash mapred"], shell=False, preexec_fn=None, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) + popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E useradd -m -s /bin/bash mapred"], shell=False, preexec_fn=preexec_fn, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) self.assertEqual(popen_mock.call_count, 1) @patch.object(subprocess, "Popen") @@ -71,7 +72,7 @@ class TestUserResource(TestCase): with Environment('/') as env: user = User("mapred", action = "create", shell = "/bin/bash") - popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E usermod -s /bin/bash mapred"], shell=False, preexec_fn=None, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) + popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E usermod -s /bin/bash mapred"], shell=False, preexec_fn=preexec_fn, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) self.assertEqual(popen_mock.call_count, 1) @patch.object(subprocess, "Popen") @@ -86,7 +87,7 @@ class TestUserResource(TestCase): with Environment('/') as env: user = User("mapred", action = "remove", shell = "/bin/bash") - popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', 'ambari-sudo.sh PATH=/bin -H -E userdel mapred'], shell=False, preexec_fn=None, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) + popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', 'ambari-sudo.sh PATH=/bin -H -E userdel mapred'], shell=False, preexec_fn=preexec_fn, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) self.assertEqual(popen_mock.call_count, 1) @patch.object(subprocess, "Popen") @@ -102,7 +103,7 @@ class TestUserResource(TestCase): user = User("mapred", action = "create", comment = "testComment", shell = "/bin/bash") - popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E usermod -c testComment -s /bin/bash mapred"], shell=False, preexec_fn=None, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) + popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E usermod -c testComment -s /bin/bash mapred"], shell=False, preexec_fn=preexec_fn, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) self.assertEqual(popen_mock.call_count, 1) @patch.object(subprocess, "Popen") @@ -118,7 +119,7 @@ class TestUserResource(TestCase): user = User("mapred", action = "create", home = "/test/home", shell = "/bin/bash") - popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E usermod -s /bin/bash -d /test/home mapred"], shell=False, preexec_fn=None, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) + popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E usermod -s /bin/bash -d /test/home mapred"], shell=False, preexec_fn=preexec_fn, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) self.assertEqual(popen_mock.call_count, 1) @patch.object(subprocess, "Popen") @@ -134,7 +135,7 @@ class TestUserResource(TestCase): user = User("mapred", action = "create", password = "secure", shell = "/bin/bash") - popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E usermod -s /bin/bash -p secure mapred"], shell=False, preexec_fn=None, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) + popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E usermod -s /bin/bash -p secure mapred"], shell=False, preexec_fn=preexec_fn, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) self.assertEqual(popen_mock.call_count, 1) @patch.object(subprocess, "Popen") @@ -149,7 +150,7 @@ class TestUserResource(TestCase): with Environment('/') as env: user = User("mapred", action = "create", shell = "/bin/sh") - popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E usermod -s /bin/sh mapred"], shell=False, preexec_fn=None, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) + popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E usermod -s /bin/sh mapred"], shell=False, preexec_fn=preexec_fn, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) self.assertEqual(popen_mock.call_count, 1) @patch.object(subprocess, "Popen") @@ -164,7 +165,7 @@ class TestUserResource(TestCase): with Environment('/') as env: user = User("mapred", action = "create", uid = "1", shell = "/bin/bash") - popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E usermod -s /bin/bash -u 1 mapred"], shell=False, preexec_fn=None, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) + popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E usermod -s /bin/bash -u 1 mapred"], shell=False, preexec_fn=preexec_fn, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) self.assertEqual(popen_mock.call_count, 1) @patch.object(subprocess, "Popen") @@ -179,7 +180,7 @@ class TestUserResource(TestCase): with Environment('/') as env: user = User("mapred", action = "create", gid = "1", shell = "/bin/bash") - popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E usermod -s /bin/bash -g 1 mapred"], shell=False, preexec_fn=None, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) + popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E usermod -s /bin/bash -g 1 mapred"], shell=False, preexec_fn=preexec_fn, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) self.assertEqual(popen_mock.call_count, 1) @patch('resource_management.core.providers.accounts.UserProvider.user_groups', new_callable=PropertyMock) @@ -197,7 +198,7 @@ class TestUserResource(TestCase): user = User("mapred", action = "create", groups = ['1','2','3'], shell = "/bin/bash") - popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', 'ambari-sudo.sh PATH=/bin -H -E usermod -s /bin/bash -G 1,2,3,hadoop mapred'], shell=False, preexec_fn=None, env={'PATH': '/bin'}, close_fds=True, stdout=-1, stderr=-2, cwd=None) + popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', 'ambari-sudo.sh PATH=/bin -H -E usermod -s /bin/bash -G 1,2,3,hadoop mapred'], shell=False, preexec_fn=preexec_fn, env={'PATH': '/bin'}, close_fds=True, stdout=-1, stderr=-2, cwd=None) self.assertEqual(popen_mock.call_count, 1) @patch.object(subprocess, "Popen") @@ -211,7 +212,7 @@ class TestUserResource(TestCase): with Environment('/') as env: user = User("mapred", action = "create") - popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E useradd -m mapred"], shell=False, preexec_fn=None, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) + popen_mock.assert_called_with(['/bin/bash', '--login', '--noprofile', '-c', "ambari-sudo.sh PATH=/bin -H -E useradd -m mapred"], shell=False, preexec_fn=preexec_fn, stderr=-2, stdout=-1, env={'PATH': '/bin'}, cwd=None, close_fds=True) self.assertEqual(popen_mock.call_count, 1) def _get_user_entity(): http://git-wip-us.apache.org/repos/asf/ambari/blob/dd7424f3/ambari-common/src/main/python/resource_management/core/providers/system.py ---------------------------------------------------------------------- diff --git a/ambari-common/src/main/python/resource_management/core/providers/system.py b/ambari-common/src/main/python/resource_management/core/providers/system.py index 914c902..fcbab01 100644 --- a/ambari-common/src/main/python/resource_management/core/providers/system.py +++ b/ambari-common/src/main/python/resource_management/core/providers/system.py @@ -243,17 +243,6 @@ class LinkProvider(Provider): Logger.info("Deleting %s" % self.resource) sudo.unlink(path) - -def _preexec_fn(resource): - def preexec(): - if resource.group: - gid = grp.getgrnam(resource.group).gr_gid - os.setgid(gid) - os.setegid(gid) - - return preexec - - class ExecuteProvider(Provider): def action_run(self): if self.resource.creates: @@ -263,8 +252,7 @@ class ExecuteProvider(Provider): shell.checked_call(self.resource.command, logoutput=self.resource.logoutput, cwd=self.resource.cwd, env=self.resource.environment, - preexec_fn=_preexec_fn(self.resource), user=self.resource.user, - wait_for_finish=self.resource.wait_for_finish, + user=self.resource.user, wait_for_finish=self.resource.wait_for_finish, timeout=self.resource.timeout,on_timeout=self.resource.on_timeout, path=self.resource.path, sudo=self.resource.sudo, http://git-wip-us.apache.org/repos/asf/ambari/blob/dd7424f3/ambari-common/src/main/python/resource_management/core/resources/system.py ---------------------------------------------------------------------- diff --git a/ambari-common/src/main/python/resource_management/core/resources/system.py b/ambari-common/src/main/python/resource_management/core/resources/system.py index 6860735..7f164f6 100644 --- a/ambari-common/src/main/python/resource_management/core/resources/system.py +++ b/ambari-common/src/main/python/resource_management/core/resources/system.py @@ -186,7 +186,6 @@ class Execute(Resource): # this runs command with a specific env variables, env={'JAVA_HOME': '/usr/jdk'} environment = ResourceArgument(default={}) user = ResourceArgument() - group = ResourceArgument() returns = ForcedListArgument(default=0) tries = ResourceArgument(default=1) try_sleep = ResourceArgument(default=0) # seconds http://git-wip-us.apache.org/repos/asf/ambari/blob/dd7424f3/ambari-common/src/main/python/resource_management/core/shell.py ---------------------------------------------------------------------- diff --git a/ambari-common/src/main/python/resource_management/core/shell.py b/ambari-common/src/main/python/resource_management/core/shell.py index 94933bd..6d9eb18 100644 --- a/ambari-common/src/main/python/resource_management/core/shell.py +++ b/ambari-common/src/main/python/resource_management/core/shell.py @@ -35,6 +35,7 @@ import traceback from exceptions import Fail from exceptions import ExecuteTimeoutException from resource_management.core.logger import Logger +from resource_management.core import utils from ambari_commons.constants import AMBARI_SUDO_BINARY # use quiet=True calls from this folder (logs get too messy duplicating the resources with its commands) @@ -78,9 +79,17 @@ def log_function_call(function): return inner +def preexec_fn(): + processId = os.getpid() + try: + os.setpgid(processId, processId) + except: + Logger.exception('setpgid({0}, {0}) failed'.format(processId)) + raise + @log_function_call def checked_call(command, quiet=False, logoutput=None, stdout=subprocess.PIPE,stderr=subprocess.STDOUT, - cwd=None, env=None, preexec_fn=None, user=None, wait_for_finish=True, timeout=None, on_timeout=None, + cwd=None, env=None, preexec_fn=preexec_fn, user=None, wait_for_finish=True, timeout=None, on_timeout=None, path=None, sudo=False, on_new_line=None, tries=1, try_sleep=0): """ Execute the shell command and throw an exception on failure. @@ -94,7 +103,7 @@ def checked_call(command, quiet=False, logoutput=None, stdout=subprocess.PIPE,st @log_function_call def call(command, quiet=False, logoutput=None, stdout=subprocess.PIPE,stderr=subprocess.STDOUT, - cwd=None, env=None, preexec_fn=None, user=None, wait_for_finish=True, timeout=None, on_timeout=None, + cwd=None, env=None, preexec_fn=preexec_fn, user=None, wait_for_finish=True, timeout=None, on_timeout=None, path=None, sudo=False, on_new_line=None, tries=1, try_sleep=0): """ Execute the shell command despite failures. @@ -107,7 +116,7 @@ def call(command, quiet=False, logoutput=None, stdout=subprocess.PIPE,stderr=sub @log_function_call def non_blocking_call(command, quiet=False, stdout=subprocess.PIPE,stderr=subprocess.STDOUT, - cwd=None, env=None, preexec_fn=None, user=None, timeout=None, path=None, sudo=False): + cwd=None, env=None, preexec_fn=preexec_fn, user=None, timeout=None, path=None, sudo=False): """ Execute the shell command and don't wait until it's completion @@ -156,7 +165,7 @@ def _call_wrapper(command, **kwargs): return result def _call(command, logoutput=None, throw_on_failure=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT, - cwd=None, env=None, preexec_fn=None, user=None, wait_for_finish=True, timeout=None, on_timeout=None, + cwd=None, env=None, preexec_fn=preexec_fn, user=None, wait_for_finish=True, timeout=None, on_timeout=None, path=None, sudo=False, on_new_line=None, tries=1, try_sleep=0): """ Execute shell command @@ -371,11 +380,5 @@ def _print(line): def _on_timeout(proc, timeout_event): timeout_event.set() - if proc.poll() == None: - try: - proc.terminate() - proc.wait() - # catch race condition if proc already dead - except OSError: - pass + utils.killpg_gracefully(proc) http://git-wip-us.apache.org/repos/asf/ambari/blob/dd7424f3/ambari-common/src/main/python/resource_management/core/utils.py ---------------------------------------------------------------------- diff --git a/ambari-common/src/main/python/resource_management/core/utils.py b/ambari-common/src/main/python/resource_management/core/utils.py index c39e9b5..dc771d1 100644 --- a/ambari-common/src/main/python/resource_management/core/utils.py +++ b/ambari-common/src/main/python/resource_management/core/utils.py @@ -20,14 +20,18 @@ Ambari Agent """ +import time +import os import contextlib import sys +import signal import cStringIO from functools import wraps from resource_management.core.exceptions import Fail from itertools import chain, repeat, islice PASSWORDS_HIDE_STRING = "[PROTECTED]" +GRACEFUL_PG_KILL_TIMEOUT_SECONDS = 5 class AttributeDictionary(object): def __init__(self, *args, **kwargs): @@ -154,4 +158,29 @@ def pad_infinite(iterable, padding=None): return chain(iterable, repeat(padding)) def pad(iterable, size, padding=None): - return islice(pad_infinite(iterable, padding), size) \ No newline at end of file + return islice(pad_infinite(iterable, padding), size) + +def killpg_gracefully(proc, timeout=GRACEFUL_PG_KILL_TIMEOUT_SECONDS): + """ + Tries to kill pgroup (process group) of process with SIGTERM. + If the process is still alive after waiting for timeout, SIGKILL is sent to the pgroup. + """ + from resource_management.core import sudo + from resource_management.core.logger import Logger + + if proc.poll() == None: + try: + pgid = os.getpgid(proc.pid) + sudo.kill(-pgid, signal.SIGTERM) + + for i in xrange(10*timeout): + if proc.poll() is not None: + break + time.sleep(0.1) + else: + Logger.info("Cannot gracefully kill process group {0}. Resorting to SIGKILL.".format(pgid)) + sudo.kill(-pgid, signal.SIGKILL) + proc.wait() + # catch race condition if proc already dead + except OSError: + pass \ No newline at end of file
