Repository: ambari Updated Branches: refs/heads/branch-2.4 9b1a3e885 -> f975c3b32
AMBARI-18684 - Webhcat server start failed during EU with BindException (jonathanhurley) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/f975c3b3 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/f975c3b3 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/f975c3b3 Branch: refs/heads/branch-2.4 Commit: f975c3b3209c9f197e0db577295e084a4109f5a9 Parents: 9b1a3e8 Author: Jonathan Hurley <[email protected]> Authored: Mon Oct 24 12:49:59 2016 -0400 Committer: Jonathan Hurley <[email protected]> Committed: Sat Oct 29 20:36:00 2016 -0400 ---------------------------------------------------------------------- .../package/scripts/webhcat_service.py | 34 +++++++++++++------- .../stacks/2.0.6/HIVE/test_webhcat_server.py | 28 +++++----------- 2 files changed, 31 insertions(+), 31 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/f975c3b3/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/scripts/webhcat_service.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/scripts/webhcat_service.py b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/scripts/webhcat_service.py index c24db4c..962becc 100644 --- a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/scripts/webhcat_service.py +++ b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/scripts/webhcat_service.py @@ -59,35 +59,47 @@ def webhcat_service(action='start', upgrade_type=None): raise elif action == 'stop': try: + # try stopping WebHCat using its own script graceful_stop(cmd, environ) except Fail: show_logs(params.hcat_log_dir, params.webhcat_user) Logger.info(traceback.format_exc()) + # run this as WebHcat since the Execute conditions of not_if and only_if can't 
pid_expression = "`" + as_user(format("cat {webhcat_pid_file}"), user=params.webhcat_user) + "`" + + # the PID must exist AND'd with the process must be alive + # the return code here is going to be 0 IFF both conditions are met correctly process_id_exists_command = format("ls {webhcat_pid_file} >/dev/null 2>&1 && ps -p {pid_expression} >/dev/null 2>&1") + + # kill command to run daemon_hard_kill_cmd = format("{sudo} kill -9 {pid_expression}") - wait_time = 10 + + # check to ensure that it has stopped by looking for the running PID and then killing + # it forcefully if it exists - the behavior of not-if/only-if is as follows: + # not_if return code IS 0 + # only_if return code is NOT 0 Execute(daemon_hard_kill_cmd, - not_if = format("! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )"), - ignore_failures = True - ) + only_if = process_id_exists_command, + ignore_failures = True) try: # check if stopped the process, else fail the task - Execute(format("! ({process_id_exists_command})"), - tries=20, - try_sleep=3, - ) + Execute(format("! ({process_id_exists_command})")) except: show_logs(params.hcat_log_dir, params.webhcat_user) raise - File(params.webhcat_pid_file, - action="delete", - ) + File(params.webhcat_pid_file, action="delete") def graceful_stop(cmd, environ): + """ + Attempts to stop WebHCat using its own shell script. On some versions this may not correctly + stop the daemon. 
+ :param cmd: the command to run to stop the daemon + :param environ: the environment variables to execute the command with + :return: + """ import params daemon_cmd = format('{cmd} stop') http://git-wip-us.apache.org/repos/asf/ambari/blob/f975c3b3/ambari-server/src/test/python/stacks/2.0.6/HIVE/test_webhcat_server.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.0.6/HIVE/test_webhcat_server.py b/ambari-server/src/test/python/stacks/2.0.6/HIVE/test_webhcat_server.py index 683c953..0880a19 100644 --- a/ambari-server/src/test/python/stacks/2.0.6/HIVE/test_webhcat_server.py +++ b/ambari-server/src/test/python/stacks/2.0.6/HIVE/test_webhcat_server.py @@ -71,14 +71,11 @@ class TestWebHCatServer(RMFTestCase): ) self.assertResourceCalled('Execute', 'ambari-sudo.sh kill -9 `ambari-sudo.sh su hcat -l -s /bin/bash -c \'[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid\'`', - not_if = "! (ls /var/run/webhcat/webhcat.pid >/dev/null 2>&1 && ps -p `ambari-sudo.sh su hcat -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid'` >/dev/null 2>&1) || ( sleep 10 && ! (ls /var/run/webhcat/webhcat.pid >/dev/null 2>&1 && ps -p `ambari-sudo.sh su hcat -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid'` >/dev/null 2>&1) )", + only_if = "ls /var/run/webhcat/webhcat.pid >/dev/null 2>&1 && ps -p `ambari-sudo.sh su hcat -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid'` >/dev/null 2>&1", ignore_failures = True ) - self.assertResourceCalled('Execute', "! (ls /var/run/webhcat/webhcat.pid >/dev/null 2>&1 && ps -p `ambari-sudo.sh su hcat -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid'` >/dev/null 2>&1)", - tries=20, - try_sleep=3, - ) + self.assertResourceCalled('Execute', "! 
(ls /var/run/webhcat/webhcat.pid >/dev/null 2>&1 && ps -p `ambari-sudo.sh su hcat -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid'` >/dev/null 2>&1)") self.assertResourceCalled('File', '/var/run/webhcat/webhcat.pid', action = ['delete'], @@ -114,14 +111,11 @@ class TestWebHCatServer(RMFTestCase): ) self.assertResourceCalled('Execute', 'ambari-sudo.sh kill -9 `ambari-sudo.sh su hcat -l -s /bin/bash -c \'[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid\'`', - not_if = "! (ls /var/run/webhcat/webhcat.pid >/dev/null 2>&1 && ps -p `ambari-sudo.sh su hcat -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid'` >/dev/null 2>&1) || ( sleep 10 && ! (ls /var/run/webhcat/webhcat.pid >/dev/null 2>&1 && ps -p `ambari-sudo.sh su hcat -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid'` >/dev/null 2>&1) )", + only_if = "ls /var/run/webhcat/webhcat.pid >/dev/null 2>&1 && ps -p `ambari-sudo.sh su hcat -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid'` >/dev/null 2>&1", ignore_failures = True ) - self.assertResourceCalled('Execute', "! (ls /var/run/webhcat/webhcat.pid >/dev/null 2>&1 && ps -p `ambari-sudo.sh su hcat -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid'` >/dev/null 2>&1)", - tries=20, - try_sleep=3, - ) + self.assertResourceCalled('Execute', "! (ls /var/run/webhcat/webhcat.pid >/dev/null 2>&1 && ps -p `ambari-sudo.sh su hcat -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid'` >/dev/null 2>&1)") self.assertResourceCalled('File', '/var/run/webhcat/webhcat.pid', action = ['delete'], @@ -160,14 +154,11 @@ class TestWebHCatServer(RMFTestCase): ) self.assertResourceCalled('Execute', 'ambari-sudo.sh kill -9 `ambari-sudo.sh su hcat -l -s /bin/bash -c \'[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid\'`', - not_if = "! 
(ls /var/run/webhcat/webhcat.pid >/dev/null 2>&1 && ps -p `ambari-sudo.sh su hcat -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid'` >/dev/null 2>&1) || ( sleep 10 && ! (ls /var/run/webhcat/webhcat.pid >/dev/null 2>&1 && ps -p `ambari-sudo.sh su hcat -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid'` >/dev/null 2>&1) )", + only_if = "ls /var/run/webhcat/webhcat.pid >/dev/null 2>&1 && ps -p `ambari-sudo.sh su hcat -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid'` >/dev/null 2>&1", ignore_failures = True ) - self.assertResourceCalled('Execute', "! (ls /var/run/webhcat/webhcat.pid >/dev/null 2>&1 && ps -p `ambari-sudo.sh su hcat -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid'` >/dev/null 2>&1)", - tries=20, - try_sleep=3, - ) + self.assertResourceCalled('Execute', "! (ls /var/run/webhcat/webhcat.pid >/dev/null 2>&1 && ps -p `ambari-sudo.sh su hcat -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid'` >/dev/null 2>&1)") self.assertResourceCalled('File', '/var/run/webhcat/webhcat.pid', action = ['delete'], ) @@ -190,14 +181,11 @@ class TestWebHCatServer(RMFTestCase): ) self.assertResourceCalled('Execute', 'ambari-sudo.sh kill -9 `ambari-sudo.sh su hcat -l -s /bin/bash -c \'[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid\'`', - not_if = "! (ls /var/run/webhcat/webhcat.pid >/dev/null 2>&1 && ps -p `ambari-sudo.sh su hcat -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid'` >/dev/null 2>&1) || ( sleep 10 && ! 
(ls /var/run/webhcat/webhcat.pid >/dev/null 2>&1 && ps -p `ambari-sudo.sh su hcat -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid'` >/dev/null 2>&1) )", + only_if = "ls /var/run/webhcat/webhcat.pid >/dev/null 2>&1 && ps -p `ambari-sudo.sh su hcat -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid'` >/dev/null 2>&1", ignore_failures = True ) - self.assertResourceCalled('Execute', "! (ls /var/run/webhcat/webhcat.pid >/dev/null 2>&1 && ps -p `ambari-sudo.sh su hcat -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid'` >/dev/null 2>&1)", - tries=20, - try_sleep=3, - ) + self.assertResourceCalled('Execute', "! (ls /var/run/webhcat/webhcat.pid >/dev/null 2>&1 && ps -p `ambari-sudo.sh su hcat -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]cat /var/run/webhcat/webhcat.pid'` >/dev/null 2>&1)") self.assertResourceCalled('File', '/var/run/webhcat/webhcat.pid', action = ['delete'], )
