AMBARI-21614. Restart NFSGateway fails after ResourceManager move to another host (aonishuk)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/7c6f754e Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/7c6f754e Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/7c6f754e Branch: refs/heads/branch-feature-AMBARI-21450 Commit: 7c6f754e963abaadd628fd84e51ef0dfb22dc726 Parents: ffa9acf Author: Andrew Onishuk <[email protected]> Authored: Tue Aug 1 12:46:52 2017 +0300 Committer: Andrew Onishuk <[email protected]> Committed: Tue Aug 1 12:46:52 2017 +0300 ---------------------------------------------------------------------- .../HDFS/2.1.0.2.0/package/scripts/utils.py | 16 ++++++++++++++++ .../test/python/stacks/2.0.6/HDFS/test_namenode.py | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/7c6f754e/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/utils.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/utils.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/utils.py index d861ba9..e2d60a0 100644 --- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/utils.py +++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/utils.py @@ -19,6 +19,7 @@ limitations under the License. import os import re import urllib2 +import subprocess import ambari_simplejson as json # simplejson is much faster comparing to Python 2.6 json module and has the same functions set. from resource_management.core.resources.system import Directory, File, Execute @@ -281,6 +282,21 @@ def service(action=None, name=None, user=None, options="", create_pid_dir=False, except: show_logs(log_dir, user) raise + + # Wait until stop actually happens + process_id_does_not_exist_command = format("! ( {process_id_exists_command} )") + code, out = shell.call(process_id_does_not_exist_command, + env=hadoop_env_exports, + tries = 6, + try_sleep = 10, + ) + + # If stop didn't happen, kill it forcefully + if code != 0: + code, out, err = shell.checked_call(("cat", pid_file), sudo=True, env=hadoop_env_exports, stderr=subprocess.PIPE) + pid = out + Execute(("kill", "-9", pid), sudo=True) + File(pid_file, action="delete") def get_jmx_data(nn_address, modeler_type, metric, encrypted=False, security_enabled=False): http://git-wip-us.apache.org/repos/asf/ambari/blob/7c6f754e/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py index 862a17e..18d98a9 100644 --- a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py +++ b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py @@ -1348,7 +1348,7 @@ class TestNamenode(RMFTestCase): config_file = "nn_eu_standby.json", stack_version = self.STACK_VERSION, target = RMFTestCase.TARGET_COMMON_SERVICES, - call_mocks = [(0, None, ''), (0, None)], + call_mocks = [(0, None), (0, None, ''), (0, None)] , mocks_dict=mocks_dict) calls = mocks_dict['call'].call_args_list
