Repository: ambari Updated Branches: refs/heads/trunk 0ee059daf -> fb50d88f1
AMBARI-19289 HDFS Service check fails if previous active NN is down (Weiwei Yang via dili) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/fb50d88f Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/fb50d88f Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/fb50d88f Branch: refs/heads/trunk Commit: fb50d88f182d02c36a27185142cab6c9e4b3659c Parents: 0ee059d Author: Di Li <d...@apache.org> Authored: Fri Jan 13 10:50:15 2017 -0500 Committer: Di Li <d...@apache.org> Committed: Fri Jan 13 10:50:15 2017 -0500 ---------------------------------------------------------------------- .../2.1.0.2.0/package/scripts/service_check.py | 24 ++++++++++++-------- .../3.0.0.3.0/package/scripts/service_check.py | 24 ++++++++++++-------- .../stacks/2.0.6/HDFS/test_service_check.py | 16 ++++++------- 3 files changed, 38 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/fb50d88f/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/service_check.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/service_check.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/service_check.py index 47fc646..24497d0 100644 --- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/service_check.py +++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/service_check.py @@ -43,20 +43,26 @@ class HdfsServiceCheckDefault(HdfsServiceCheck): dir = params.hdfs_tmp_dir tmp_file = format("{dir}/{unique}") - safemode_command = format("dfsadmin -fs {namenode_address} -safemode get | grep OFF") + """ + Ignore checking safemode, because this command is unable to get safemode state + when 1 namenode is down in an HA setup (see more in HDFS-8277). Directly + test HDFS availability by file system operations is consistent in both HA and + non-HA environment. + """ + # safemode_command = format("dfsadmin -fs {namenode_address} -safemode get | grep OFF") if params.security_enabled: Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"), user=params.hdfs_user ) - ExecuteHadoop(safemode_command, - user=params.hdfs_user, - logoutput=True, - conf_dir=params.hadoop_conf_dir, - try_sleep=3, - tries=20, - bin_dir=params.hadoop_bin_dir - ) + #ExecuteHadoop(safemode_command, + # user=params.hdfs_user, + # logoutput=True, + # conf_dir=params.hadoop_conf_dir, + # try_sleep=3, + # tries=20, + # bin_dir=params.hadoop_bin_dir + #) params.HdfsResource(dir, type="directory", action="create_on_execute", http://git-wip-us.apache.org/repos/asf/ambari/blob/fb50d88f/ambari-server/src/main/resources/common-services/HDFS/3.0.0.3.0/package/scripts/service_check.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/HDFS/3.0.0.3.0/package/scripts/service_check.py b/ambari-server/src/main/resources/common-services/HDFS/3.0.0.3.0/package/scripts/service_check.py index 981f002..4348c1e 100644 --- a/ambari-server/src/main/resources/common-services/HDFS/3.0.0.3.0/package/scripts/service_check.py +++ b/ambari-server/src/main/resources/common-services/HDFS/3.0.0.3.0/package/scripts/service_check.py @@ -43,20 +43,26 @@ class HdfsServiceCheckDefault(HdfsServiceCheck): dir = params.hdfs_tmp_dir tmp_file = format("{dir}/{unique}") - safemode_command = format("dfsadmin -fs {namenode_address} -safemode get | grep OFF") + """ + Ignore checking safemode, because this command is unable to get safemode state + when 1 namenode is down in an HA setup (see more in HDFS-8277). Directly + test HDFS availability by file system operations is consistent in both HA and + non-HA environment. + """ + # safemode_command = format("dfsadmin -fs {namenode_address} -safemode get | grep OFF") if params.security_enabled: Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"), user=params.hdfs_user ) - ExecuteHadoop(safemode_command, - user=params.hdfs_user, - logoutput=True, - conf_dir=params.hadoop_conf_dir, - try_sleep=3, - tries=20, - bin_dir=params.hadoop_bin_dir - ) + #ExecuteHadoop(safemode_command, + # user=params.hdfs_user, + # logoutput=True, + # conf_dir=params.hadoop_conf_dir, + # try_sleep=3, + # tries=20, + # bin_dir=params.hadoop_bin_dir + #) params.HdfsResource(dir, type="directory", action="create_on_execute", http://git-wip-us.apache.org/repos/asf/ambari/blob/fb50d88f/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_service_check.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_service_check.py b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_service_check.py index bbc1b3a..5e684ad 100644 --- a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_service_check.py +++ b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_service_check.py @@ -52,14 +52,14 @@ class TestServiceCheck(RMFTestCase): self.assertNoMoreResources() def assert_service_check(self): - self.assertResourceCalled('ExecuteHadoop', 'dfsadmin -fs hdfs://c6401.ambari.apache.org:8020 -safemode get | grep OFF', - logoutput = True, - tries = 20, - conf_dir = '/etc/hadoop/conf', - try_sleep = 3, - bin_dir = '/usr/bin', - user = 'hdfs', - ) + #self.assertResourceCalled('ExecuteHadoop', 'dfsadmin -fs hdfs://c6401.ambari.apache.org:8020 -safemode get | grep OFF', + # logoutput = True, + # tries = 20, + # conf_dir = '/etc/hadoop/conf', + # try_sleep = 3, + # bin_dir = '/usr/bin', + # user = 'hdfs', + #) self.assertResourceCalled('HdfsResource', '/tmp', immutable_paths = self.DEFAULT_IMMUTABLE_PATHS, security_enabled = False,