AMBARI-18240 - During a Rolling Downgrade Oozie Long Running Jobs Can Fail (jonathanhurley)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/689d4468 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/689d4468 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/689d4468 Branch: refs/heads/AMBARI-2.4.0.2 Commit: 689d446849a897d6e5b7800c28d40b3eb6cd65a8 Parents: 2979f45 Author: Jonathan Hurley <jhur...@hortonworks.com> Authored: Tue Aug 23 12:03:19 2016 -0400 Committer: Jonathan Hurley <jhur...@hortonworks.com> Committed: Tue Aug 23 17:20:37 2016 -0400 ---------------------------------------------------------------------- .../2.1.0.2.0/package/scripts/hdfs_namenode.py | 85 +++--- .../python/stacks/2.0.6/HDFS/test_namenode.py | 283 ++++++------------- 2 files changed, 132 insertions(+), 236 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/689d4468/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py index 5a431aa..63d72aa 100644 --- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py +++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py @@ -23,6 +23,8 @@ from resource_management.core import shell from resource_management.core.source import Template from resource_management.core.resources.system import File, Execute, Directory from resource_management.core.resources.service import Service +from resource_management.libraries.functions import namenode_ha_utils +from resource_management.libraries.functions.decorator import retry from resource_management.libraries.functions.format import format from resource_management.libraries.functions.check_process_status import check_process_status from resource_management.libraries.resources.execute_hadoop import ExecuteHadoop @@ -34,7 +36,6 @@ from utils import get_dfsadmin_base_command if OSCheck.is_windows_family(): from resource_management.libraries.functions.windows_service_utils import check_windows_service_status -from resource_management.core.shell import as_user from resource_management.core.exceptions import Fail from resource_management.core.logger import Logger @@ -162,12 +163,12 @@ def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None, # ___Scenario___________|_Expected safemode state__|_Wait for safemode OFF____| # no-HA | ON -> OFF | Yes | # HA and active | ON -> OFF | Yes | - # HA and standby | no change | no check | + # HA and standby | no change | No | # RU with HA on active | ON -> OFF | Yes | # RU with HA on standby | ON -> OFF | Yes | - # EU with HA on active | ON -> OFF | Yes | - # EU with HA on standby | ON -> OFF | Yes | - # EU non-HA | ON -> OFF | Yes | + # EU with HA on active | ON -> OFF | No | + # EU with HA on standby | ON -> OFF | No | + # EU non-HA | ON -> OFF | No | # because we do things like create directories after starting NN, # the vast majority of the time this should be True - it should only @@ -179,21 +180,30 @@ def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None, if params.dfs_ha_enabled: Logger.info("Waiting for the NameNode to broadcast whether it is Active or Standby...") - if check_is_active_namenode(hdfs_binary): - Logger.info("Waiting for the NameNode to leave Safemode since High Availability is enabled and it is Active...") - else: + + if is_this_namenode_active() is False: # we are the STANDBY NN - ensure_safemode_off = False is_active_namenode = False - Logger.info("This is the Standby NameNode; proceeding without waiting for it to leave Safemode") - else: - Logger.info("Waiting for the NameNode to leave Safemode...") - # During an Express Upgrade, NameNode will not leave SafeMode until the DataNodes are started + # we are the STANDBY NN and this restart is not part of an upgrade + if upgrade_type is None: + ensure_safemode_off = False + + + # During an Express Upgrade, NameNode will not leave SafeMode until the DataNodes are started, + # so always disable the Safemode check if upgrade_type == "nonrolling": - Logger.info("An express upgrade has been detected and this NameNode will not leave Safemode until DataNodes are started. Safemode does not need to end before proceeding.") ensure_safemode_off = False + # some informative logging separate from the above logic to keep things a little cleaner + if ensure_safemode_off: + Logger.info("Waiting for this NameNode to leave Safemode due to the following conditions: HA: {0}, isActive: {1}, upgradeType: {2}".format( + params.dfs_ha_enabled, is_active_namenode, upgrade_type)) + else: + Logger.info("Skipping Safemode check due to the following conditions: HA: {0}, isActive: {1}, upgradeType: {2}".format( + params.dfs_ha_enabled, is_active_namenode, upgrade_type)) + + # wait for Safemode to end if ensure_safemode_off: wait_for_safemode_off(hdfs_binary) @@ -205,7 +215,7 @@ def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None, create_hdfs_directories() create_ranger_audit_hdfs_directories() else: - Logger.info("Skipping creation of HDFS directories since this is not the Active NameNode.") + Logger.info("Skipping creation of HDFS directories since this is either not the Active NameNode or we did not wait for Safemode to finish.") elif action == "stop": import params @@ -500,31 +510,40 @@ def is_namenode_bootstrapped(params): return marked -def check_is_active_namenode(hdfs_binary): +@retry(times=5, sleep_time=5, backoff_factor=2, err_class=Fail) +def is_this_namenode_active(): """ - Checks if current NameNode is active. Waits up to 30 seconds. If other NameNode is active returns False. - :return: True if current NameNode is active, False otherwise + Gets whether the current NameNode is Active. This function will wait until the NameNode is + listed as being either Active or Standby before returning a value. This is to ensure that + that if the other NameNode is Active, we ensure that this NameNode has fully loaded and + registered in the event that the other NameNode is going to be restarted. This prevents + a situation where we detect the other NameNode as Active before this NameNode has fully booted. + If the other Active NameNode is then restarted, there can be a loss of service if this + NameNode has not entered Standby. """ import params - if params.dfs_ha_enabled: - is_active_this_namenode_cmd = as_user(format("{hdfs_binary} --config {hadoop_conf_dir} haadmin -ns {dfs_ha_nameservices} -getServiceState {namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir}) - is_active_other_namenode_cmd = as_user(format("{hdfs_binary} --config {hadoop_conf_dir} haadmin -ns {dfs_ha_nameservices} -getServiceState {other_namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir}) + # returns ([('nn1', 'c6401.ambari.apache.org:50070')], [('nn2', 'c6402.ambari.apache.org:50070')], []) + # 0 1 2 + # + namenode_states = namenode_ha_utils.get_namenode_states(params.hdfs_site, params.security_enabled, + params.hdfs_user, times=5, sleep_time=5, backoff_factor=2) - for i in range(0, 5): - code, out = shell.call(is_active_this_namenode_cmd) # If active NN, code will be 0 - if code == 0: # active - return True + # unwraps ('nn1', 'c6401.ambari.apache.org:50070') + active_namenodes = [] if len(namenode_states[0]) < 1 else namenode_states[0][0] - code, out = shell.call(is_active_other_namenode_cmd) # If other NN is active, code will be 0 - if code == 0: # other NN is active - return False + # unwraps ('nn2', 'c6402.ambari.apache.org:50070') + standby_namenodes = [] if len(namenode_states[1]) < 1 else namenode_states[1][0] - if i < 4: # Do not sleep after last iteration - time.sleep(6) + # check to see if this is the active NameNode + if params.namenode_id in active_namenodes: + return True - Logger.info("Active NameNode is not found.") + # if this is not the active NameNode, then we must wait for it to register as standby + if params.namenode_id in standby_namenodes: return False - else: - return True + # this this point, this NameNode is neither active nor standby - we must wait to ensure it + # enters at least one of these roles before returning a verdict - the annotation will catch + # this failure and retry the fuction automatically + raise Fail(format("The NameNode {namenode_id} is not listed as Active or Standby, waiting...")) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/689d4468/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py index 41c7366..72949bb 100644 --- a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py +++ b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py @@ -399,7 +399,14 @@ class TestNamenode(RMFTestCase): self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid',action = ['delete']) self.assertNoMoreResources() - def test_start_ha_default(self): + @patch("resource_management.libraries.functions.namenode_ha_utils.get_namenode_states") + def test_start_ha_default(self, get_namenode_states_mock): + active_namenodes = [('nn1', 'c6401.ambari.apache.org:50070')] + standby_namenodes = [('nn2', 'c6402.ambari.apache.org:50070')] + unknown_namenodes = [] + + get_namenode_states_mock.return_value = active_namenodes, standby_namenodes, unknown_namenodes + self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/namenode.py", classname = "NameNode", command = "start", @@ -492,20 +499,21 @@ class TestNamenode(RMFTestCase): ) self.assertNoMoreResources() - @patch.object(shell, "call") @patch.object(time, "sleep") - def test_start_ha_default_active_with_retry(self, sleep_mock, call_mocks): - call_mocks = MagicMock() - call_mocks.side_effect = [(1, None), (1, None), (1, None), (1, None), (0, None)] + @patch("resource_management.libraries.functions.namenode_ha_utils.get_namenode_states") + def test_start_ha_default_active_with_retry(self, get_namenode_states_mock, sleep_mock): + active_namenodes = [('nn1', 'c6401.ambari.apache.org:50070')] + standby_namenodes = [('nn2', 'c6402.ambari.apache.org:50070')] + unknown_namenodes = [] + + get_namenode_states_mock.side_effect = [([], [], active_namenodes), (active_namenodes, standby_namenodes, unknown_namenodes)] self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/namenode.py", classname = "NameNode", command = "start", config_file = "ha_default.json", stack_version = self.STACK_VERSION, - target = RMFTestCase.TARGET_COMMON_SERVICES, - call_mocks = call_mocks - ) + target = RMFTestCase.TARGET_COMMON_SERVICES) self.assert_configure_default() self.assertResourceCalled('File', '/etc/hadoop/conf/dfs.exclude', owner = 'hdfs', @@ -590,17 +598,17 @@ class TestNamenode(RMFTestCase): hadoop_conf_dir = '/etc/hadoop/conf', ) self.assertNoMoreResources() - self.assertTrue(call_mocks.called) - self.assertEqual(5, call_mocks.call_count) - calls = [ - call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -ns ns1 -getServiceState nn1 | grep active'"), - call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -ns ns1 -getServiceState nn2 | grep active'"), - call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -ns ns1 -getServiceState nn1 | grep active'"), - call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -ns ns1 -getServiceState nn2 | grep active'"), - call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -ns ns1 -getServiceState nn1 | grep active'")] - call_mocks.assert_has_calls(calls) - - def test_start_ha_secured(self): + self.assertTrue(get_namenode_states_mock.called) + self.assertEqual(2, get_namenode_states_mock.call_count) + + @patch("resource_management.libraries.functions.namenode_ha_utils.get_namenode_states") + def test_start_ha_secured(self, get_namenode_states_mock): + active_namenodes = [('nn1', 'c6401.ambari.apache.org:50070')] + standby_namenodes = [('nn2', 'c6402.ambari.apache.org:50070')] + unknown_namenodes = [] + + get_namenode_states_mock.return_value = active_namenodes, standby_namenodes, unknown_namenodes + self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/namenode.py", classname = "NameNode", command = "start", @@ -698,7 +706,14 @@ class TestNamenode(RMFTestCase): # tests namenode start command when NameNode HA is enabled, and # the HA cluster is started initially, rather than using the UI Wizard - def test_start_ha_bootstrap_active_from_blueprint(self): + @patch("resource_management.libraries.functions.namenode_ha_utils.get_namenode_states") + def test_start_ha_bootstrap_active_from_blueprint(self, get_namenode_states_mock): + active_namenodes = [('nn1', 'c6401.ambari.apache.org:50070')] + standby_namenodes = [('nn2', 'c6402.ambari.apache.org:50070')] + unknown_namenodes = [] + + get_namenode_states_mock.return_value = active_namenodes, standby_namenodes, unknown_namenodes + self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/namenode.py", classname = "NameNode", command = "start", @@ -805,7 +820,14 @@ class TestNamenode(RMFTestCase): # the HA cluster is started initially, rather than using the UI Wizard # this test verifies the startup of a "standby" namenode @patch.object(shell, "call") - def test_start_ha_bootstrap_standby_from_blueprint(self, call_mocks): + @patch("resource_management.libraries.functions.namenode_ha_utils.get_namenode_states") + def test_start_ha_bootstrap_standby_from_blueprint(self, get_namenode_states_mock, call_mocks): + active_namenodes = [('nn1', 'c6401.ambari.apache.org:50070')] + standby_namenodes = [('nn2', 'c6402.ambari.apache.org:50070')] + unknown_namenodes = [] + + get_namenode_states_mock.return_value = active_namenodes, standby_namenodes, unknown_namenodes + call_mocks = MagicMock(return_value=(0,"")) self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/namenode.py", classname = "NameNode", @@ -852,73 +874,24 @@ class TestNamenode(RMFTestCase): environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'}, not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid", ) - self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://c6402.ambari.apache.org:8020 -safemode get | grep 'Safe mode is OFF'", - tries=115, - try_sleep=10, - user="hdfs", - logoutput=True - ) - self.assertResourceCalled('HdfsResource', '/tmp', - immutable_paths = self.DEFAULT_IMMUTABLE_PATHS, - security_enabled = False, - keytab = UnknownConfigurationMock(), - hadoop_bin_dir = '/usr/bin', - default_fs = 'hdfs://ns1', - hdfs_site = self.getConfig()['configurations']['hdfs-site'], - kinit_path_local = '/usr/bin/kinit', - principal_name = None, - user = 'hdfs', - dfs_type = '', - owner = 'hdfs', - hadoop_conf_dir = '/etc/hadoop/conf', - type = 'directory', - action = ['create_on_execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore', - mode = 0777, - ) - self.assertResourceCalled('HdfsResource', '/user/ambari-qa', - immutable_paths = self.DEFAULT_IMMUTABLE_PATHS, - security_enabled = False, - keytab = UnknownConfigurationMock(), - hadoop_bin_dir = '/usr/bin', - default_fs = 'hdfs://ns1', - hdfs_site = self.getConfig()['configurations']['hdfs-site'], - kinit_path_local = '/usr/bin/kinit', - principal_name = None, - user = 'hdfs', - dfs_type = '', - owner = 'ambari-qa', - hadoop_conf_dir = '/etc/hadoop/conf', - type = 'directory', - action = ['create_on_execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore', - mode = 0770, - ) - self.assertResourceCalled('HdfsResource', None, - immutable_paths = self.DEFAULT_IMMUTABLE_PATHS, - security_enabled = False, - keytab = UnknownConfigurationMock(), - hadoop_bin_dir = '/usr/bin', - default_fs = 'hdfs://ns1', - hdfs_site = self.getConfig()['configurations']['hdfs-site'], - kinit_path_local = '/usr/bin/kinit', - principal_name = None, - user = 'hdfs', - dfs_type = '', - action = ['execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore', - hadoop_conf_dir = '/etc/hadoop/conf', - ) self.assertNoMoreResources() self.assertTrue(call_mocks.called) - self.assertEqual(2, call_mocks.call_count) + self.assertEqual(1, call_mocks.call_count) calls = [ - call('hdfs namenode -bootstrapStandby -nonInteractive', logoutput=False, user=u'hdfs'), - call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -ns ns1 -getServiceState nn2 | grep active'")] + call('hdfs namenode -bootstrapStandby -nonInteractive', logoutput=False, user=u'hdfs')] call_mocks.assert_has_calls(calls, any_order=False) # tests namenode start command when NameNode HA is enabled, and # the HA cluster is started initially, rather than using the UI Wizard # this test verifies the startup of a "standby" namenode @patch.object(shell, "call") - def test_start_ha_bootstrap_standby_from_blueprint_initial_start(self, call_mocks): + @patch("resource_management.libraries.functions.namenode_ha_utils.get_namenode_states") + def test_start_ha_bootstrap_standby_from_blueprint_initial_start(self, get_namenode_states_mock, call_mocks): + active_namenodes = [('nn1', 'c6401.ambari.apache.org:50070')] + standby_namenodes = [('nn2', 'c6402.ambari.apache.org:50070')] + unknown_namenodes = [] + + get_namenode_states_mock.return_value = active_namenodes, standby_namenodes, unknown_namenodes call_mocks = MagicMock() call_mocks.side_effect = [(1, None), (0, None), (0, None)] @@ -967,71 +940,23 @@ class TestNamenode(RMFTestCase): environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'}, not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid", ) - self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://c6402.ambari.apache.org:8020 -safemode get | grep 'Safe mode is OFF'", - tries=115, - try_sleep=10, - user="hdfs", - logoutput=True - ) - self.assertResourceCalled('HdfsResource', '/tmp', - immutable_paths = self.DEFAULT_IMMUTABLE_PATHS, - security_enabled = False, - keytab = UnknownConfigurationMock(), - hadoop_bin_dir = '/usr/bin', - default_fs = 'hdfs://ns1', - hdfs_site = self.getConfig()['configurations']['hdfs-site'], - kinit_path_local = '/usr/bin/kinit', - principal_name = None, - user = 'hdfs', - dfs_type = '', - owner = 'hdfs', - hadoop_conf_dir = '/etc/hadoop/conf', - type = 'directory', - action = ['create_on_execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore', - mode = 0777, - ) - self.assertResourceCalled('HdfsResource', '/user/ambari-qa', - immutable_paths = self.DEFAULT_IMMUTABLE_PATHS, - security_enabled = False, - keytab = UnknownConfigurationMock(), - hadoop_bin_dir = '/usr/bin', - default_fs = 'hdfs://ns1', - hdfs_site = self.getConfig()['configurations']['hdfs-site'], - kinit_path_local = '/usr/bin/kinit', - principal_name = None, - user = 'hdfs', - dfs_type = '', - owner = 'ambari-qa', - hadoop_conf_dir = '/etc/hadoop/conf', - type = 'directory', - action = ['create_on_execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore', - mode = 0770, - ) - self.assertResourceCalled('HdfsResource', None, - immutable_paths = self.DEFAULT_IMMUTABLE_PATHS, - security_enabled = False, - keytab = UnknownConfigurationMock(), - hadoop_bin_dir = '/usr/bin', - default_fs = 'hdfs://ns1', - hdfs_site = self.getConfig()['configurations']['hdfs-site'], - kinit_path_local = '/usr/bin/kinit', - principal_name = None, - user = 'hdfs', - dfs_type = '', - action = ['execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore', - hadoop_conf_dir = '/etc/hadoop/conf', - ) self.assertNoMoreResources() self.assertTrue(call_mocks.called) - self.assertEqual(3, call_mocks.call_count) + self.assertEqual(2, call_mocks.call_count) calls = [ - call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -ns ns1 -getServiceState nn2 | grep active'"), call('hdfs namenode -bootstrapStandby -nonInteractive -force', logoutput=False, user=u'hdfs'), call('hdfs namenode -bootstrapStandby -nonInteractive -force', logoutput=False, user=u'hdfs')] call_mocks.assert_has_calls(calls, any_order=True) + @patch.object(shell, "call") - def test_start_ha_bootstrap_standby_from_blueprint_initial_start_dfs_nameservices(self, call_mocks): + @patch("resource_management.libraries.functions.namenode_ha_utils.get_namenode_states") + def test_start_ha_bootstrap_standby_from_blueprint_initial_start_dfs_nameservices(self, get_namenode_states_mock, call_mocks): + active_namenodes = [('nn1', 'c6401.ambari.apache.org:50070')] + standby_namenodes = [('nn2', 'c6402.ambari.apache.org:50070')] + unknown_namenodes = [] + + get_namenode_states_mock.return_value = active_namenodes, standby_namenodes, unknown_namenodes call_mocks = MagicMock() call_mocks.side_effect = [(1, None), (0, None), (0, None)] @@ -1080,65 +1005,10 @@ class TestNamenode(RMFTestCase): environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'}, not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E pgrep -F /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid", ) - self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://c6402.ambari.apache.org:8020 -safemode get | grep 'Safe mode is OFF'", - tries=115, - try_sleep=10, - user="hdfs", - logoutput=True - ) - self.assertResourceCalled('HdfsResource', '/tmp', - immutable_paths = self.DEFAULT_IMMUTABLE_PATHS, - security_enabled = False, - keytab = UnknownConfigurationMock(), - hadoop_bin_dir = '/usr/bin', - default_fs = 'hdfs://ns1', - hdfs_site = self.getConfig()['configurations']['hdfs-site'], - kinit_path_local = '/usr/bin/kinit', - principal_name = None, - user = 'hdfs', - dfs_type = '', - owner = 'hdfs', - hadoop_conf_dir = '/etc/hadoop/conf', - type = 'directory', - action = ['create_on_execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore', - mode = 0777, - ) - self.assertResourceCalled('HdfsResource', '/user/ambari-qa', - immutable_paths = self.DEFAULT_IMMUTABLE_PATHS, - security_enabled = False, - keytab = UnknownConfigurationMock(), - hadoop_bin_dir = '/usr/bin', - default_fs = 'hdfs://ns1', - hdfs_site = self.getConfig()['configurations']['hdfs-site'], - kinit_path_local = '/usr/bin/kinit', - principal_name = None, - user = 'hdfs', - dfs_type = '', - owner = 'ambari-qa', - hadoop_conf_dir = '/etc/hadoop/conf', - type = 'directory', - action = ['create_on_execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore', - mode = 0770, - ) - self.assertResourceCalled('HdfsResource', None, - immutable_paths = self.DEFAULT_IMMUTABLE_PATHS, - security_enabled = False, - keytab = UnknownConfigurationMock(), - hadoop_bin_dir = '/usr/bin', - default_fs = 'hdfs://ns1', - hdfs_site = self.getConfig()['configurations']['hdfs-site'], - kinit_path_local = '/usr/bin/kinit', - principal_name = None, - user = 'hdfs', - dfs_type = '', - action = ['execute'], hdfs_resource_ignore_file='/var/lib/ambari-agent/data/.hdfs_resource_ignore', - hadoop_conf_dir = '/etc/hadoop/conf', - ) self.assertNoMoreResources() self.assertTrue(call_mocks.called) - self.assertEqual(3, call_mocks.call_count) + self.assertEqual(2, call_mocks.call_count) calls = [ - call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -ns ns1 -getServiceState nn2 | grep active'"), call('hdfs namenode -bootstrapStandby -nonInteractive -force', logoutput=False, user=u'hdfs'), call('hdfs namenode -bootstrapStandby -nonInteractive -force', logoutput=False, user=u'hdfs')] call_mocks.assert_has_calls(calls, any_order=True) @@ -1506,9 +1376,9 @@ class TestNamenode(RMFTestCase): ) put_structured_out_mock.assert_called_with({"securityState": "UNSECURED"}) - - @patch("utils.get_namenode_states") - def test_upgrade_restart(self, get_namenode_states_mock): + @patch.object(time, "sleep") + @patch("resource_management.libraries.functions.namenode_ha_utils.get_namenode_states") + def test_upgrade_restart(self, get_namenode_states_mock, sleep_mock): # Execution of nn_ru_lzo invokes a code path that invokes lzo installation, which # was failing in RU case. See hdfs.py and the lzo_enabled check that is in it. # Just executing the script is enough to test the fix @@ -1517,6 +1387,7 @@ class TestNamenode(RMFTestCase): unknown_namenodes = [] get_namenode_states_mock.return_value = active_namenodes, standby_namenodes, unknown_namenodes + self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/namenode.py", classname = "NameNode", command = "restart", @@ -1524,15 +1395,21 @@ class TestNamenode(RMFTestCase): stack_version = self.STACK_VERSION, target = RMFTestCase.TARGET_COMMON_SERVICES) + # now return unknown and ensure that we cannot proceed with the upgrade since we + # _must_ wait for Safemode to be done unknown_namenodes = active_namenodes active_namenodes = [] get_namenode_states_mock.return_value = active_namenodes, standby_namenodes, unknown_namenodes - self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/namenode.py", - classname = "NameNode", - command = "restart", - config_file = "nn_ru_lzo.json", - stack_version = self.STACK_VERSION, - target = RMFTestCase.TARGET_COMMON_SERVICES) + try: + self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/namenode.py", + classname = "NameNode", + command = "restart", + config_file = "nn_ru_lzo.json", + stack_version = self.STACK_VERSION, + target = RMFTestCase.TARGET_COMMON_SERVICES) + raise Fail("Expected a failure due to the inability to determine Active/Standby NameNode and Safemode detection") + except: + pass self.assertFalse(0 == len(Script.structuredOut)) self.assertTrue(Script.structuredOut.has_key("upgrade_type")) @@ -1541,7 +1418,7 @@ class TestNamenode(RMFTestCase): self.assertEquals("UPGRADE", Script.structuredOut["direction"]) - @patch("utils.get_namenode_states") + @patch("resource_management.libraries.functions.namenode_ha_utils.get_namenode_states") def test_upgrade_restart_eu(self, get_namenode_states_mock): active_namenodes = [('nn1', 'c6401.ambari.apache.org:50070')] standby_namenodes = [('nn2', 'c6402.ambari.apache.org:50070')] @@ -1563,7 +1440,7 @@ class TestNamenode(RMFTestCase): self.assertTrue(calls[0].startsWith("conf-select create-conf-dir --package hadoop --stack-version 2.3.2.0-2844 --conf-version 0")) - @patch("hdfs_namenode.check_is_active_namenode") + @patch("hdfs_namenode.is_this_namenode_active") @patch("resource_management.libraries.functions.setup_ranger_plugin_xml.setup_ranger_plugin") @patch("utils.get_namenode_states") def test_upgrade_restart_eu_with_ranger(self, get_namenode_states_mock, setup_ranger_plugin_mock, is_active_nn_mock):