Repository: ambari Updated Branches: refs/heads/trunk 8a20810b2 -> 0806468be
AMBARI-15991. DataNode and RegionServer during upgrade are reported as "failed" incorrectly (part2) (Daniel Gergely via oleewere) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/0806468b Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/0806468b Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/0806468b Branch: refs/heads/trunk Commit: 0806468bedeb764f2d97025cb086944e482ace84 Parents: 8a20810 Author: Daniel Gergely <[email protected]> Authored: Mon Apr 25 12:01:46 2016 +0200 Committer: oleewere <[email protected]> Committed: Mon Apr 25 12:01:46 2016 +0200 ---------------------------------------------------------------------- .../HBASE/0.96.0.2.0/package/scripts/upgrade.py | 14 ++++++-------- .../2.1.0.2.0/package/scripts/datanode_upgrade.py | 16 +++++++--------- .../test/python/stacks/2.0.6/HDFS/test_datanode.py | 10 +++++----- 3 files changed, 18 insertions(+), 22 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/0806468b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/upgrade.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/upgrade.py b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/upgrade.py index bc68cc6..b1a19e6 100644 --- a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/upgrade.py +++ b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/upgrade.py @@ -41,14 +41,8 @@ def post_regionserver(env): check_cmd = "echo 'status \"simple\"' | {0} shell".format(params.hbase_cmd) exec_cmd = "{0} {1}".format(params.kinit_cmd, check_cmd) - _wait_for_region_server_to_start(exec_cmd, params.hbase_user, params.hostname + ":", re.IGNORECASE) + call_and_match(exec_cmd, params.hbase_user, params.hostname + ":", re.IGNORECASE) -@retry(times=3, sleep_time=300, err_class=Fail) -def _wait_for_region_server_to_start(cmd, user, regex, regex_search_flags): - if not is_region_server_process_running(): - Logger.info("RegionServer process is not running") - raise Fail("RegionServer process is not running") - call_and_match(cmd, user, regex, regex_search_flags) def is_region_server_process_running(): try: @@ -58,9 +52,13 @@ def is_region_server_process_running(): except ComponentIsNotRunning: return False -@retry(times=15, sleep_time=2, err_class=Fail) +@retry(times=30, sleep_time=30, err_class=Fail) # keep trying for 15 mins def call_and_match(cmd, user, regex, regex_search_flags): + if not is_region_server_process_running(): + Logger.info("RegionServer process is not running") + raise Fail("RegionServer process is not running") + code, out = shell.call(cmd, user=user) if not (out and re.search(regex, out, regex_search_flags)): http://git-wip-us.apache.org/repos/asf/ambari/blob/0806468b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/datanode_upgrade.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/datanode_upgrade.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/datanode_upgrade.py index c8e2eab..b55237d 100644 --- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/datanode_upgrade.py +++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/datanode_upgrade.py @@ -73,13 +73,6 @@ def post_upgrade_check(hdfs_binary): Execute(params.dn_kinit_cmd, user=params.hdfs_user) # verify that the datanode has started and rejoined the HDFS cluster - _wait_for_datanode_to_join(hdfs_binary) - -@retry(times=3, sleep_time=300, err_class=Fail) -def _wait_for_datanode_to_join(hdfs_binary): - if not is_datanode_process_running(): - Logger.info("DataNode process is not running") - raise Fail("DataNode process is not running") _check_datanode_startup(hdfs_binary) @@ -125,16 +118,21 @@ def _check_datanode_shutdown(hdfs_binary): raise Fail('DataNode has not shutdown.') -@retry(times=12, sleep_time=10, err_class=Fail) +@retry(times=30, sleep_time=30, err_class=Fail) # keep trying for 15 mins def _check_datanode_startup(hdfs_binary): """ - Checks that a DataNode is reported as being alive via the + Checks that a DataNode process is running and DataNode is reported as being alive via the "hdfs dfsadmin -fs {namenode_address} -report -live" command. Once the DataNode is found to be alive this method will return, otherwise it will raise a Fail(...) and retry automatically. :param hdfs_binary: name/path of the HDFS binary to use :return: """ + + if not is_datanode_process_running(): + Logger.info("DataNode process is not running") + raise Fail("DataNode process is not running") + import params import socket http://git-wip-us.apache.org/repos/asf/ambari/blob/0806468b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_datanode.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_datanode.py b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_datanode.py index dbd76cf..90c12ca 100644 --- a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_datanode.py +++ b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_datanode.py @@ -515,7 +515,7 @@ class TestDatanode(RMFTestCase): config_file = "default.json", stack_version = self.STACK_VERSION, target = RMFTestCase.TARGET_COMMON_SERVICES, - call_mocks = [(0, shell_call_output)] * 3, + call_mocks = [(0, shell_call_output)], mocks_dict = mocks_dict ) @@ -535,13 +535,13 @@ class TestDatanode(RMFTestCase): config_file = "default.json", stack_version = self.STACK_VERSION, target = RMFTestCase.TARGET_COMMON_SERVICES, - call_mocks = [(0, 'There are no DataNodes here!')] * 36, + call_mocks = [(0, 'There are no DataNodes here!')] * 30, mocks_dict = mocks_dict ) self.fail('Missing DataNode should have caused a failure') except Fail,fail: self.assertTrue(mocks_dict['call'].called) - self.assertEqual(mocks_dict['call'].call_count,36) + self.assertEqual(mocks_dict['call'].call_count,30) @patch("socket.gethostbyname") @@ -556,13 +556,13 @@ class TestDatanode(RMFTestCase): config_file = "default.json", stack_version = self.STACK_VERSION, target = RMFTestCase.TARGET_COMMON_SERVICES, - call_mocks = [(1, 'some')] * 36, + call_mocks = [(1, 'some')] * 30, mocks_dict = mocks_dict ) self.fail('Invalid return code should cause a failure') except Fail,fail: self.assertTrue(mocks_dict['call'].called) - self.assertEqual(mocks_dict['call'].call_count,36) + self.assertEqual(mocks_dict['call'].call_count,30) @patch("resource_management.core.shell.call")
