Repository: ambari Updated Branches: refs/heads/trunk 8965be4cf -> c8a2da207
AMBARI-15972. Need more informative message incase of EU failures (aonishuk) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/c8a2da20 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/c8a2da20 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/c8a2da20 Branch: refs/heads/trunk Commit: c8a2da207af504a70c19ab903a7cefbade667cbf Parents: 8965be4 Author: Andrew Onishuk <[email protected]> Authored: Tue Apr 19 15:26:04 2016 +0300 Committer: Andrew Onishuk <[email protected]> Committed: Tue Apr 19 15:26:04 2016 +0300 ---------------------------------------------------------------------- .../package/scripts/nodemanager_upgrade.py | 41 +++++++++----------- .../YARN/2.1.0.2.0/package/scripts/service.py | 4 +- .../stacks/2.0.6/YARN/test_nodemanager.py | 10 ++--- 3 files changed, 26 insertions(+), 29 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/c8a2da20/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/nodemanager_upgrade.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/nodemanager_upgrade.py b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/nodemanager_upgrade.py index 65709ac..1c886f9 100644 --- a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/nodemanager_upgrade.py +++ b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/nodemanager_upgrade.py @@ -24,6 +24,7 @@ from resource_management.core.exceptions import Fail from resource_management.core.resources.system import Execute from resource_management.core import shell from resource_management.libraries.functions.decorator import retry +from resource_management.libraries.functions.show_logs import show_logs def post_upgrade_check(): @@ -38,8 +39,12 @@ def post_upgrade_check(): if params.security_enabled and params.nodemanager_kinit_cmd: Execute(params.nodemanager_kinit_cmd, user=params.yarn_user) - _check_nodemanager_startup() - + try: + _check_nodemanager_startup() + except Fail: + show_logs(params.yarn_log_dir, params.yarn_user) + raise + @retry(times=30, sleep_time=10, err_class=Fail) def _check_nodemanager_startup(): @@ -54,23 +59,15 @@ def _check_nodemanager_startup(): import socket command = 'yarn node -list -states=RUNNING' - - try: - # 'su - yarn -c "yarn node -status c6401.ambari.apache.org:45454"' - return_code, yarn_output = shell.call(command, user=params.yarn_user) - except: - raise Fail('Unable to determine if the NodeManager has started after upgrade.') - - if return_code == 0: - hostname = params.hostname.lower() - hostname_ip = socket.gethostbyname(params.hostname.lower()) - nodemanager_address = params.nm_address.lower() - yarn_output = yarn_output.lower() - - if hostname in yarn_output or nodemanager_address in yarn_output or hostname_ip in yarn_output: - Logger.info('NodeManager with ID {0} has rejoined the cluster.'.format(nodemanager_address)) - return - else: - raise Fail('NodeManager with ID {0} was not found in the list of running NodeManagers'.format(nodemanager_address)) - - raise Fail('Unable to determine if the NodeManager has started after upgrade (result code {0})'.format(str(return_code))) + return_code, yarn_output = shell.checked_call(command, user=params.yarn_user) + + hostname = params.hostname.lower() + hostname_ip = socket.gethostbyname(params.hostname.lower()) + nodemanager_address = params.nm_address.lower() + yarn_output = yarn_output.lower() + + if hostname in yarn_output or nodemanager_address in yarn_output or hostname_ip in yarn_output: + Logger.info('NodeManager with ID \'{0}\' has rejoined the cluster.'.format(nodemanager_address)) + return + else: + raise Fail('NodeManager with ID \'{0}\' was not found in the list of running NodeManagers. \'{1}\' output was:\n{2}'.format(nodemanager_address, command, yarn_output)) http://git-wip-us.apache.org/repos/asf/ambari/blob/c8a2da20/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service.py b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service.py index 0703c1f..b1179b9 100644 --- a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service.py +++ b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/scripts/service.py @@ -45,7 +45,7 @@ def service(componentName, action='start', serviceName='yarn'): daemon = format("{mapred_bin}/mr-jobhistory-daemon.sh") pid_file = format("{mapred_pid_dir}/mapred-{mapred_user}-{componentName}.pid") usr = params.mapred_user - log_dir = params.yarn_log_dir + log_dir = params.mapred_log_dir else: # !!! yarn-daemon.sh deletes the PID for us; if we remove it the script # may not work correctly when stopping the service @@ -53,7 +53,7 @@ def service(componentName, action='start', serviceName='yarn'): daemon = format("{yarn_bin}/yarn-daemon.sh") pid_file = format("{yarn_pid_dir}/yarn-{yarn_user}-{componentName}.pid") usr = params.yarn_user - log_dir = params.mapred_log_dir + log_dir = params.yarn_log_dir cmd = format("export HADOOP_LIBEXEC_DIR={hadoop_libexec_dir} && {daemon} --config {hadoop_conf_dir}") http://git-wip-us.apache.org/repos/asf/ambari/blob/c8a2da20/ambari-server/src/test/python/stacks/2.0.6/YARN/test_nodemanager.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.0.6/YARN/test_nodemanager.py b/ambari-server/src/test/python/stacks/2.0.6/YARN/test_nodemanager.py index 665ebfb..cb8178c 100644 --- a/ambari-server/src/test/python/stacks/2.0.6/YARN/test_nodemanager.py +++ b/ambari-server/src/test/python/stacks/2.0.6/YARN/test_nodemanager.py @@ -539,18 +539,18 @@ class TestNodeManager(RMFTestCase): config_file = "default.json", stack_version = self.STACK_VERSION, target = RMFTestCase.TARGET_COMMON_SERVICES, - call_mocks = [(0, process_output)], + checked_call_mocks = [(0, process_output)], mocks_dict = mocks_dict ) - self.assertTrue(mocks_dict['call'].called) - self.assertEqual(mocks_dict['call'].call_count,1) + self.assertTrue(mocks_dict['checked_call'].called) + self.assertEqual(mocks_dict['checked_call'].call_count,1) self.assertEquals( "yarn node -list -states=RUNNING", - mocks_dict['call'].call_args_list[0][0][0]) + mocks_dict['checked_call'].call_args_list[0][0][0]) - self.assertEquals( {'user': u'yarn'}, mocks_dict['call'].call_args_list[0][1]) + self.assertEquals( {'user': u'yarn'}, mocks_dict['checked_call'].call_args_list[0][1]) @patch('time.sleep')
