AMBARI-15324 - Kerberos Tickets Expire Too Frequently For Alerts (jonathanhurley)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/2efe8945 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/2efe8945 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/2efe8945 Branch: refs/heads/AMBARI-13364 Commit: 2efe8945c152fa5d4bdc51bf1828ca0ade4a004e Parents: 8615cac Author: Jonathan Hurley <[email protected]> Authored: Mon Mar 7 17:36:13 2016 -0500 Committer: Jonathan Hurley <[email protected]> Committed: Tue Mar 8 15:53:52 2016 -0500 ---------------------------------------------------------------------- ambari-agent/conf/unix/ambari-agent.ini | 1 + ambari-agent/conf/windows/ambari-agent.ini | 1 + .../ambari_agent/AlertSchedulerHandler.py | 13 +-- .../src/main/python/ambari_agent/Controller.py | 7 +- .../python/ambari_agent/alerts/base_alert.py | 7 +- .../python/ambari_agent/alerts/metric_alert.py | 11 ++- .../python/ambari_agent/alerts/port_alert.py | 4 +- .../ambari_agent/alerts/recovery_alert.py | 4 +- .../python/ambari_agent/alerts/script_alert.py | 13 ++- .../python/ambari_agent/alerts/web_alert.py | 11 +-- .../ambari_agent/TestAlertSchedulerHandler.py | 28 ++++--- .../src/test/python/ambari_agent/TestAlerts.py | 83 ++++++++++--------- .../test/python/ambari_agent/TestBaseAlert.py | 20 +++-- .../test/python/ambari_agent/TestMetricAlert.py | 11 ++- .../test/python/ambari_agent/TestPortAlert.py | 20 +++-- .../test/python/ambari_agent/TestScriptAlert.py | 7 +- .../libraries/functions/curl_krb_request.py | 86 +++++++++++++++++--- .../package/alerts/alert_checkpoint_time.py | 10 ++- .../package/alerts/alert_ha_namenode_health.py | 7 +- .../package/alerts/alert_metrics_deviation.py | 16 +++- .../package/alerts/alert_upgrade_finalized.py | 9 +- .../package/alerts/alert_webhcat_server.py | 18 ++-- .../package/alerts/alert_nodemanager_health.py | 6 +- .../alerts/alert_nodemanagers_summary.py | 10 ++- 24 files changed, 270 insertions(+), 133 deletions(-) 
---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-agent/conf/unix/ambari-agent.ini ---------------------------------------------------------------------- diff --git a/ambari-agent/conf/unix/ambari-agent.ini b/ambari-agent/conf/unix/ambari-agent.ini index 05e898a..4ec16d6 100644 --- a/ambari-agent/conf/unix/ambari-agent.ini +++ b/ambari-agent/conf/unix/ambari-agent.ini @@ -32,6 +32,7 @@ tolerate_download_failures=true run_as_user=root parallel_execution=0 alert_grace_period=5 +alert_kinit_timeout=14400000 system_resource_overrides=/etc/resource_overrides [security] http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-agent/conf/windows/ambari-agent.ini ---------------------------------------------------------------------- diff --git a/ambari-agent/conf/windows/ambari-agent.ini b/ambari-agent/conf/windows/ambari-agent.ini index e490f7c..df88be6 100644 --- a/ambari-agent/conf/windows/ambari-agent.ini +++ b/ambari-agent/conf/windows/ambari-agent.ini @@ -30,6 +30,7 @@ cache_dir=cache tolerate_download_failures=true parallel_execution=0 alert_grace_period=5 +alert_kinit_timeout=14400000 system_resource_overrides=\\etc\\resource_overrides [security] http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py b/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py index eb9945b..b84832d 100644 --- a/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py +++ b/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py @@ -35,6 +35,7 @@ from alerts.script_alert import ScriptAlert from alerts.web_alert import WebAlert from alerts.recovery_alert import RecoveryAlert from ambari_agent.ExitHelper import ExitHelper + logger = 
logging.getLogger(__name__) class AlertSchedulerHandler(): @@ -46,8 +47,7 @@ class AlertSchedulerHandler(): TYPE_RECOVERY = 'RECOVERY' def __init__(self, cachedir, stacks_dir, common_services_dir, host_scripts_dir, - alert_grace_period, cluster_configuration, config, recovery_manager, - in_minutes=True): + cluster_configuration, config, recovery_manager, in_minutes=True): self.cachedir = cachedir self.stacks_dir = stacks_dir @@ -58,7 +58,10 @@ class AlertSchedulerHandler(): # a mapping between a cluster name and a unique hash for all definitions self._cluster_hashes = {} - + + # the amount of time, in seconds, that an alert can run after its scheduled time + alert_grace_period = int(config.get('agent', 'alert_grace_period', 5)) + if not os.path.exists(cachedir): try: os.makedirs(cachedir) @@ -297,7 +300,7 @@ class AlertSchedulerHandler(): if source_type == AlertSchedulerHandler.TYPE_METRIC: alert = MetricAlert(json_definition, source, self.config) elif source_type == AlertSchedulerHandler.TYPE_PORT: - alert = PortAlert(json_definition, source) + alert = PortAlert(json_definition, source, self.config) elif source_type == AlertSchedulerHandler.TYPE_SCRIPT: source['stacks_directory'] = self.stacks_dir source['common_services_directory'] = self.common_services_dir @@ -306,7 +309,7 @@ class AlertSchedulerHandler(): elif source_type == AlertSchedulerHandler.TYPE_WEB: alert = WebAlert(json_definition, source, self.config) elif source_type == AlertSchedulerHandler.TYPE_RECOVERY: - alert = RecoveryAlert(json_definition, source, self.recovery_manger) + alert = RecoveryAlert(json_definition, source, self.config, self.recovery_manger) if alert is not None: alert.set_cluster(clusterName, hostName) http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-agent/src/main/python/ambari_agent/Controller.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/Controller.py 
b/ambari-agent/src/main/python/ambari_agent/Controller.py index eb2c363..c1c16ac 100644 --- a/ambari-agent/src/main/python/ambari_agent/Controller.py +++ b/ambari-agent/src/main/python/ambari_agent/Controller.py @@ -103,12 +103,9 @@ class Controller(threading.Thread): self.move_data_dir_mount_file() - self.alert_grace_period = int(config.get('agent', 'alert_grace_period', 5)) - - self.alert_scheduler_handler = AlertSchedulerHandler(alerts_cache_dir, + self.alert_scheduler_handler = AlertSchedulerHandler(alerts_cache_dir, stacks_cache_dir, common_services_cache_dir, host_scripts_cache_dir, - self.alert_grace_period, self.cluster_configuration, config, - self.recovery_manager) + self.cluster_configuration, config, self.recovery_manager) self.alert_scheduler_handler.start() http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py b/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py index fd6b03c..92db07c 100644 --- a/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py +++ b/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py @@ -29,6 +29,9 @@ logger = logging.getLogger() AlertUri = namedtuple('AlertUri', 'uri is_ssl_enabled') class BaseAlert(object): + # will force a kinit even if klist says there are valid tickets (4 hour default) + _DEFAULT_KINIT_TIMEOUT = 14400000 + RESULT_OK = "OK" RESULT_WARNING = "WARNING" RESULT_CRITICAL = "CRITICAL" @@ -38,12 +41,12 @@ class BaseAlert(object): HA_NAMESERVICE_PARAM = "{{ha-nameservice}}" HA_ALIAS_PARAM = "{{alias}}" - def __init__(self, alert_meta, alert_source_meta): + def __init__(self, alert_meta, alert_source_meta, config): self.alert_meta = alert_meta self.alert_source_meta = alert_source_meta self.cluster_name = '' self.host_name = '' - + self.config = config def interval(self): 
""" gets the defined interval this check should run """ http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py b/ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py index b2f4e33..d177bd4 100644 --- a/ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py +++ b/ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py @@ -42,7 +42,7 @@ DEFAULT_CONNECTION_TIMEOUT = 5.0 class MetricAlert(BaseAlert): def __init__(self, alert_meta, alert_source_meta, config): - super(MetricAlert, self).__init__(alert_meta, alert_source_meta) + super(MetricAlert, self).__init__(alert_meta, alert_source_meta, config) connection_timeout = DEFAULT_CONNECTION_TIMEOUT @@ -63,7 +63,9 @@ class MetricAlert(BaseAlert): self.connection_timeout = float(connection_timeout) self.curl_connection_timeout = int(connection_timeout) - self.config = config + # will force a kinit even if klist says there are valid tickets (4 hour default) + self.kinit_timeout = long(config.get('agent', 'alert_kinit_timeout', BaseAlert._DEFAULT_KINIT_TIMEOUT)) + def _collect(self): if self.metric_info is None: @@ -209,7 +211,7 @@ class MetricAlert(BaseAlert): response, error_msg, time_millis = curl_krb_request(tmp_dir, kerberos_keytab, kerberos_principal, url, "metric_alert", kerberos_executable_search_paths, False, self.get_name(), smokeuser, - connection_timeout=self.curl_connection_timeout) + connection_timeout=self.curl_connection_timeout, kinit_timer_ms = self.kinit_timeout) content = response else: @@ -254,7 +256,8 @@ class MetricAlert(BaseAlert): if not json_is_valid and security_enabled and kerberos_principal is not None and kerberos_keytab is not None: http_response_code, error_msg, time_millis = curl_krb_request(tmp_dir, kerberos_keytab, kerberos_principal, url, 
"metric_alert", kerberos_executable_search_paths, True, - self.get_name(), smokeuser, connection_timeout=self.curl_connection_timeout) + self.get_name(), smokeuser, connection_timeout=self.curl_connection_timeout, + kinit_timer_ms = self.kinit_timeout) return (value_list, http_response_code) http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-agent/src/main/python/ambari_agent/alerts/port_alert.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/alerts/port_alert.py b/ambari-agent/src/main/python/ambari_agent/alerts/port_alert.py index 92d28ad..1918327 100644 --- a/ambari-agent/src/main/python/ambari_agent/alerts/port_alert.py +++ b/ambari-agent/src/main/python/ambari_agent/alerts/port_alert.py @@ -33,8 +33,8 @@ DEFAULT_CRITICAL_TIMEOUT = 5.0 class PortAlert(BaseAlert): - def __init__(self, alert_meta, alert_source_meta): - super(PortAlert, self).__init__(alert_meta, alert_source_meta) + def __init__(self, alert_meta, alert_source_meta, config): + super(PortAlert, self).__init__(alert_meta, alert_source_meta, config) self.uri = None self.default_port = None http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-agent/src/main/python/ambari_agent/alerts/recovery_alert.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/alerts/recovery_alert.py b/ambari-agent/src/main/python/ambari_agent/alerts/recovery_alert.py index 760a737..3092a39 100644 --- a/ambari-agent/src/main/python/ambari_agent/alerts/recovery_alert.py +++ b/ambari-agent/src/main/python/ambari_agent/alerts/recovery_alert.py @@ -30,8 +30,8 @@ DEFAULT_CRITICAL_RECOVERIES_COUNT = 4 UNKNOWN_COMPONENT = 'UNKNOWN_COMPONENT' class RecoveryAlert(BaseAlert): - def __init__(self, alert_meta, alert_source_meta, recovery_manager): - super(RecoveryAlert, self).__init__(alert_meta, alert_source_meta) + def __init__(self, alert_meta, 
alert_source_meta, config, recovery_manager): + super(RecoveryAlert, self).__init__(alert_meta, alert_source_meta, config) self.recovery_manager = recovery_manager self.warning_count = DEFAULT_WARNING_RECOVERIES_COUNT http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-agent/src/main/python/ambari_agent/alerts/script_alert.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/alerts/script_alert.py b/ambari-agent/src/main/python/ambari_agent/alerts/script_alert.py index e8d0125..8dfa73e 100644 --- a/ambari-agent/src/main/python/ambari_agent/alerts/script_alert.py +++ b/ambari-agent/src/main/python/ambari_agent/alerts/script_alert.py @@ -24,13 +24,15 @@ import os import re from alerts.base_alert import BaseAlert from resource_management.core.environment import Environment -from resource_management.core.logger import Logger +from resource_management.libraries.functions.curl_krb_request import KERBEROS_KINIT_TIMER_PARAMETER from ambari_agent import Constants logger = logging.getLogger("ambari_alerts") class ScriptAlert(BaseAlert): + def __init__(self, alert_meta, alert_source_meta, config): + """ ScriptAlert reporting structure is output from the script itself """ alert_source_meta['reporting'] = { @@ -40,9 +42,8 @@ class ScriptAlert(BaseAlert): 'unknown': { 'text': '{0}' } } - super(ScriptAlert, self).__init__(alert_meta, alert_source_meta) + super(ScriptAlert, self).__init__(alert_meta, alert_source_meta, config) - self.config = config self.path = None self.stacks_dir = None self.common_services_dir = None @@ -50,6 +51,9 @@ class ScriptAlert(BaseAlert): self.path_to_script = None self.parameters = {} + # will force a kinit even if klist says there are valid tickets (4 hour default) + self.kinit_timeout = long(config.get('agent', 'alert_kinit_timeout', BaseAlert._DEFAULT_KINIT_TIMEOUT)) + if 'path' in alert_source_meta: self.path = alert_source_meta['path'] @@ -75,6 +79,9 @@ 
class ScriptAlert(BaseAlert): parameter_value = parameter['value'] self.parameters[parameter_name] = parameter_value + # pass in some basic parameters to the scripts + self.parameters[KERBEROS_KINIT_TIMER_PARAMETER] = self.kinit_timeout + def _collect(self): cmd_module = self._load_source() http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-agent/src/main/python/ambari_agent/alerts/web_alert.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/alerts/web_alert.py b/ambari-agent/src/main/python/ambari_agent/alerts/web_alert.py index 502526c..3f201c8 100644 --- a/ambari-agent/src/main/python/ambari_agent/alerts/web_alert.py +++ b/ambari-agent/src/main/python/ambari_agent/alerts/web_alert.py @@ -20,13 +20,13 @@ limitations under the License. import logging import time -import os import urllib2 import ssl + from functools import wraps from urllib2 import HTTPError -from tempfile import gettempdir +from tempfile import gettempdir from alerts.base_alert import BaseAlert from collections import namedtuple from resource_management.libraries.functions.get_port_from_url import get_port_from_url @@ -66,7 +66,7 @@ ssl.wrap_socket = sslwrap(ssl.wrap_socket) class WebAlert(BaseAlert): def __init__(self, alert_meta, alert_source_meta, config): - super(WebAlert, self).__init__(alert_meta, alert_source_meta) + super(WebAlert, self).__init__(alert_meta, alert_source_meta, config) connection_timeout = DEFAULT_CONNECTION_TIMEOUT @@ -83,7 +83,8 @@ class WebAlert(BaseAlert): self.connection_timeout = float(connection_timeout) self.curl_connection_timeout = int(connection_timeout) - self.config = config + # will force a kinit even if klist says there are valid tickets (4 hour default) + self.kinit_timeout = long(config.get('agent', 'alert_kinit_timeout', BaseAlert._DEFAULT_KINIT_TIMEOUT)) def _collect(self): @@ -194,7 +195,7 @@ class WebAlert(BaseAlert): response_code, error_msg, time_millis = 
curl_krb_request(tmp_dir, kerberos_keytab, kerberos_principal, url, "web_alert", kerberos_executable_search_paths, True, self.get_name(), smokeuser, - connection_timeout=self.curl_connection_timeout) + connection_timeout=self.curl_connection_timeout, kinit_timer_ms = self.kinit_timeout) else: # kerberos is not involved; use urllib2 response_code, time_millis, error_msg = self._make_web_request_urllib(url) http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-agent/src/test/python/ambari_agent/TestAlertSchedulerHandler.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/ambari_agent/TestAlertSchedulerHandler.py b/ambari-agent/src/test/python/ambari_agent/TestAlertSchedulerHandler.py index 9fd426f..f4e7ba1 100644 --- a/ambari-agent/src/test/python/ambari_agent/TestAlertSchedulerHandler.py +++ b/ambari-agent/src/test/python/ambari_agent/TestAlertSchedulerHandler.py @@ -24,9 +24,10 @@ import os from ambari_agent.AlertSchedulerHandler import AlertSchedulerHandler from ambari_agent.alerts.metric_alert import MetricAlert from ambari_agent.alerts.port_alert import PortAlert -from ambari_agent.alerts.script_alert import ScriptAlert from ambari_agent.alerts.web_alert import WebAlert +from AmbariConfig import AmbariConfig + from mock.mock import patch, Mock, MagicMock from unittest import TestCase @@ -34,6 +35,9 @@ TEST_PATH = os.path.join('ambari_agent', 'dummy_files') class TestAlertSchedulerHandler(TestCase): + def setUp(self): + self.config = AmbariConfig() + def test_load_definitions(self): scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None) @@ -42,7 +46,7 @@ class TestAlertSchedulerHandler(TestCase): self.assertEquals(len(definitions), 1) def test_json_to_callable_metric(self): - scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, 5, None, None, None) + scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None, 
self.config, None) json_definition = { 'source': { 'type': 'METRIC' @@ -63,7 +67,7 @@ class TestAlertSchedulerHandler(TestCase): } } - scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, 5, None, None, None) + scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None, self.config, None) callable_result = scheduler._AlertSchedulerHandler__json_to_callable('cluster', 'host', copy.deepcopy(json_definition)) self.assertTrue(callable_result is not None) @@ -79,7 +83,7 @@ class TestAlertSchedulerHandler(TestCase): } } - scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, 5, None, None, None) + scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None, self.config, None) callable_result = scheduler._AlertSchedulerHandler__json_to_callable('cluster', 'host', copy.deepcopy(json_definition)) self.assertTrue(callable_result is not None) @@ -94,7 +98,7 @@ class TestAlertSchedulerHandler(TestCase): } } - scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, 5, None, None, None) + scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None, self.config, None) callable_result = scheduler._AlertSchedulerHandler__json_to_callable('cluster', 'host', copy.deepcopy(json_definition)) self.assertTrue(callable_result is None) @@ -102,7 +106,7 @@ class TestAlertSchedulerHandler(TestCase): def test_execute_alert_noneScheduler(self): execution_commands = [] - scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, 5, None, None, None) + scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None, self.config, None) scheduler._AlertSchedulerHandler__scheduler = None alert_mock = Mock() scheduler._AlertSchedulerHandler__json_to_callable = Mock(return_value=alert_mock) @@ -114,7 +118,7 @@ class TestAlertSchedulerHandler(TestCase): def test_execute_alert_noneCommands(self): execution_commands = None - 
scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, 5, None, None, None) + scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None, self.config, None) alert_mock = Mock() scheduler._AlertSchedulerHandler__json_to_callable = Mock(return_value=alert_mock) @@ -125,7 +129,7 @@ class TestAlertSchedulerHandler(TestCase): def test_execute_alert_emptyCommands(self): execution_commands = [] - scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, 5, None, None, None) + scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None, self.config, None) alert_mock = Mock() scheduler._AlertSchedulerHandler__json_to_callable = Mock(return_value=alert_mock) @@ -144,7 +148,7 @@ class TestAlertSchedulerHandler(TestCase): } ] - scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, 5, None, None, None) + scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None, self.config, None) alert_mock = MagicMock() alert_mock.collect = Mock() alert_mock.set_helpers = Mock() @@ -159,7 +163,7 @@ class TestAlertSchedulerHandler(TestCase): self.assertTrue(alert_mock.collect.called) def test_load_definitions(self): - scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, 5, None, None, None) + scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None, self.config, None) scheduler._AlertSchedulerHandler__config_maps = { 'cluster': {} } @@ -170,7 +174,7 @@ class TestAlertSchedulerHandler(TestCase): self.assertTrue(isinstance(alert_def, PortAlert)) def test_load_definitions_noFile(self): - scheduler = AlertSchedulerHandler('wrong_path', 'wrong_path', 'wrong_path', 'wrong_path', 5, None, None, None) + scheduler = AlertSchedulerHandler('wrong_path', 'wrong_path', 'wrong_path', 'wrong_path', None, self.config, None) scheduler._AlertSchedulerHandler__config_maps = { 'cluster': {} } @@ -190,7 +194,7 @@ class 
TestAlertSchedulerHandler(TestCase): } ] - scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, 5, None, None, None) + scheduler = AlertSchedulerHandler(TEST_PATH, TEST_PATH, TEST_PATH, TEST_PATH, None, self.config, None) alert_mock = MagicMock() alert_mock.interval = Mock(return_value=5) alert_mock.collect = Mock() http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-agent/src/test/python/ambari_agent/TestAlerts.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/ambari_agent/TestAlerts.py b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py index 8344238..bf56703 100644 --- a/ambari-agent/src/test/python/ambari_agent/TestAlerts.py +++ b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py @@ -41,11 +41,14 @@ from collections import namedtuple from mock.mock import MagicMock, patch from unittest import TestCase +from AmbariConfig import AmbariConfig + class TestAlerts(TestCase): def setUp(self): # save original open() method for later use self.original_open = open + self.config = AmbariConfig() def tearDown(self): sys.stdout == sys.__stdout__ @@ -62,8 +65,8 @@ class TestAlerts(TestCase): cluster_configuration = self.__get_cluster_configuration() ash = AlertSchedulerHandler(test_file_path, test_stack_path, - test_common_services_path, test_host_scripts_path, 5, cluster_configuration, - None, None) + test_common_services_path, test_host_scripts_path, cluster_configuration, + self.config, None) ash.start() @@ -89,7 +92,7 @@ class TestAlerts(TestCase): 0,2000,336283100000, socket.timeout,336283200000] - alert = PortAlert(definition_json, definition_json['source']) + alert = PortAlert(definition_json, definition_json['source'], self.config) alert.set_helpers(collector, cluster_configuration) alert.set_cluster("c1", "c6401.ambari.apache.org") self.assertEquals(6, alert.interval()) @@ -134,7 +137,7 @@ class TestAlerts(TestCase): 
self.__update_cluster_configuration(cluster_configuration, {}) rm = RecoveryManager(tempfile.mktemp(), True) - alert = RecoveryAlert(definition_json, definition_json['source'], rm) + alert = RecoveryAlert(definition_json, definition_json['source'], self.config, rm) alert.set_helpers(collector, cluster_configuration) alert.set_cluster("c1", "c6401.ambari.apache.org") self.assertEquals(1, alert.interval()) @@ -221,7 +224,7 @@ class TestAlerts(TestCase): cluster_configuration = self.__get_cluster_configuration() self.__update_cluster_configuration(cluster_configuration, configuration) - alert = PortAlert(definition_json, definition_json['source']) + alert = PortAlert(definition_json, definition_json['source'], self.config) alert.set_helpers(collector, cluster_configuration) alert.set_cluster("c1", "c6402.ambari.apache.org") @@ -266,7 +269,7 @@ class TestAlerts(TestCase): cluster_configuration = self.__get_cluster_configuration() - alert = PortAlert(definition_json, definition_json['source']) + alert = PortAlert(definition_json, definition_json['source'], self.config) alert.set_helpers(AlertCollector(), cluster_configuration) alert.set_cluster("c1", "c6401.ambari.apache.org") @@ -354,7 +357,7 @@ class TestAlerts(TestCase): cluster_configuration = self.__get_cluster_configuration() self.__update_cluster_configuration(cluster_configuration, configuration) - alert = MetricAlert(definition_json, definition_json['source'], None) + alert = MetricAlert(definition_json, definition_json['source'], self.config) alert.set_helpers(collector, cluster_configuration) alert.set_cluster("c1", "c6401.ambari.apache.org") @@ -388,7 +391,7 @@ class TestAlerts(TestCase): del definition_json['source']['jmx']['value'] collector = AlertCollector() - alert = MetricAlert(definition_json, definition_json['source'], None) + alert = MetricAlert(definition_json, definition_json['source'], self.config) alert.set_helpers(collector, cluster_configuration) alert.set_cluster("c1", 
"c6401.ambari.apache.org") @@ -412,7 +415,7 @@ class TestAlerts(TestCase): # indicating that there was no URI and the result is UNKNOWN collector = AlertCollector() cluster_configuration = self.__get_cluster_configuration() - alert = MetricAlert(definition_json, definition_json['source'], None) + alert = MetricAlert(definition_json, definition_json['source'], self.config) alert.set_helpers(collector, cluster_configuration) alert.set_cluster("c1", "c6401.ambari.apache.org") alert.collect() @@ -428,7 +431,7 @@ class TestAlerts(TestCase): cluster_configuration = self.__get_cluster_configuration() self.__update_cluster_configuration(cluster_configuration, configuration) - alert = MetricAlert(definition_json, definition_json['source'], None) + alert = MetricAlert(definition_json, definition_json['source'], self.config) alert.set_helpers(collector, cluster_configuration) alert.set_cluster("c1", "c6401.ambari.apache.org") alert.collect() @@ -444,7 +447,7 @@ class TestAlerts(TestCase): self.__update_cluster_configuration(cluster_configuration, configuration) collector = AlertCollector() - alert = MetricAlert(definition_json, definition_json['source'], None) + alert = MetricAlert(definition_json, definition_json['source'], self.config) alert.set_helpers(collector, cluster_configuration) alert.set_cluster("c1", "c6401.ambari.apache.org") alert.collect() @@ -460,7 +463,7 @@ class TestAlerts(TestCase): self.__update_cluster_configuration(cluster_configuration, configuration) collector = AlertCollector() - alert = MetricAlert(definition_json, definition_json['source'], None) + alert = MetricAlert(definition_json, definition_json['source'], self.config) alert.set_helpers(collector, cluster_configuration) alert.set_cluster("c1", "c6401.ambari.apache.org") alert.collect() @@ -477,7 +480,7 @@ class TestAlerts(TestCase): self.__update_cluster_configuration(cluster_configuration, configuration) collector = AlertCollector() - alert = MetricAlert(definition_json, 
definition_json['source'], None) + alert = MetricAlert(definition_json, definition_json['source'], self.config) alert.set_helpers(collector, cluster_configuration) alert.set_cluster("c1", "c6401.ambari.apache.org") alert.collect() @@ -501,7 +504,7 @@ class TestAlerts(TestCase): cluster_configuration = self.__get_cluster_configuration() self.__update_cluster_configuration(cluster_configuration, configuration) - alert = WebAlert(definition_json, definition_json['source'], None) + alert = WebAlert(definition_json, definition_json['source'], self.config) alert.set_helpers(collector, cluster_configuration) alert.set_cluster("c1", "c6401.ambari.apache.org") alert.collect() @@ -515,7 +518,7 @@ class TestAlerts(TestCase): # run the alert and check HTTP 500 wa_make_web_request_mock.return_value = WebResponse(500,1.234,"Internal Server Error") collector = AlertCollector() - alert = WebAlert(definition_json, definition_json['source'], None) + alert = WebAlert(definition_json, definition_json['source'], self.config) alert.set_helpers(collector, cluster_configuration) alert.set_cluster("c1", "c6401.ambari.apache.org") alert.collect() @@ -530,7 +533,7 @@ class TestAlerts(TestCase): wa_make_web_request_mock.return_value = WebResponse(0,0,'error message') collector = AlertCollector() - alert = WebAlert(definition_json, definition_json['source'], None) + alert = WebAlert(definition_json, definition_json['source'], self.config) alert.set_helpers(collector, cluster_configuration) alert.set_cluster("c1", "c6401.ambari.apache.org") alert.collect() @@ -551,7 +554,7 @@ class TestAlerts(TestCase): self.__update_cluster_configuration(cluster_configuration, configuration) collector = AlertCollector() - alert = WebAlert(definition_json, definition_json['source'], None) + alert = WebAlert(definition_json, definition_json['source'], self.config) alert.set_helpers(collector, cluster_configuration) alert.set_cluster("c1", "c6401.ambari.apache.org") @@ -573,8 +576,8 @@ class TestAlerts(TestCase): 
cluster_configuration = self.__get_cluster_configuration() ash = AlertSchedulerHandler(test_file_path, test_stack_path, - test_common_services_path, test_host_scripts_path, 5, cluster_configuration, - None, None) + test_common_services_path, test_host_scripts_path, cluster_configuration, + self.config, None) ash.start() @@ -594,7 +597,7 @@ class TestAlerts(TestCase): cluster_configuration = self.__get_cluster_configuration() self.__update_cluster_configuration(cluster_configuration, configuration) - alert = PortAlert(definition_json, definition_json['source']) + alert = PortAlert(definition_json, definition_json['source'], self.config) alert.set_helpers(collector, cluster_configuration) alert.set_cluster("c1", "c6401.ambari.apache.org") self.assertEquals(6, alert.interval()) @@ -620,8 +623,8 @@ class TestAlerts(TestCase): cluster_configuration = self.__get_cluster_configuration() ash = AlertSchedulerHandler(test_file_path, test_stack_path, - test_common_services_path, test_host_scripts_path, 5, cluster_configuration, - None, None) + test_common_services_path, test_host_scripts_path, cluster_configuration, + self.config, None) ash.start() @@ -629,20 +632,20 @@ class TestAlerts(TestCase): definition_json = self._get_port_alert_definition() - alert = PortAlert(definition_json, definition_json['source']) + alert = PortAlert(definition_json, definition_json['source'], self.config) ash.schedule_definition(alert) self.assertEquals(2, ash.get_job_count()) definition_json['enabled'] = False - alert = PortAlert(definition_json, definition_json['source']) + alert = PortAlert(definition_json, definition_json['source'], self.config) ash.schedule_definition(alert) # verify disabled alert not scheduled self.assertEquals(2, ash.get_job_count()) definition_json['enabled'] = True - pa = PortAlert(definition_json, definition_json['source']) + pa = PortAlert(definition_json, definition_json['source'], self.config) ash.schedule_definition(pa) # verify enabled alert was scheduled @@ 
-656,8 +659,8 @@ class TestAlerts(TestCase): cluster_configuration = self.__get_cluster_configuration() ash = AlertSchedulerHandler(test_file_path, test_stack_path, - test_common_services_path, test_host_scripts_path, 5, cluster_configuration, - None, None) + test_common_services_path, test_host_scripts_path, cluster_configuration, + self.config, None) ash.start() @@ -691,7 +694,7 @@ class TestAlerts(TestCase): cluster_configuration = self.__get_cluster_configuration() self.__update_cluster_configuration(cluster_configuration, configuration) - alert = ScriptAlert(definition_json, definition_json['source'], None) + alert = ScriptAlert(definition_json, definition_json['source'], self.config) # instruct the test alert script to be skipped alert.set_helpers(collector, cluster_configuration ) @@ -709,32 +712,32 @@ class TestAlerts(TestCase): def test_default_reporting_text(self): definition_json = self._get_script_alert_definition() - alert = ScriptAlert(definition_json, definition_json['source'], None) + alert = ScriptAlert(definition_json, definition_json['source'], self.config) self.assertEquals(alert._get_reporting_text(alert.RESULT_OK), '{0}') self.assertEquals(alert._get_reporting_text(alert.RESULT_WARNING), '{0}') self.assertEquals(alert._get_reporting_text(alert.RESULT_CRITICAL), '{0}') definition_json['source']['type'] = 'PORT' - alert = PortAlert(definition_json, definition_json['source']) + alert = PortAlert(definition_json, definition_json['source'], self.config) self.assertEquals(alert._get_reporting_text(alert.RESULT_OK), 'TCP OK - {0:.4f} response on port {1}') self.assertEquals(alert._get_reporting_text(alert.RESULT_WARNING), 'TCP OK - {0:.4f} response on port {1}') self.assertEquals(alert._get_reporting_text(alert.RESULT_CRITICAL), 'Connection failed: {0} to {1}:{2}') definition_json['source']['type'] = 'WEB' - alert = WebAlert(definition_json, definition_json['source'], None) + alert = WebAlert(definition_json, definition_json['source'], self.config) 
self.assertEquals(alert._get_reporting_text(alert.RESULT_OK), 'HTTP {0} response in {2:.4f} seconds') self.assertEquals(alert._get_reporting_text(alert.RESULT_WARNING), 'HTTP {0} response in {2:.4f} seconds') self.assertEquals(alert._get_reporting_text(alert.RESULT_CRITICAL), 'Connection failed to {1}') definition_json['source']['type'] = 'METRIC' - alert = MetricAlert(definition_json, definition_json['source'], None) + alert = MetricAlert(definition_json, definition_json['source'], self.config) self.assertEquals(alert._get_reporting_text(alert.RESULT_OK), '{0}') self.assertEquals(alert._get_reporting_text(alert.RESULT_WARNING), '{0}') self.assertEquals(alert._get_reporting_text(alert.RESULT_CRITICAL), '{0}') rm = RecoveryManager(tempfile.mktemp()) definition_json['source']['type'] = 'RECOVERY' - alert = RecoveryAlert(definition_json, definition_json['source'], rm) + alert = RecoveryAlert(definition_json, definition_json['source'], self.config, rm) self.assertEquals(alert._get_reporting_text(alert.RESULT_OK), 'No recovery operations executed for {2}{0}.') self.assertEquals(alert._get_reporting_text(alert.RESULT_WARNING), '{1} recovery operations executed for {2}{0}.') self.assertEquals(alert._get_reporting_text(alert.RESULT_CRITICAL), '{1} recovery operations executed for {2}{0}.') @@ -973,7 +976,7 @@ class TestAlerts(TestCase): cluster_configuration = self.__get_cluster_configuration() self.__update_cluster_configuration(cluster_configuration, configuration) - alert = MetricAlert(definition_json, definition_json['source'], None) + alert = MetricAlert(definition_json, definition_json['source'], self.config) alert.set_helpers(collector, cluster_configuration) alert.set_cluster("c1", "c6401.ambari.apache.org") @@ -1065,13 +1068,13 @@ class TestAlerts(TestCase): def test_uri_timeout(self): # the web alert will have a timeout value definition_json = self._get_web_alert_definition() - alert = WebAlert(definition_json, definition_json['source'], None) + alert = 
WebAlert(definition_json, definition_json['source'], self.config) self.assertEquals(5.678, alert.connection_timeout) self.assertEquals(5, alert.curl_connection_timeout) # the metric definition will not and should default to 5.0 definition_json = self._get_metric_alert_definition() - alert = MetricAlert(definition_json, definition_json['source'], None) + alert = MetricAlert(definition_json, definition_json['source'], self.config) self.assertEquals(5.0, alert.connection_timeout) @@ -1129,7 +1132,7 @@ class TestAlerts(TestCase): cluster_configuration = self.__get_cluster_configuration() self.__update_cluster_configuration(cluster_configuration, configuration) - alert = MetricAlert(definition_json, definition_json['source'], None) + alert = MetricAlert(definition_json, definition_json['source'], self.config) alert.set_helpers(collector, cluster_configuration) alert.set_cluster("c1", "c6401.ambari.apache.org") @@ -1162,7 +1165,7 @@ class TestAlerts(TestCase): cluster_configuration = self.__get_cluster_configuration() self.__update_cluster_configuration(cluster_configuration, configuration) - alert = PortAlert(definition_json, definition_json['source']) + alert = PortAlert(definition_json, definition_json['source'], self.config) alert.set_helpers(collector, cluster_configuration) alert.set_cluster("c1", "c6402.ambari.apache.org") @@ -1200,7 +1203,7 @@ class TestAlerts(TestCase): cluster_configuration = self.__get_cluster_configuration() self.__update_cluster_configuration(cluster_configuration, configuration) - alert = PortAlert(definition_json, definition_json['source']) + alert = PortAlert(definition_json, definition_json['source'], self.config) alert.set_helpers(collector, cluster_configuration) alert.set_cluster("c1", "c6402.ambari.apache.org") @@ -1523,7 +1526,7 @@ class MockAlert(BaseAlert): Mock class for testing """ def __init__(self): - super(MockAlert, self).__init__(None, None) + super(MockAlert, self).__init__(None, None, AmbariConfig()) def get_name(self): 
return "mock_alert" http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-agent/src/test/python/ambari_agent/TestBaseAlert.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/ambari_agent/TestBaseAlert.py b/ambari-agent/src/test/python/ambari_agent/TestBaseAlert.py index e67c894..62877f2 100644 --- a/ambari-agent/src/test/python/ambari_agent/TestBaseAlert.py +++ b/ambari-agent/src/test/python/ambari_agent/TestBaseAlert.py @@ -20,16 +20,18 @@ limitations under the License. from unittest import TestCase from alerts.base_alert import BaseAlert - -alert = BaseAlert({}, {}) +from AmbariConfig import AmbariConfig class TestBaseAlert(TestCase): + def setUp(self): + self.config = AmbariConfig() + def test_interval_noData(self): alert_meta = {} alert_source_meta = {} - alert = BaseAlert(alert_meta, alert_source_meta) + alert = BaseAlert(alert_meta, alert_source_meta, self.config) interval = alert.interval() self.assertEquals(interval, 1) @@ -37,7 +39,7 @@ class TestBaseAlert(TestCase): alert_meta = {'interval': 0} alert_source_meta = {} - alert = BaseAlert(alert_meta, alert_source_meta) + alert = BaseAlert(alert_meta, alert_source_meta, self.config) interval = alert.interval() self.assertEquals(interval, 1) @@ -45,7 +47,7 @@ class TestBaseAlert(TestCase): alert_meta = {'interval': 5} alert_source_meta = {} - alert = BaseAlert(alert_meta, alert_source_meta) + alert = BaseAlert(alert_meta, alert_source_meta, self.config) interval = alert.interval() self.assertEquals(interval, 5) @@ -53,7 +55,7 @@ class TestBaseAlert(TestCase): alert_meta = {'enabled': 'true'} alert_source_meta = {} - alert = BaseAlert(alert_meta, alert_source_meta) + alert = BaseAlert(alert_meta, alert_source_meta, self.config) enabled = alert.is_enabled() self.assertEquals(enabled, 'true') @@ -61,7 +63,7 @@ class TestBaseAlert(TestCase): alert_meta = {'name': 'ambari'} alert_source_meta = {} - alert = BaseAlert(alert_meta, 
alert_source_meta) + alert = BaseAlert(alert_meta, alert_source_meta, self.config) name = alert.get_name() self.assertEquals(name, 'ambari') @@ -69,7 +71,7 @@ class TestBaseAlert(TestCase): alert_meta = {'uuid': '123'} alert_source_meta = {} - alert = BaseAlert(alert_meta, alert_source_meta) + alert = BaseAlert(alert_meta, alert_source_meta, self.config) uuid = alert.get_uuid() self.assertEquals(uuid, '123') @@ -79,7 +81,7 @@ class TestBaseAlert(TestCase): cluster = 'cluster' host = 'host' - alert = BaseAlert(alert_meta, alert_source_meta) + alert = BaseAlert(alert_meta, alert_source_meta, self.config) alert.set_cluster(cluster, host) self.assertEquals(alert.cluster_name, cluster) self.assertEquals(alert.host_name, host) http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-agent/src/test/python/ambari_agent/TestMetricAlert.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/ambari_agent/TestMetricAlert.py b/ambari-agent/src/test/python/ambari_agent/TestMetricAlert.py index 23e9f13..9dfb8e9 100644 --- a/ambari-agent/src/test/python/ambari_agent/TestMetricAlert.py +++ b/ambari-agent/src/test/python/ambari_agent/TestMetricAlert.py @@ -21,10 +21,13 @@ limitations under the License. 
from unittest import TestCase from alerts.metric_alert import MetricAlert from mock.mock import Mock, MagicMock, patch -import os +from AmbariConfig import AmbariConfig class TestMetricAlert(TestCase): + def setUp(self): + self.config = AmbariConfig() + @patch("urllib2.urlopen") def test_collect(self, urllib): alert_meta = { @@ -81,7 +84,7 @@ class TestMetricAlert(TestCase): mock_collector = MagicMock() mock_collector.put = Mock(side_effect=collector_side_effect) - alert = MetricAlert(alert_meta, alert_source_meta) + alert = MetricAlert(alert_meta, alert_source_meta, self.config) alert.set_helpers(mock_collector, {'foo-site/bar': 12, 'foo-site/baz': 'asd'}) alert.set_cluster(cluster, host) @@ -143,7 +146,7 @@ class TestMetricAlert(TestCase): mock_collector = MagicMock() mock_collector.put = Mock(side_effect=collector_side_effect) - alert = MetricAlert(alert_meta, alert_source_meta) + alert = MetricAlert(alert_meta, alert_source_meta, self.config) alert.set_helpers(mock_collector, {'foo-site/bar': 12, 'foo-site/baz': 'asd'}) alert.set_cluster(cluster, host) @@ -204,7 +207,7 @@ class TestMetricAlert(TestCase): mock_collector = MagicMock() mock_collector.put = Mock(side_effect=collector_side_effect) - alert = MetricAlert(alert_meta, alert_source_meta, None) + alert = MetricAlert(alert_meta, alert_source_meta, self.config) alert.set_helpers(mock_collector, {'foo-site/bar': 12, 'foo-site/baz': 'asd'}) alert.set_cluster(cluster, host) http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-agent/src/test/python/ambari_agent/TestPortAlert.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/ambari_agent/TestPortAlert.py b/ambari-agent/src/test/python/ambari_agent/TestPortAlert.py index 195cc63..dffa56c 100644 --- a/ambari-agent/src/test/python/ambari_agent/TestPortAlert.py +++ b/ambari-agent/src/test/python/ambari_agent/TestPortAlert.py @@ -21,9 +21,13 @@ limitations under the License. 
from unittest import TestCase from alerts.port_alert import PortAlert from mock.mock import Mock, MagicMock, patch +from AmbariConfig import AmbariConfig class TestPortAlert(TestCase): + def setUp(self): + self.config = AmbariConfig() + @patch("socket.socket") @patch("time.time") def test_collect_defaultPort(self, time, socket): @@ -44,7 +48,7 @@ class TestPortAlert(TestCase): expected_state = 'OK' expected_text = 'TCP OK - 0.2010 response on port 80' time.side_effect = [123, 324, 567] - alert = PortAlert(alert_meta, alert_source_meta) + alert = PortAlert(alert_meta, alert_source_meta, self.config) alert.set_cluster(cluster, host) def collector_side_effect(clus, data): @@ -84,7 +88,7 @@ class TestPortAlert(TestCase): expected_state = 'WARNING' expected_text = 'TCP OK - 3.1170 response on port 8080' time.side_effect = [123, 3240, 567] - alert = PortAlert(alert_meta, alert_source_meta) + alert = PortAlert(alert_meta, alert_source_meta, self.config) alert.set_cluster(cluster, host) def collector_side_effect(clus, data): @@ -124,7 +128,7 @@ class TestPortAlert(TestCase): expected_state = 'CRITICAL' expected_text = 'Connection failed: Socket Timeout to 192.168.0.1:8080' time.side_effect = [123, 5240, 567] - alert = PortAlert(alert_meta, alert_source_meta) + alert = PortAlert(alert_meta, alert_source_meta, self.config) alert.set_cluster(cluster, host) def collector_side_effect(clus, data): @@ -163,7 +167,7 @@ class TestPortAlert(TestCase): expected_state = 'CRITICAL' expected_text = 'Connection failed: Socket Timeout to host1:80' time.side_effect = [123, 5240, 567] - alert = PortAlert(alert_meta, alert_source_meta) + alert = PortAlert(alert_meta, alert_source_meta, self.config) alert.set_cluster(cluster, host) def collector_side_effect(clus, data): @@ -204,7 +208,7 @@ class TestPortAlert(TestCase): expected_text = 'Connection failed: exception message to 192.168.0.1:8080' time.side_effect = [123, 345, 567] socket.side_effect = Exception('exception message') - alert = 
PortAlert(alert_meta, alert_source_meta) + alert = PortAlert(alert_meta, alert_source_meta, self.config) alert.set_cluster(cluster, host) def collector_side_effect(clus, data): @@ -249,7 +253,7 @@ class TestPortAlert(TestCase): expected_state = 'OK' expected_text = 'TCP OK - 3.1170 response on port 8080' time.side_effect = [123, 3240, 567] - alert = PortAlert(alert_meta, alert_source_meta) + alert = PortAlert(alert_meta, alert_source_meta, self.config) alert.set_cluster(cluster, host) def collector_side_effect(clus, data): @@ -294,7 +298,7 @@ class TestPortAlert(TestCase): expected_state = 'CRITICAL' expected_text = 'Connection failed: Socket Timeout to 192.168.0.1:8080' time.side_effect = [123, 3240, 567] - alert = PortAlert(alert_meta, alert_source_meta) + alert = PortAlert(alert_meta, alert_source_meta, self.config) alert.set_cluster(cluster, host) def collector_side_effect(clus, data): @@ -339,7 +343,7 @@ class TestPortAlert(TestCase): expected_state = 'CRITICAL' expected_text = 'Connection failed: Socket Timeout to 192.168.0.1:8080' time.side_effect = [120, 123, 5240, 567] - alert = PortAlert(alert_meta, alert_source_meta) + alert = PortAlert(alert_meta, alert_source_meta, self.config) alert.set_cluster(cluster, host) def collector_side_effect(clus, data): http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-agent/src/test/python/ambari_agent/TestScriptAlert.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/ambari_agent/TestScriptAlert.py b/ambari-agent/src/test/python/ambari_agent/TestScriptAlert.py index 46c7651..a56258b 100644 --- a/ambari-agent/src/test/python/ambari_agent/TestScriptAlert.py +++ b/ambari-agent/src/test/python/ambari_agent/TestScriptAlert.py @@ -23,10 +23,15 @@ from alerts.script_alert import ScriptAlert from mock.mock import Mock, MagicMock, patch import os +from AmbariConfig import AmbariConfig + DUMMY_PATH = 
os.path.join(os.path.dirname(os.path.abspath(__file__)), 'dummy_files') class TestScriptAlert(TestCase): + def setUp(self): + self.config = AmbariConfig() + def test_collect(self): alert_meta = { 'name': 'alert1', @@ -59,7 +64,7 @@ class TestScriptAlert(TestCase): mock_collector = MagicMock() mock_collector.put = Mock(side_effect=collector_side_effect) - alert = ScriptAlert(alert_meta, alert_source_meta, {}) + alert = ScriptAlert(alert_meta, alert_source_meta, self.config) alert.set_helpers(mock_collector, {'foo-site/bar': 12, 'foo-site/baz': 'asd'}) alert.set_cluster(cluster, host) http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-common/src/main/python/resource_management/libraries/functions/curl_krb_request.py ---------------------------------------------------------------------- diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/curl_krb_request.py b/ambari-common/src/main/python/resource_management/libraries/functions/curl_krb_request.py index 1ccc45f..21cdd09 100644 --- a/ambari-common/src/main/python/resource_management/libraries/functions/curl_krb_request.py +++ b/ambari-common/src/main/python/resource_management/libraries/functions/curl_krb_request.py @@ -24,7 +24,6 @@ __all__ = ["curl_krb_request"] import logging import os import time -import threading from resource_management.core import global_lock from resource_management.core import shell @@ -49,12 +48,52 @@ MAX_TIMEOUT_DEFAULT = CONNECTION_TIMEOUT_DEFAULT + 2 logger = logging.getLogger() +# a dictionary of the last time that a kinit was performed for a specific cache +# dictionaries are inherently thread-safe in Python via the Global Interpreter Lock +# https://docs.python.org/2/glossary.html#term-global-interpreter-lock +_KINIT_CACHE_TIMES = {} + +# the default time in between forced kinit calls (4 hours) +DEFAULT_KERBEROS_KINIT_TIMER_MS = 14400000 + +# a parameter which can be used to pass around the above timeout value +KERBEROS_KINIT_TIMER_PARAMETER = 
"kerberos.kinit.timer" + def curl_krb_request(tmp_dir, keytab, principal, url, cache_file_prefix, - krb_exec_search_paths, return_only_http_code, alert_name, user, - connection_timeout = CONNECTION_TIMEOUT_DEFAULT): + krb_exec_search_paths, return_only_http_code, caller_label, user, + connection_timeout = CONNECTION_TIMEOUT_DEFAULT, + kinit_timer_ms=DEFAULT_KERBEROS_KINIT_TIMER_MS): + """ + Makes a curl request using the kerberos credentials stored in a calculated cache file. The + cache file is created by combining the supplied principal, keytab, user, and request name into + a unique hash. + + This function will use the klist command to determine if the cache is expired and will perform + a kinit if necessary. Additionally, it has an internal timer to force a kinit after a + configurable amount of time. This is to prevent boundary issues where requests hit the edge + of a ticket's lifetime. + + :param tmp_dir: the directory to use for storing the local kerberos cache for this request. + :param keytab: the location of the keytab to use when performing a kinit + :param principal: the principal to use when performing a kinit + :param url: the URL to request + :param cache_file_prefix: an identifier used to build the unique cache name for this request. + This ensures that multiple requests can use the same cache. + :param krb_exec_search_paths: the search path to use for invoking kerberos binaries + :param return_only_http_code: True to return only the HTTP code, False to return GET content + :param caller_label: an identifier to give context into the caller of this module (used for logging) + :param user: the user to invoke the curl command as + :param connection_timeout: if specified, a connection timeout for curl (default 10 seconds) + :param kinit_timer_ms: if specified, the time (in ms), before forcing a kinit even if the + klist cache is still valid. 
+ :return: + """ import uuid + # start off false + is_kinit_required = False + # Create the kerberos credentials cache (ccache) file and set it in the environment to use # when executing curl. Use the md5 hash of the combination of the principal and keytab file # to generate a (relatively) unique cache filename so that we can use it as needed. Scope @@ -75,19 +114,41 @@ def curl_krb_request(tmp_dir, keytab, principal, url, cache_file_prefix, else: klist_path_local = get_klist_path() - if shell.call("{0} -s {1}".format(klist_path_local, ccache_file_path), user=user)[0] != 0: + # take a look at the last time kinit was run for the specified cache and force a new + # kinit if it's time; this helps to avoid problems approaching ticket boundary when + # executing a klist and then a curl + last_kinit_time = _KINIT_CACHE_TIMES.get(ccache_file_name, 0) + current_time = long(time.time()) + if current_time - kinit_timer_ms > last_kinit_time: + is_kinit_required = True + + # if the time has not expired, double-check that the cache still has a valid ticket + if not is_kinit_required: + klist_command = "{0} -s {1}".format(klist_path_local, ccache_file_path) + is_kinit_required = (shell.call(klist_command, user=user)[0] != 0) + + # if kinit is required, then perform the kinit + if is_kinit_required: if krb_exec_search_paths: kinit_path_local = get_kinit_path(krb_exec_search_paths) else: kinit_path_local = get_kinit_path() - logger.debug("[Alert][{0}] Enabling Kerberos authentication via GSSAPI using ccache at {1}.".format( - alert_name, ccache_file_path)) + logger.debug("Enabling Kerberos authentication for %s via GSSAPI using ccache at %s", + caller_label, ccache_file_path) + + # kinit; there's no need to set a ticket timeout as this will use the default invalidation + # configured in the krb5.conf - regenerating keytabs will not prevent an existing cache + # from working correctly + shell.checked_call("{0} -c {1} -kt {2} {3} > /dev/null".format(kinit_path_local, +
ccache_file_path, keytab, principal), user=user) - shell.checked_call("{0} -l 5m -c {1} -kt {2} {3} > /dev/null".format(kinit_path_local, ccache_file_path, keytab, principal), user=user) + # record kinit time + _KINIT_CACHE_TIMES[ccache_file_name] = current_time else: - logger.debug("[Alert][{0}] Kerberos authentication via GSSAPI already enabled using ccache at {1}.".format( - alert_name, ccache_file_path)) + # no kinit needed, use the cache + logger.debug("Kerberos authentication for %s via GSSAPI already enabled using ccache at %s.", + caller_label, ccache_file_path) finally: kinit_lock.release() @@ -119,7 +180,7 @@ def curl_krb_request(tmp_dir, keytab, principal, url, cache_file_prefix, user=user, env=kerberos_env) except Fail: if logger.isEnabledFor(logging.DEBUG): - logger.exception("[Alert][{0}] Unable to make a web request.".format(alert_name)) + logger.exception("Unable to make a curl request for {0}.".format(caller_label)) raise finally: if os.path.isfile(cookie_file): @@ -138,6 +199,7 @@ def curl_krb_request(tmp_dir, keytab, principal, url, cache_file_prefix, else: return (curl_stdout, error_msg, time_millis) - logger.debug("[Alert][{0}] Curl response is empty! Please take a look at error message: ". 
- format(alert_name, str(error_msg))) + logger.debug("The curl response for %s is empty; standard error = %s", + caller_label, str(error_msg)) + return ("", error_msg, time_millis) http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py index ef389cd..71e34e6 100644 --- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py +++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py @@ -25,6 +25,8 @@ import logging import traceback from resource_management.libraries.functions.curl_krb_request import curl_krb_request +from resource_management.libraries.functions.curl_krb_request import DEFAULT_KERBEROS_KINIT_TIMER_MS +from resource_management.libraries.functions.curl_krb_request import KERBEROS_KINIT_TIMER_PARAMETER from resource_management.core.environment import Environment LABEL = 'Last Checkpoint: [{h} hours, {m} minutes, {tx} transactions]' @@ -133,6 +135,8 @@ def execute(configurations={}, parameters={}, host_name=None): if PERCENT_CRITICAL_KEY in parameters: percent_critical = float(parameters[PERCENT_CRITICAL_KEY]) * 100 + kinit_timer_ms = parameters.get(KERBEROS_KINIT_TIMER_PARAMETER, DEFAULT_KERBEROS_KINIT_TIMER_MS) + # determine the right URI and whether to use SSL uri = http_uri if http_policy == 'HTTPS_ONLY': @@ -159,14 +163,16 @@ def execute(configurations={}, parameters={}, host_name=None): last_checkpoint_time_response, error_msg, time_millis = curl_krb_request(env.tmp_dir, kerberos_keytab, kerberos_principal, 
last_checkpoint_time_qry,"checkpoint_time_alert", executable_paths, False, - "NameNode Last Checkpoint", smokeuser, connection_timeout=curl_connection_timeout) + "NameNode Last Checkpoint", smokeuser, connection_timeout=curl_connection_timeout, + kinit_timer_ms = kinit_timer_ms) last_checkpoint_time_response_json = json.loads(last_checkpoint_time_response) last_checkpoint_time = int(last_checkpoint_time_response_json["beans"][0]["LastCheckpointTime"]) journal_transaction_info_response, error_msg, time_millis = curl_krb_request(env.tmp_dir, kerberos_keytab, kerberos_principal, journal_transaction_info_qry,"checkpoint_time_alert", executable_paths, - False, "NameNode Last Checkpoint", smokeuser, connection_timeout=curl_connection_timeout) + False, "NameNode Last Checkpoint", smokeuser, connection_timeout=curl_connection_timeout, + kinit_timer_ms = kinit_timer_ms) journal_transaction_info_response_json = json.loads(journal_transaction_info_response) journal_transaction_info = journal_transaction_info_response_json["beans"][0]["JournalTransactionInfo"] http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py index a174cb4..70b1970 100644 --- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py +++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py @@ -21,9 +21,10 @@ limitations under the License. import urllib2 import ambari_simplejson as json # simplejson is much faster comparing to Python 2.6 json module and has the same functions set. 
import logging -import traceback from resource_management.libraries.functions.curl_krb_request import curl_krb_request +from resource_management.libraries.functions.curl_krb_request import DEFAULT_KERBEROS_KINIT_TIMER_MS +from resource_management.libraries.functions.curl_krb_request import KERBEROS_KINIT_TIMER_PARAMETER from resource_management.core.environment import Environment RESULT_STATE_OK = 'OK' @@ -110,6 +111,7 @@ def execute(configurations={}, parameters={}, host_name=None): kerberos_principal = configurations[KERBEROS_PRINCIPAL] kerberos_principal = kerberos_principal.replace('_HOST', host_name) + kinit_timer_ms = parameters.get(KERBEROS_KINIT_TIMER_PARAMETER, DEFAULT_KERBEROS_KINIT_TIMER_MS) # determine whether or not SSL is enabled is_ssl_enabled = False @@ -165,7 +167,8 @@ def execute(configurations={}, parameters={}, host_name=None): state_response, error_msg, time_millis = curl_krb_request(env.tmp_dir, kerberos_keytab, kerberos_principal, jmx_uri,"ha_nn_health", executable_paths, False, - "NameNode High Availability Health", smokeuser, connection_timeout=curl_connection_timeout) + "NameNode High Availability Health", smokeuser, connection_timeout=curl_connection_timeout, + kinit_timer_ms = kinit_timer_ms) state = _get_ha_state_from_json(state_response) else: http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py index 217f3b8..f6a9a56 100644 --- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py +++ 
b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_metrics_deviation.py @@ -25,7 +25,12 @@ from math import sqrt import urllib import time import urllib2 -from resource_management import Environment, curl_krb_request +from resource_management import Environment + +from resource_management.libraries.functions.curl_krb_request import curl_krb_request +from resource_management.libraries.functions.curl_krb_request import DEFAULT_KERBEROS_KINIT_TIMER_MS +from resource_management.libraries.functions.curl_krb_request import KERBEROS_KINIT_TIMER_PARAMETER + RESULT_STATE_OK = 'OK' RESULT_STATE_CRITICAL = 'CRITICAL' @@ -178,6 +183,8 @@ def execute(configurations={}, parameters={}, host_name=None): if dfs_policy == "HTTPS_ONLY": is_ssl_enabled = True + kinit_timer_ms = parameters.get(KERBEROS_KINIT_TIMER_PARAMETER, DEFAULT_KERBEROS_KINIT_TIMER_MS) + name_service = configurations[NAMESERVICE_KEY] hdfs_site = configurations[HDFS_SITE_KEY] @@ -215,9 +222,10 @@ def execute(configurations={}, parameters={}, host_name=None): # curl requires an integer timeout curl_connection_timeout = int(connection_timeout) - state_response, error_msg, time_millis = curl_krb_request(env.tmp_dir, - kerberos_keytab, kerberos_principal, jmx_uri,"ha_nn_health", executable_paths, False, - "NameNode High Availability Health", smokeuser, connection_timeout=curl_connection_timeout) + state_response, error_msg, time_millis = curl_krb_request(env.tmp_dir, + kerberos_keytab, kerberos_principal, jmx_uri,"ha_nn_health", executable_paths, False, + "NameNode High Availability Health", smokeuser, connection_timeout=curl_connection_timeout, + kinit_timer_ms = kinit_timer_ms) state = _get_ha_state_from_json(state_response) else: http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_upgrade_finalized.py ---------------------------------------------------------------------- diff --git 
a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_upgrade_finalized.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_upgrade_finalized.py index 6e8945c..fbda22e 100644 --- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_upgrade_finalized.py +++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_upgrade_finalized.py @@ -23,7 +23,10 @@ import ambari_simplejson as json # simplejson is much faster comparing to Python import logging import traceback -from resource_management.libraries.functions.curl_krb_request import curl_krb_request, CONNECTION_TIMEOUT_DEFAULT +from resource_management.libraries.functions.curl_krb_request import curl_krb_request +from resource_management.libraries.functions.curl_krb_request import DEFAULT_KERBEROS_KINIT_TIMER_MS +from resource_management.libraries.functions.curl_krb_request import KERBEROS_KINIT_TIMER_PARAMETER +from resource_management.libraries.functions.curl_krb_request import CONNECTION_TIMEOUT_DEFAULT from resource_management.core.environment import Environment NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}' @@ -100,6 +103,8 @@ def execute(configurations={}, parameters={}, host_name=None): kerberos_principal = configurations[KERBEROS_PRINCIPAL] kerberos_principal = kerberos_principal.replace('_HOST', host_name) + kinit_timer_ms = parameters.get(KERBEROS_KINIT_TIMER_PARAMETER, DEFAULT_KERBEROS_KINIT_TIMER_MS) + # determine the right URI and whether to use SSL uri = http_uri if http_policy == 'HTTPS_ONLY': @@ -121,7 +126,7 @@ def execute(configurations={}, parameters={}, host_name=None): last_checkpoint_time_response, error_msg, time_millis = curl_krb_request( env.tmp_dir, kerberos_keytab, kerberos_principal, upgrade_finalized_qry, "upgrade_finalized_state", executable_paths, False, - "HDFS Upgrade Finalized State", smokeuser + "HDFS Upgrade Finalized State", 
smokeuser, kinit_timer_ms = kinit_timer_ms ) upgrade_finalized_response_json = json.loads(last_checkpoint_time_response) http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py index b49fd6e..c9575c0 100644 --- a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py +++ b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py @@ -27,6 +27,9 @@ import logging from resource_management.core.environment import Environment from resource_management.libraries.functions.curl_krb_request import curl_krb_request +from resource_management.libraries.functions.curl_krb_request import DEFAULT_KERBEROS_KINIT_TIMER_MS +from resource_management.libraries.functions.curl_krb_request import KERBEROS_KINIT_TIMER_PARAMETER + RESULT_CODE_OK = "OK" RESULT_CODE_CRITICAL = "CRITICAL" @@ -148,11 +151,13 @@ def execute(configurations={}, parameters={}, host_name=None): if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations: kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY] + kinit_timer_ms = parameters.get(KERBEROS_KINIT_TIMER_PARAMETER, DEFAULT_KERBEROS_KINIT_TIMER_MS) + env = Environment.get_instance() stdout, stderr, time_millis = curl_krb_request(env.tmp_dir, smokeuser_keytab, smokeuser_principal, - query_url, "webhcat_alert_cc_", kerberos_executable_search_paths, True, - "WebHCat Server Status", smokeuser, - connection_timeout=curl_connection_timeout) + query_url, "webhcat_alert_cc_", kerberos_executable_search_paths, True, + "WebHCat Server Status", 
smokeuser, connection_timeout=curl_connection_timeout, + kinit_timer_ms = kinit_timer_ms) # check the response code response_code = int(stdout) @@ -169,9 +174,10 @@ def execute(configurations={}, parameters={}, host_name=None): # now that we have the http status and it was 200, get the content stdout, stderr, total_time = curl_krb_request(env.tmp_dir, smokeuser_keytab, smokeuser_principal, - query_url, "webhcat_alert_cc_", kerberos_executable_search_paths, - False, "WebHCat Server Status", smokeuser, - connection_timeout=curl_connection_timeout) + query_url, "webhcat_alert_cc_", kerberos_executable_search_paths, + False, "WebHCat Server Status", smokeuser, connection_timeout=curl_connection_timeout, + kinit_timer_ms = kinit_timer_ms) + json_response = json.loads(stdout) except: return (RESULT_CODE_CRITICAL, [traceback.format_exc()]) http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py index ef5e6b3..2105bed 100644 --- a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py +++ b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py @@ -26,6 +26,8 @@ import traceback from ambari_commons import OSCheck from ambari_commons.inet_utils import resolve_address from resource_management.libraries.functions.curl_krb_request import curl_krb_request +from resource_management.libraries.functions.curl_krb_request import DEFAULT_KERBEROS_KINIT_TIMER_MS +from resource_management.libraries.functions.curl_krb_request import KERBEROS_KINIT_TIMER_PARAMETER from 
resource_management.core.environment import Environment RESULT_CODE_OK = 'OK' @@ -155,9 +157,11 @@ def execute(configurations={}, parameters={}, host_name=None): # curl requires an integer timeout curl_connection_timeout = int(connection_timeout) + kinit_timer_ms = parameters.get(KERBEROS_KINIT_TIMER_PARAMETER, DEFAULT_KERBEROS_KINIT_TIMER_MS) + url_response, error_msg, time_millis = curl_krb_request(env.tmp_dir, kerberos_keytab, kerberos_principal, query, "nm_health_alert", executable_paths, False, "NodeManager Health", smokeuser, - connection_timeout=curl_connection_timeout) + connection_timeout=curl_connection_timeout, kinit_timer_ms = kinit_timer_ms) json_response = json.loads(url_response) else: http://git-wip-us.apache.org/repos/asf/ambari/blob/2efe8945/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py index 119a1a1..adf27ec 100644 --- a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py +++ b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py @@ -25,6 +25,8 @@ import traceback from ambari_commons.urllib_handlers import RefreshHeaderProcessor from resource_management.libraries.functions.curl_krb_request import curl_krb_request +from resource_management.libraries.functions.curl_krb_request import DEFAULT_KERBEROS_KINIT_TIMER_MS +from resource_management.libraries.functions.curl_krb_request import KERBEROS_KINIT_TIMER_PARAMETER from resource_management.core.environment import Environment ERROR_LABEL = '{0} NodeManager{1} {2} unhealthy.' 
@@ -109,6 +111,8 @@ def execute(configurations={}, parameters={}, host_name=None): if CONNECTION_TIMEOUT_KEY in parameters: connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY]) + kinit_timer_ms = parameters.get(KERBEROS_KINIT_TIMER_PARAMETER, DEFAULT_KERBEROS_KINIT_TIMER_MS) + # determine the right URI and whether to use SSL uri = http_uri if http_policy == 'HTTPS_ONLY': @@ -130,7 +134,8 @@ def execute(configurations={}, parameters={}, host_name=None): url_response, error_msg, time_millis = curl_krb_request(env.tmp_dir, kerberos_keytab, kerberos_principal, live_nodemanagers_qry, "nm_health_summary_alert", executable_paths, False, - "NodeManager Health Summary", smokeuser, connection_timeout=curl_connection_timeout) + "NodeManager Health Summary", smokeuser, connection_timeout=curl_connection_timeout, + kinit_timer_ms = kinit_timer_ms) try: url_response_json = json.loads(url_response) @@ -143,7 +148,8 @@ def execute(configurations={}, parameters={}, host_name=None): if convert_to_json_failed: response_code, error_msg, time_millis = curl_krb_request(env.tmp_dir, kerberos_keytab, kerberos_principal, live_nodemanagers_qry, "nm_health_summary_alert", executable_paths, True, - "NodeManager Health Summary", smokeuser, connection_timeout=curl_connection_timeout) + "NodeManager Health Summary", smokeuser, connection_timeout=curl_connection_timeout, + kinit_timer_ms = kinit_timer_ms) else: live_nodemanagers = json.loads(get_value_from_jmx(live_nodemanagers_qry, "LiveNodeManagers", connection_timeout))
