Repository: ambari Updated Branches: refs/heads/branch-3.0-perf a9774d664 -> 7fb94cb81
AMBARI-21741. Add CREATE/UPDATE/DELETE actions to alert_definitions event (aonishuk) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/7fb94cb8 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/7fb94cb8 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/7fb94cb8 Branch: refs/heads/branch-3.0-perf Commit: 7fb94cb814050534571e375c7ce368b080decc3c Parents: a9774d6 Author: Andrew Onishuk <[email protected]> Authored: Fri Aug 18 10:54:33 2017 +0300 Committer: Andrew Onishuk <[email protected]> Committed: Fri Aug 18 10:54:33 2017 +0300 ---------------------------------------------------------------------- .../src/main/python/ambari_agent/ActionQueue.py | 10 ++ .../ambari_agent/AlertSchedulerHandler.py | 17 +-- .../ClusterAlertDefinitionsCache.py | 38 ++++++ .../listeners/AlertDefinitionsEventListener.py | 18 ++- .../src/main/python/ambari_agent/main.py | 6 +- .../ambari_agent/TestAgentStompResponses.py | 126 ++++++++++++++----- .../stomp/alert_definition_expected.json | 74 +++++++++++ .../dummy_files/stomp/alert_definitions.json | 33 +---- .../stomp/alert_definitions_add.json | 45 +++++++ .../stomp/alert_definitions_delete.json | 60 +++++++++ .../stomp/alert_definitions_edit.json | 44 +++++++ .../stomp/alert_definitions_small.json | 92 ++++++++++++++ 12 files changed, 482 insertions(+), 81 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/7fb94cb8/ambari-agent/src/main/python/ambari_agent/ActionQueue.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py index b3802b1..42d0ffe 100644 --- a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py +++ b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py @@ -102,6 +102,9 @@ class ActionQueue(threading.Thread): else: self.commandQueue.put(command) + def interrupt(self): + self.commandQueue.put(None) + def cancel(self, commands): for command in commands: @@ -136,12 +139,19 @@ class ActionQueue(threading.Thread): try: if self.parallel_execution == 0: command = self.commandQueue.get(True, self.EXECUTION_COMMAND_WAIT_TIME) + + if command == None: + break + self.process_command(command) else: # If parallel execution is enabled, just kick off all available # commands using separate threads while not self.stop_event.is_set(): command = self.commandQueue.get(True, self.EXECUTION_COMMAND_WAIT_TIME) + + if command == None: + break # If command is not retry_enabled then do not start them in parallel # checking just one command is enough as all commands for a stage is sent # at the same time and retry is only enabled for initial start/install http://git-wip-us.apache.org/repos/asf/ambari/blob/7fb94cb8/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py b/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py index 3d7c30c..94e72c2 100644 --- a/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py +++ b/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py @@ -62,9 +62,6 @@ class AlertSchedulerHandler(): self.config = initializer_module.config - # a mapping between a cluster name and a unique hash for all definitions - self._cluster_hashes = {} - # the amount of time, in seconds, that an alert can run after it's scheduled time alert_grace_period = int(self.config.get('agent', 'alert_grace_period', 5)) @@ -108,7 +105,7 @@ class AlertSchedulerHandler(): self.stop() - def update_definitions(self): + def update_definitions(self, event_type): """ Updates the persisted alert definitions JSON. :return: @@ -119,12 +116,7 @@ class AlertSchedulerHandler(): command_copy = Utils.get_mutable_copy(command) alert_definitions.append(command_copy) - # determine how to reschedule the jobs - reschedule_all = False - if "clusterName" in command_copy and command_copy["clusterName"] not in self._cluster_hashes: - reschedule_all = True - - if reschedule_all is True: + if event_type == "CREATE": # reschedule all jobs, creating new instances self.reschedule_all() else: @@ -255,12 +247,7 @@ class AlertSchedulerHandler(): for cluster_id, command_json in self.alert_definitions_cache.iteritems(): clusterName = '' if not 'clusterName' in command_json else command_json['clusterName'] hostName = '' if not 'hostName' in command_json else command_json['hostName'] - clusterHash = None if not 'hash' in command_json else command_json['hash'] - # cache the cluster and cluster hash after loading the JSON - if clusterName != '' and clusterHash is not None: - logger.info('[AlertScheduler] Caching cluster {0} with alert hash {1}'.format(clusterName, clusterHash)) - self._cluster_hashes[clusterName] = clusterHash for definition in command_json['alertDefinitions']: alert = self.__json_to_callable(clusterName, hostName, Utils.get_mutable_copy(definition)) http://git-wip-us.apache.org/repos/asf/ambari/blob/7fb94cb8/ambari-agent/src/main/python/ambari_agent/ClusterAlertDefinitionsCache.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/ClusterAlertDefinitionsCache.py b/ambari-agent/src/main/python/ambari_agent/ClusterAlertDefinitionsCache.py index 39b88e6..a1f7199 100644 --- a/ambari-agent/src/main/python/ambari_agent/ClusterAlertDefinitionsCache.py +++ b/ambari-agent/src/main/python/ambari_agent/ClusterAlertDefinitionsCache.py @@ -58,5 +58,43 @@ class ClusterAlertDefinitionsCache(ClusterCache): """ super(ClusterAlertDefinitionsCache, self).__init__(cluster_cache_dir) + def get_alert_definition_index_by_id(self, alert_dict, cluster_id, alert_id): + definitions = alert_dict[cluster_id]['alertDefinitions'] + for i in xrange(len(definitions)): + if definitions[i]['definitionId'] == alert_id: + return i + + return None + + def cache_update(self, cache_update): + mutable_dict = self._get_mutable_copy() + + for cluster_id in mutable_dict: + for alert_definition in cache_update[cluster_id]['alertDefinitions']: + id_to_update = alert_definition['definitionId'] + index_of_alert = self.get_alert_definition_index_by_id(mutable_dict, cluster_id, id_to_update) + if index_of_alert == None: + mutable_dict[cluster_id]['alertDefinitions'].append(alert_definition) + else: + mutable_dict[cluster_id]['alertDefinitions'][index_of_alert] = alert_definition + + self.rewrite_cache(mutable_dict) + + def cache_delete(self, cache_update): + mutable_dict = self._get_mutable_copy() + + for cluster_id in mutable_dict: + for alert_definition in cache_update[cluster_id]['alertDefinitions']: + id_to_update = alert_definition['definitionId'] + index_of_alert = self.get_alert_definition_index_by_id(mutable_dict, cluster_id, id_to_update) + + if index_of_alert == None: + raise Exception("Cannot delete an alert with id={0}".format(id_to_update)) + + del mutable_dict[cluster_id]['alertDefinitions'][index_of_alert] + + self.rewrite_cache(mutable_dict) + + def get_cache_name(self): return 'alert_definitions' \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/7fb94cb8/ambari-agent/src/main/python/ambari_agent/listeners/AlertDefinitionsEventListener.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/listeners/AlertDefinitionsEventListener.py b/ambari-agent/src/main/python/ambari_agent/listeners/AlertDefinitionsEventListener.py index 0829c31..cf72a4d 100644 --- a/ambari-agent/src/main/python/ambari_agent/listeners/AlertDefinitionsEventListener.py +++ b/ambari-agent/src/main/python/ambari_agent/listeners/AlertDefinitionsEventListener.py @@ -45,11 +45,21 @@ class AlertDefinitionsEventListener(EventListener): if message == {}: return - self.alert_definitions_cache.rewrite_cache(message['clusters']) - print message - self.alert_definitions_cache.hash = message['hash'] + event_type = message['eventType'] - self.alert_scheduler_handler.update_definitions() + if event_type == 'CREATE': + self.alert_definitions_cache.rewrite_cache(message['clusters']) + self.alert_definitions_cache.hash = message['hash'] + elif event_type == 'UPDATE': + self.alert_definitions_cache.cache_update(message['clusters']) + self.alert_definitions_cache.hash = message['hash'] + elif event_type == 'DELETE': + self.alert_definitions_cache.cache_delete(message['clusters']) + self.alert_definitions_cache.hash = message['hash'] + else: + logger.error("Unknown event type '{0}' for alert event") + + self.alert_scheduler_handler.update_definitions(event_type) def get_handled_path(self): return Constants.ALERTS_DEFENITIONS_TOPIC \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/7fb94cb8/ambari-agent/src/main/python/ambari_agent/main.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/main.py b/ambari-agent/src/main/python/ambari_agent/main.py index ece9b7a..05fc5ce 100644 --- a/ambari-agent/src/main/python/ambari_agent/main.py +++ b/ambari-agent/src/main/python/ambari_agent/main.py @@ -153,8 +153,8 @@ def setup_logging(logger, filename, logging_level): logger.setLevel(logging_level) logger.info("loglevel=logging.{0}".format(logging._levelNames[logging_level])) -GRACEFUL_STOP_TRIES = 10 -GRACEFUL_STOP_TRIES_SLEEP = 3 +GRACEFUL_STOP_TRIES = 300 +GRACEFUL_STOP_TRIES_SLEEP = 0.1 def add_syslog_handler(logger): @@ -375,6 +375,8 @@ def run_threads(initializer_module): while not initializer_module.stop_event.is_set(): time.sleep(0.1) + initializer_module.action_queue.interrupt() + command_status_reporter.join() component_status_executor.join() host_status_reporter.join() http://git-wip-us.apache.org/repos/asf/ambari/blob/7fb94cb8/ambari-agent/src/test/python/ambari_agent/TestAgentStompResponses.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/ambari_agent/TestAgentStompResponses.py b/ambari-agent/src/test/python/ambari_agent/TestAgentStompResponses.py index 38d0e9e..55c489f 100644 --- a/ambari-agent/src/test/python/ambari_agent/TestAgentStompResponses.py +++ b/ambari-agent/src/test/python/ambari_agent/TestAgentStompResponses.py @@ -33,6 +33,7 @@ from ambari_agent.ComponentStatusExecutor import ComponentStatusExecutor from ambari_agent.CommandStatusReporter import CommandStatusReporter from ambari_agent.HostStatusReporter import HostStatusReporter from ambari_agent.CustomServiceOrchestrator import CustomServiceOrchestrator +from ambari_agent.Utils import Utils from mock.mock import MagicMock, patch @@ -50,19 +51,23 @@ class TestAgentStompResponses(BaseStompServerTestCase): return super(TestAgentStompResponses, self).setUp() + def tearDown(self): + self.initializer_module.stop_event.set() + return super(TestAgentStompResponses, self).tearDown() + @patch.object(CustomServiceOrchestrator, "runCommand") def test_mock_server_can_start(self, runCommand_mock): runCommand_mock.return_value = {'stdout':'...', 'stderr':'...', 'structuredOut' : '{}', 'exitcode':1} - initializer_module = InitializerModule() - heartbeat_thread = HeartbeatThread.HeartbeatThread(initializer_module) + self.initializer_module = InitializerModule() + heartbeat_thread = HeartbeatThread.HeartbeatThread(self.initializer_module) heartbeat_thread.start() - action_queue = initializer_module.action_queue + action_queue = self.initializer_module.action_queue action_queue.start() - initializer_module.alert_scheduler_handler.start() + self.initializer_module.alert_scheduler_handler.start() - component_status_executor = ComponentStatusExecutor(initializer_module) + component_status_executor = ComponentStatusExecutor(self.initializer_module) component_status_executor.start() connect_frame = self.server.frames_queue.get() @@ -96,13 +101,13 @@ class TestAgentStompResponses(BaseStompServerTestCase): initial_host_level_params_request = self.server.frames_queue.get() initial_alert_definitions_request = self.server.frames_queue.get() - while not initializer_module.is_registered: + while not self.initializer_module.is_registered: time.sleep(0.1) - command_status_reporter = CommandStatusReporter(initializer_module) + command_status_reporter = CommandStatusReporter(self.initializer_module) command_status_reporter.start() - host_status_reporter = HostStatusReporter(initializer_module) + host_status_reporter = HostStatusReporter(self.initializer_module) host_status_reporter.start() f = Frame(frames.MESSAGE, headers={'destination': '/user/commands'}, body=self.get_json("execution_commands.json")) @@ -125,7 +130,7 @@ class TestAgentStompResponses(BaseStompServerTestCase): dn_recovery_failed_frame = json.loads(self.server.frames_queue.get().body) host_status_report = json.loads(self.server.frames_queue.get().body) - initializer_module.stop_event.set() + self.initializer_module.stop_event.set() f = Frame(frames.MESSAGE, headers={'destination': '/user/', 'correlationId': '6'}, body=json.dumps({'id':'1'})) self.server.topic_manager.send(f) @@ -137,9 +142,9 @@ class TestAgentStompResponses(BaseStompServerTestCase): action_queue.join() self.assertTrue('mounts' in host_status_report) - self.assertEquals(initializer_module.topology_cache['0']['hosts'][0]['hostName'], 'c6401.ambari.apache.org') - self.assertEquals(initializer_module.metadata_cache['0']['status_commands_to_run'], ('STATUS',)) - self.assertEquals(initializer_module.configurations_cache['0']['configurations']['zoo.cfg']['clientPort'], '2181') + self.assertEquals(self.initializer_module.topology_cache['0']['hosts'][0]['hostName'], 'c6401.ambari.apache.org') + self.assertEquals(self.initializer_module.metadata_cache['0']['status_commands_to_run'], ('STATUS',)) + self.assertEquals(self.initializer_module.configurations_cache['0']['configurations']['zoo.cfg']['clientPort'], '2181') self.assertEquals(dn_install_in_progress_frame['clusters']['0'][0]['roleCommand'], 'INSTALL') self.assertEquals(dn_install_in_progress_frame['clusters']['0'][0]['role'], 'DATANODE') self.assertEquals(dn_install_in_progress_frame['clusters']['0'][0]['status'], 'IN_PROGRESS') @@ -152,24 +157,24 @@ class TestAgentStompResponses(BaseStompServerTestCase): #============================================================================================ - initializer_module = InitializerModule() + self.initializer_module = InitializerModule() self.server.frames_queue.queue.clear() - heartbeat_thread = HeartbeatThread.HeartbeatThread(initializer_module) + heartbeat_thread = HeartbeatThread.HeartbeatThread(self.initializer_module) heartbeat_thread.start() - action_queue = initializer_module.action_queue + action_queue = self.initializer_module.action_queue action_queue.start() - initializer_module.alert_scheduler_handler.start() + self.initializer_module.alert_scheduler_handler.start() - component_status_executor = ComponentStatusExecutor(initializer_module) + component_status_executor = ComponentStatusExecutor(self.initializer_module) component_status_executor.start() - command_status_reporter = CommandStatusReporter(initializer_module) + command_status_reporter = CommandStatusReporter(self.initializer_module) command_status_reporter.start() - host_status_reporter = HostStatusReporter(initializer_module) + host_status_reporter = HostStatusReporter(self.initializer_module) host_status_reporter.start() connect_frame = self.server.frames_queue.get() @@ -202,7 +207,7 @@ class TestAgentStompResponses(BaseStompServerTestCase): heartbeat_frame = self.server.frames_queue.get() status_reports_frame = self.server.frames_queue.get() - initializer_module.stop_event.set() + self.initializer_module.stop_event.set() f = Frame(frames.MESSAGE, headers={'destination': '/user/', 'correlationId': '6'}, body=json.dumps({'id':'1'})) self.server.topic_manager.send(f) @@ -215,8 +220,8 @@ class TestAgentStompResponses(BaseStompServerTestCase): def test_topology_update_and_delete(self): - initializer_module = InitializerModule() - heartbeat_thread = HeartbeatThread.HeartbeatThread(initializer_module) + self.initializer_module = InitializerModule() + heartbeat_thread = HeartbeatThread.HeartbeatThread(self.initializer_module) heartbeat_thread.start() connect_frame = self.server.frames_queue.get() @@ -250,7 +255,7 @@ class TestAgentStompResponses(BaseStompServerTestCase): initial_host_level_params_request = self.server.frames_queue.get() initial_alert_definitions_request = self.server.frames_queue.get() - while not initializer_module.is_registered: + while not self.initializer_module.is_registered: time.sleep(0.1) f = Frame(frames.MESSAGE, headers={'destination': '/events/topologies'}, body=self.get_json("topology_add_component.json")) @@ -275,16 +280,81 @@ class TestAgentStompResponses(BaseStompServerTestCase): self.server.topic_manager.send(f) def is_json_equal(): - json_topology = json.dumps(initializer_module.topology_cache, indent=2, sort_keys=True) - json_excepted_lopology = json.dumps(self.get_dict_from_file("topology_cache_expected.json"), indent=2, sort_keys=True) + #json_topology = json.dumps(self.initializer_module.topology_cache, indent=2, sort_keys=True) + #json_excepted_lopology = json.dumps(self.get_dict_from_file("topology_cache_expected.json"), indent=2, sort_keys=True) #print json_topology #print json_excepted_lopology - self.assertEquals(json_topology, json_excepted_lopology) - #self.assertEquals(initializer_module.topology_cache, self.get_dict_from_file("topology_cache_expected.json")) + self.assertEquals(Utils.get_mutable_copy(self.initializer_module.topology_cache), self.get_dict_from_file("topology_cache_expected.json")) + + self.assert_with_retries(is_json_equal, tries=80, try_sleep=0.1) + + self.initializer_module.stop_event.set() + + f = Frame(frames.MESSAGE, headers={'destination': '/user/', 'correlationId': '6'}, body=json.dumps({'id':'1'})) + self.server.topic_manager.send(f) + + heartbeat_thread.join() + + + def test_alert_definitions_update_and_delete(self): + self.initializer_module = InitializerModule() + heartbeat_thread = HeartbeatThread.HeartbeatThread(self.initializer_module) + heartbeat_thread.start() + + connect_frame = self.server.frames_queue.get() + users_subscribe_frame = self.server.frames_queue.get() + registration_frame = self.server.frames_queue.get() + + # server sends registration response + f = Frame(frames.MESSAGE, headers={'destination': '/user/', 'correlationId': '0'}, body=self.get_json("registration_response.json")) + self.server.topic_manager.send(f) + + + # response to /initial_topology + f = Frame(frames.MESSAGE, headers={'destination': '/user/', 'correlationId': '1'}, body='{}') + self.server.topic_manager.send(f) + + f = Frame(frames.MESSAGE, headers={'destination': '/user/', 'correlationId': '2'}, body='{}') + self.server.topic_manager.send(f) + + f = Frame(frames.MESSAGE, headers={'destination': '/user/', 'correlationId': '3'}, body='{}') + self.server.topic_manager.send(f) + + f = Frame(frames.MESSAGE, headers={'destination': '/user/', 'correlationId': '4'}, body='{}') + self.server.topic_manager.send(f) + + f = Frame(frames.MESSAGE, headers={'destination': '/user/', 'correlationId': '5'}, body=self.get_json("alert_definitions_small.json")) + self.server.topic_manager.send(f) + + initial_topology_request = self.server.frames_queue.get() + initial_metadata_request = self.server.frames_queue.get() + initial_configs_request = self.server.frames_queue.get() + initial_host_level_params_request = self.server.frames_queue.get() + initial_alert_definitions_request = self.server.frames_queue.get() + + while not self.initializer_module.is_registered: + time.sleep(0.1) + + f = Frame(frames.MESSAGE, headers={'destination': '/user/alert_defenitions'}, body=self.get_json("alert_definitions_add.json")) + self.server.topic_manager.send(f) + + f = Frame(frames.MESSAGE, headers={'destination': '/user/alert_defenitions'}, body=self.get_json("alert_definitions_edit.json")) + self.server.topic_manager.send(f) + + f = Frame(frames.MESSAGE, headers={'destination': '/user/alert_defenitions'}, body=self.get_json("alert_definitions_delete.json")) + self.server.topic_manager.send(f) + + + def is_json_equal(): + #json_alert_definitions = json.dumps(self.initializer_module.alert_definitions_cache, indent=2, sort_keys=True) + #json_excepted_definitions = json.dumps(self.get_dict_from_file("alert_definition_expected.json"), indent=2, sort_keys=True) + #print json_definitions + #print json_excepted_definitions + self.assertEquals(Utils.get_mutable_copy(self.initializer_module.alert_definitions_cache), self.get_dict_from_file("alert_definition_expected.json")) self.assert_with_retries(is_json_equal, tries=80, try_sleep=0.1) - initializer_module.stop_event.set() + self.initializer_module.stop_event.set() f = Frame(frames.MESSAGE, headers={'destination': '/user/', 'correlationId': '6'}, body=json.dumps({'id':'1'})) self.server.topic_manager.send(f) http://git-wip-us.apache.org/repos/asf/ambari/blob/7fb94cb8/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definition_expected.json ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definition_expected.json b/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definition_expected.json new file mode 100644 index 0000000..997a7fa --- /dev/null +++ b/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definition_expected.json @@ -0,0 +1,74 @@ +{ + "0": { + "alertDefinitions": [ + { + "clusterId": 2, + "definitionId": 3, + "description": "This service-level alert is triggered if the configured percentage of RegionServer processes cannot be determined to be up and listening on the network for the configured warning and critical thresholds. It aggregates the results of RegionServer process down checks.", + "enabled": true, + "ignore_host": false, + "interval": 2, + "label": "Percent RegionServers Available", + "name": "hbase_regionserver_process_percent", + "scope": "SERVICE", + "serviceName": "HBASE", + "source": { + "alert_name": "hbase_regionserver_process", + "reporting": { + "critical": { + "text": "affected: [{1}], total: [{0}]", + "value": 30.0 + }, + "ok": { + "text": "affected: [{1}], total: [{0}]" + }, + "type": "PERCENT", + "units": "%", + "warning": { + "text": "affected: [{1}], total: [{0}]", + "value": 10.0 + } + }, + "type": "AGGREGATE" + }, + "uuid": "69ff4c8f-8e98-4cfd-b90f-6914e2f147ff" + }, + { + "clusterId": 2, + "componentName": "HBASE_MASTER", + "definitionId": 1, + "description": "This alert is triggered if the HBase master processes cannot be confirmed to be up and listening on the network for the configured critical threshold, given in seconds.", + "enabled": true, + "ignore_host": false, + "interval": 1, + "label": "HBase Master Process", + "name": "hbase_master_process", + "scope": "ANY", + "serviceName": "HBASE", + "source": { + "default_port": 60000, + "reporting": { + "critical": { + "text": "Connection failed: {0} to {1}:{2}", + "value": 5.0 + }, + "ok": { + "text": "TCP OK - {0:.3f}s response on port {1}" + }, + "warning": { + "text": "TCP OK - {0:.3f}s response on port {1}", + "value": 1.5 + } + }, + "type": "PORT", + "uri": "{{hbase-site/hbase.master.port}}" + }, + "uuid": "ff73ead7-13b4-43ea-a747-d230f17bf230" + } + ], + "clusterName": "cl1", + "hash": "8f7b4e960133bc691661cbcdaddddec8", + "hostName": "ctr-e134-1499953498516-81665-01-000008.hwx.site", + "publicHostName": "ctr-e134-1499953498516-81665-01-000008.hwx.site" + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/7fb94cb8/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions.json ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions.json b/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions.json index cc21244..eca4c44 100644 --- a/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions.json +++ b/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions.json @@ -1,5 +1,6 @@ { "hash": "37fe2bd73438980c619c2b8c2f95d160", + "eventType": "CREATE", "clusters": { "0": { "hash": "8f7b4e960133bc691661cbcdaddddec8", @@ -8,38 +9,6 @@ "publicHostName": "ctr-e134-1499953498516-81665-01-000008.hwx.site", "alertDefinitions": [{ "ignore_host": false, - "name": "hbase_master_process", - "componentName": "HBASE_MASTER", - "interval": 1, - "clusterId": 2, - "uuid": "ff73ead7-13b4-43ea-a747-d230f17bf230", - "label": "HBase Master Process", - "definitionId": 1, - "source": { - "reporting": { - "warning": { - "text": "TCP OK - {0:.3f}s response on port {1}", - "value": 1.5 - }, - "ok": { - "text": "TCP OK - {0:.3f}s response on port {1}" - }, - "critical": { - "text": "Connection failed: {0} to {1}:{2}", - "value": 5.0 - } - }, - "type": "PORT", - "uri": "{{hbase-site/hbase.master.port}}", - "default_port": 60000 - }, - "serviceName": "HBASE", - "scope": "ANY", - "enabled": true, - "description": "This alert is triggered if the HBase master processes cannot be confirmed to be up and listening on the network for the configured critical threshold, given in seconds." - }, - { - "ignore_host": false, "name": "hbase_master_cpu", "componentName": "HBASE_MASTER", "interval": 5, http://git-wip-us.apache.org/repos/asf/ambari/blob/7fb94cb8/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions_add.json ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions_add.json b/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions_add.json new file mode 100644 index 0000000..a0ddeb4 --- /dev/null +++ b/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions_add.json @@ -0,0 +1,45 @@ +{ + "hash": "c3046b031f8e27facf803151cddf0619", + "eventType": "UPDATE", + "clusters": { + "0": { + "hash": "8f7b4e960133bc691661cbcdaddddec8", + "clusterName": "cl1", + "hostName": "ctr-e134-1499953498516-81665-01-000008.hwx.site", + "publicHostName": "ctr-e134-1499953498516-81665-01-000008.hwx.site", + "alertDefinitions": [{ + "ignore_host": false, + "name": "hbase_master_process", + "componentName": "HBASE_MASTER", + "interval": 1, + "clusterId": 2, + "uuid": "ff73ead7-13b4-43ea-a747-d230f17bf230", + "label": "HBase Master Process", + "definitionId": 1, + "source": { + "reporting": { + "warning": { + "text": "TCP OK - {0:.3f}s response on port {1}", + "value": 1.5 + }, + "ok": { + "text": "TCP OK - {0:.3f}s response on port {1}" + }, + "critical": { + "text": "Connection failed: {0} to {1}:{2}", + "value": 5.0 + } + }, + "type": "PORT", + "uri": "{{hbase-site/hbase.master.port}}", + "default_port": 60000 + }, + "serviceName": "HBASE", + "scope": "ANY", + "enabled": true, + "description": "This alert is triggered if the HBase master processes cannot be confirmed to be up and listening on the network for the configured critical threshold, given in seconds." + } + ] + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/7fb94cb8/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions_delete.json ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions_delete.json b/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions_delete.json new file mode 100644 index 0000000..252114b --- /dev/null +++ b/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions_delete.json @@ -0,0 +1,60 @@ +{ + "hash": "11c34a65239dde7ce330b1eb7819b788", + "eventType": "DELETE", + "clusters": { + "0": { + "hash": "8f7b4e960133bc691661cbcdaddddec8", + "clusterName": "cl1", + "hostName": "ctr-e134-1499953498516-81665-01-000008.hwx.site", + "publicHostName": "ctr-e134-1499953498516-81665-01-000008.hwx.site", + "alertDefinitions": [{ + "ignore_host": false, + "name": "hbase_master_cpu", + "componentName": "HBASE_MASTER", + "interval": 5, + "clusterId": 2, + "uuid": "6c891177-b32f-47c8-befb-3846049f98e8", + "label": "HBase Master CPU Utilization", + "definitionId": 2, + "source": { + "jmx": { + "value": "{0} * 100", + "property_list": [ + "java.lang:type=OperatingSystem/SystemCpuLoad", + "java.lang:type=OperatingSystem/AvailableProcessors" + ] + }, + "reporting": { + "units": "%", + "type": "PERCENT", + "warning": { + "text": "{1} CPU, load {0:.1%}", + "value": 200.0 + }, + "ok": { + "text": "{1} CPU, load {0:.1%}" + }, + "critical": { + "text": "{1} CPU, load {0:.1%}", + "value": 250.0 + } + }, + "type": "METRIC", + "uri": { + "connection_timeout": 5.0, + "default_port": 60010, + "http": "{{hbase-site/hbase.master.info.port}}", + "kerberos_principal": "{{hbase-site/hbase.security.authentication.spnego.kerberos.principal}}", + "kerberos_keytab": "{{hbase-site/hbase.security.authentication.spnego.kerberos.keytab}}" + } + }, + "serviceName": "HBASE", + "scope": "ANY", + "enabled": true, + "description": "This host-level alert is triggered if CPU utilization of the HBase Master exceeds certain warning and critical thresholds. It checks the HBase Master JMX Servlet for the SystemCPULoad property. The threshold values are in percent." + } + ] + } + + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/7fb94cb8/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions_edit.json ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions_edit.json b/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions_edit.json new file mode 100644 index 0000000..36f1880 --- /dev/null +++ b/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions_edit.json @@ -0,0 +1,44 @@ +{ + "hash": "5b6d27552afd90c575dff3eeecdfb984", + "eventType": "UPDATE", + "clusters": { + "0": { + "hash": "8f7b4e960133bc691661cbcdaddddec8", + "alertDefinitions": [ + { + "ignore_host": false, + "name": "hbase_regionserver_process_percent", + "enabled": true, + "interval": 2, + "clusterId": 2, + "uuid": "69ff4c8f-8e98-4cfd-b90f-6914e2f147ff", + "label": "Percent RegionServers Available", + "definitionId": 3, + "source": { + "alert_name": "hbase_regionserver_process", + "reporting": { + "units": "%", + "type": "PERCENT", + "warning": { + "text": "affected: [{1}], total: [{0}]", + "value": 10.0 + }, + "ok": { + "text": "affected: [{1}], total: [{0}]" + }, + "critical": { + "text": "affected: [{1}], total: [{0}]", + "value": 30.0 + } + }, + "type": "AGGREGATE" + }, + "serviceName": "HBASE", + "scope": "SERVICE", + "description": "This service-level alert is triggered if the configured percentage of RegionServer processes cannot be determined to be up and listening on the network for the configured warning and critical thresholds. It aggregates the results of RegionServer process down checks." + } + ] + } + + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/7fb94cb8/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions_small.json ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions_small.json b/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions_small.json new file mode 100644 index 0000000..b4cf7bf --- /dev/null +++ b/ambari-agent/src/test/python/ambari_agent/dummy_files/stomp/alert_definitions_small.json @@ -0,0 +1,92 @@ +{ + "hash": "37fe2bd73438980c619c2b8c2f95d160", + "eventType": "CREATE", + "clusters": { + "0": { + "hash": "8f7b4e960133bc691661cbcdaddddec8", + "clusterName": "cl1", + "hostName": "ctr-e134-1499953498516-81665-01-000008.hwx.site", + "publicHostName": "ctr-e134-1499953498516-81665-01-000008.hwx.site", + "alertDefinitions": [{ + "ignore_host": false, + "name": "hbase_master_cpu", + "componentName": "HBASE_MASTER", + "interval": 5, + "clusterId": 2, + "uuid": "6c891177-b32f-47c8-befb-3846049f98e8", + "label": "HBase Master CPU Utilization", + "definitionId": 2, + "source": { + "jmx": { + "value": "{0} * 100", + "property_list": [ + "java.lang:type=OperatingSystem/SystemCpuLoad", + "java.lang:type=OperatingSystem/AvailableProcessors" + ] + }, + "reporting": { + "units": "%", + "type": "PERCENT", + "warning": { + "text": "{1} CPU, load {0:.1%}", + "value": 200.0 + }, + "ok": { + "text": "{1} CPU, load {0:.1%}" + }, + "critical": { + "text": "{1} CPU, load {0:.1%}", + "value": 250.0 + } + }, + "type": "METRIC", + "uri": { + "connection_timeout": 5.0, + "default_port": 60010, + "http": "{{hbase-site/hbase.master.info.port}}", + "kerberos_principal": "{{hbase-site/hbase.security.authentication.spnego.kerberos.principal}}", + "kerberos_keytab": "{{hbase-site/hbase.security.authentication.spnego.kerberos.keytab}}" + } + }, + "serviceName": "HBASE", + "scope": "ANY", + "enabled": true, + "description": "This host-level alert is triggered if CPU utilization of the HBase Master exceeds certain warning and critical thresholds. It checks the HBase Master JMX Servlet for the SystemCPULoad property. The threshold values are in percent." + }, + { + "ignore_host": false, + "name": "hbase_regionserver_process_percent", + "enabled": true, + "interval": 1, + "clusterId": 2, + "uuid": "69ff4c8f-8e98-4cfd-b90f-6914e2f147ff", + "label": "Percent RegionServers Available", + "definitionId": 3, + "source": { + "alert_name": "hbase_regionserver_process", + "reporting": { + "units": "%", + "type": "PERCENT", + "warning": { + "text": "affected: [{1}], total: [{0}]", + "value": 10.0 + }, + "ok": { + "text": "affected: [{1}], total: [{0}]" + }, + "critical": { + "text": "affected: [{1}], total: [{0}]", + "value": 30.0 + } + }, + "type": "AGGREGATE" + }, + "serviceName": "HBASE", + "scope": "SERVICE", + "description": "This service-level alert is triggered if the configured percentage of RegionServer processes cannot be determined to be up and listening on the network for the configured warning and critical thresholds. It aggregates the results of RegionServer process down checks." + } + ] + } + + } +} \ No newline at end of file
