Repository: ambari Updated Branches: refs/heads/trunk 241092948 -> 449be0f5c
AMBARI-15647 - Alerts Using the SKIPPED State Cause Stale Alert Notification (jonathanhurley) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/449be0f5 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/449be0f5 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/449be0f5 Branch: refs/heads/trunk Commit: 449be0f5c8c3a06b2f414d53e91136f66c0f1743 Parents: 2410929 Author: Jonathan Hurley <[email protected]> Authored: Thu Mar 31 09:31:33 2016 -0400 Committer: Jonathan Hurley <[email protected]> Committed: Thu Mar 31 15:18:53 2016 -0400 ---------------------------------------------------------------------- .../python/ambari_agent/alerts/base_alert.py | 9 -- .../src/test/python/ambari_agent/TestAlerts.py | 7 +- .../listeners/alerts/AlertReceivedListener.java | 20 ++- .../apache/ambari/server/state/AlertState.java | 11 +- .../state/alerts/AlertReceivedListenerTest.java | 160 ++++++++++++++----- 5 files changed, 148 insertions(+), 59 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/449be0f5/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py b/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py index 92db07c..6c8ca5a 100644 --- a/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py +++ b/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py @@ -113,15 +113,6 @@ class BaseAlert(object): result_state = res[0] reporting_state = result_state.lower() - # if the alert reports that it should be SKIPPED, then skip it - # this is useful for cases where the alert might run on multiple hosts - # but only 1 host should report the data - if result_state == BaseAlert.RESULT_SKIPPED: - logger.debug('[Alert][{0}] Skipping UUID {1}.'.format(self.get_name(), - self.get_uuid())) - - return - # it's possible that the alert definition doesn't have reporting; safely # check for it and fallback to default text if it doesn't exist if ('reporting' in self.alert_source_meta) and \ http://git-wip-us.apache.org/repos/asf/ambari/blob/449be0f5/ambari-agent/src/test/python/ambari_agent/TestAlerts.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/ambari_agent/TestAlerts.py b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py index bf56703..e5f6a41 100644 --- a/ambari-agent/src/test/python/ambari_agent/TestAlerts.py +++ b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py @@ -700,13 +700,16 @@ class TestAlerts(TestCase): alert.set_helpers(collector, cluster_configuration ) alert.set_cluster("c1", "c6401.ambari.apache.org") + alert.collect() + self.assertEquals(definition_json['source']['path'], alert.path) self.assertEquals(definition_json['source']['stacks_directory'], alert.stacks_dir) self.assertEquals(definition_json['source']['common_services_directory'], alert.common_services_dir) self.assertEquals(definition_json['source']['host_scripts_directory'], alert.host_scripts_dir) - # ensure that it was skipped - self.assertEquals(0,len(collector.alerts())) + # ensure that the skipped alert was still placed into the collector; it's up to + # the server to decide how to handle skipped alerts + self.assertEquals(1,len(collector.alerts())) def test_default_reporting_text(self): http://git-wip-us.apache.org/repos/asf/ambari/blob/449be0f5/ambari-server/src/main/java/org/apache/ambari/server/events/listeners/alerts/AlertReceivedListener.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/events/listeners/alerts/AlertReceivedListener.java b/ambari-server/src/main/java/org/apache/ambari/server/events/listeners/alerts/AlertReceivedListener.java index f6aa1aa..a3befa6 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/events/listeners/alerts/AlertReceivedListener.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/events/listeners/alerts/AlertReceivedListener.java @@ -165,6 +165,7 @@ public class AlertReceivedListener { } AlertCurrentEntity current; + AlertState alertState = alert.getState(); if (StringUtils.isBlank(alert.getHostName()) || definition.isHostIgnored()) { current = m_alertsDao.findCurrentByNameNoHost(clusterId, alert.getName()); @@ -174,6 +175,12 @@ public class AlertReceivedListener { } if (null == current) { + // if there is no current alert and the state is skipped, then simple + // skip over this one as there is nothing to update in the databse + if (alertState == AlertState.SKIPPED) { + continue; + } + AlertHistoryEntity history = createHistory(clusterId, definition, alert); // this new alert must reflect the correct MM state for the @@ -194,11 +201,18 @@ public class AlertReceivedListener { toCreate.put(alert, current); - } else if (alert.getState() == current.getAlertHistory().getAlertState()) { + } else if (alertState == current.getAlertHistory().getAlertState() + || alertState == AlertState.SKIPPED) { + + // update the timestamp current.setLatestTimestamp(alert.getTimestamp()); - current.setLatestText(alert.getText()); - toMerge.put(alert, current); + // only update the text if the alert isn't being skipped + if (alertState != AlertState.SKIPPED) { + current.setLatestText(alert.getText()); + } + + toMerge.put(alert, current); } else { if (LOG.isDebugEnabled()) { LOG.debug( http://git-wip-us.apache.org/repos/asf/ambari/blob/449be0f5/ambari-server/src/main/java/org/apache/ambari/server/state/AlertState.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/state/AlertState.java b/ambari-server/src/main/java/org/apache/ambari/server/state/AlertState.java index 8cc245a..3bb70e2 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/state/AlertState.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/state/AlertState.java @@ -25,17 +25,26 @@ public enum AlertState { * Alert does not need to be distributed. Normal Operation. */ OK, + /** * Alert indicates there may be an issue. The component may be operating * normally but may be in danger of becoming <code>CRITICAL</code>. */ WARNING, + /** * Indicates there is a critical situation that needs to be addressed. */ CRITICAL, + /** * The state of the alert is not known. */ - UNKNOWN + UNKNOWN, + + /** + * Indicates that the state of the alert should be discarded, but the alert + * timestamps should be updated so that it is not considered stale. + */ + SKIPPED; } http://git-wip-us.apache.org/repos/asf/ambari/blob/449be0f5/ambari-server/src/test/java/org/apache/ambari/server/state/alerts/AlertReceivedListenerTest.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/java/org/apache/ambari/server/state/alerts/AlertReceivedListenerTest.java b/ambari-server/src/test/java/org/apache/ambari/server/state/alerts/AlertReceivedListenerTest.java index 775fe83..0d893af 100644 --- a/ambari-server/src/test/java/org/apache/ambari/server/state/alerts/AlertReceivedListenerTest.java +++ b/ambari-server/src/test/java/org/apache/ambari/server/state/alerts/AlertReceivedListenerTest.java @@ -169,25 +169,24 @@ public class AlertReceivedListenerTest { String definitionName = ALERT_DEFINITION + "1"; String componentName = "DATANODE"; - Alert alert1 = new Alert(definitionName, null, "HDFS", componentName, + Alert alert = new Alert(definitionName, null, "HDFS", componentName, HOST1, AlertState.OK); - alert1.setCluster(m_cluster.getClusterName()); - alert1.setLabel(ALERT_LABEL); - alert1.setText("HDFS " + componentName + " is OK"); - alert1.setTimestamp(1L); + alert.setCluster(m_cluster.getClusterName()); + alert.setLabel(ALERT_LABEL); + alert.setText("HDFS " + componentName + " is OK"); + alert.setTimestamp(1L); // verify that the listener works with a regular alert AlertReceivedListener listener = m_injector.getInstance(AlertReceivedListener.class); - AlertReceivedEvent event1 = new AlertReceivedEvent( - m_cluster.getClusterId(), alert1); - listener.onAlertEvent(event1); + AlertReceivedEvent event = new AlertReceivedEvent(m_cluster.getClusterId(), alert); + listener.onAlertEvent(event); List<AlertCurrentEntity> allCurrent = m_dao.findCurrent(); assertEquals(1, allCurrent.size()); // invalid host - alert1.setHostName("INVALID"); + alert.setHostName("INVALID"); // remove all m_dao.removeCurrentByHost(HOST1); @@ -195,7 +194,7 @@ public class AlertReceivedListenerTest { assertEquals(0, allCurrent.size()); // verify no new alerts for disabled - listener.onAlertEvent(event1); + listener.onAlertEvent(event); allCurrent = m_dao.findCurrent(); assertEquals(0, allCurrent.size()); } @@ -207,17 +206,17 @@ public class AlertReceivedListenerTest { public void testInvalidAlertDefinition() { String componentName = "DATANODE"; - Alert alert1 = new Alert("missing_alert_definition_name", null, "HDFS", + Alert alert = new Alert("missing_alert_definition_name", null, "HDFS", componentName, HOST1, AlertState.OK); - alert1.setLabel(ALERT_LABEL); - alert1.setText("HDFS " + componentName + " is OK"); - alert1.setTimestamp(1L); + alert.setLabel(ALERT_LABEL); + alert.setText("HDFS " + componentName + " is OK"); + alert.setTimestamp(1L); // bad alert definition name means no current alerts AlertReceivedListener listener = m_injector.getInstance(AlertReceivedListener.class); AlertReceivedEvent event1 = new AlertReceivedEvent( - m_cluster.getClusterId(), alert1); + m_cluster.getClusterId(), alert); listener.onAlertEvent(event1); List<AlertCurrentEntity> allCurrent = m_dao.findCurrent(); @@ -232,25 +231,25 @@ public class AlertReceivedListenerTest { String definitionName = ALERT_DEFINITION + "1"; String componentName = "DATANODE"; - Alert alert1 = new Alert(definitionName, null, "HDFS", componentName, + Alert alert = new Alert(definitionName, null, "HDFS", componentName, HOST1, AlertState.OK); - alert1.setCluster(m_cluster.getClusterName()); - alert1.setLabel(ALERT_LABEL); - alert1.setText("HDFS " + componentName + " is OK"); - alert1.setTimestamp(1L); + alert.setCluster(m_cluster.getClusterName()); + alert.setLabel(ALERT_LABEL); + alert.setText("HDFS " + componentName + " is OK"); + alert.setTimestamp(1L); // verify that the listener works with a regular alert AlertReceivedListener listener = m_injector.getInstance(AlertReceivedListener.class); AlertReceivedEvent event1 = new AlertReceivedEvent( - m_cluster.getClusterId(), alert1); + m_cluster.getClusterId(), alert); listener.onAlertEvent(event1); List<AlertCurrentEntity> allCurrent = m_dao.findCurrent(); assertEquals(1, allCurrent.size()); // invalid host - alert1.setComponent("INVALID"); + alert.setComponent("INVALID"); // remove all m_dao.removeCurrentByHost(HOST1); @@ -318,24 +317,24 @@ public class AlertReceivedListenerTest { String serviceName = Services.AMBARI.name(); String componentName = Components.AMBARI_AGENT.name(); - Alert alert1 = new Alert(definitionName, null, serviceName, componentName, HOST1, + Alert alert = new Alert(definitionName, null, serviceName, componentName, HOST1, AlertState.OK); - alert1.setCluster(m_cluster.getClusterName()); - alert1.setLabel(ALERT_LABEL); - alert1.setText(serviceName + " " + componentName + " is OK"); - alert1.setTimestamp(1L); + alert.setCluster(m_cluster.getClusterName()); + alert.setLabel(ALERT_LABEL); + alert.setText(serviceName + " " + componentName + " is OK"); + alert.setTimestamp(1L); // verify that the listener works with a regular alert AlertReceivedListener listener = m_injector.getInstance(AlertReceivedListener.class); - AlertReceivedEvent event1 = new AlertReceivedEvent(m_cluster.getClusterId(), alert1); - listener.onAlertEvent(event1); + AlertReceivedEvent event = new AlertReceivedEvent(m_cluster.getClusterId(), alert); + listener.onAlertEvent(event); List<AlertCurrentEntity> allCurrent = m_dao.findCurrent(); assertEquals(1, allCurrent.size()); // invalid host - alert1.setHostName("INVALID"); + alert.setHostName("INVALID"); // remove all m_dao.removeCurrentByHost(HOST1); @@ -343,7 +342,7 @@ public class AlertReceivedListenerTest { assertEquals(0, allCurrent.size()); // verify no new alerts received - listener.onAlertEvent(event1); + listener.onAlertEvent(event); allCurrent = m_dao.findCurrent(); assertEquals(0, allCurrent.size()); } @@ -357,25 +356,25 @@ public class AlertReceivedListenerTest { String serviceName = Services.AMBARI.name(); String componentName = Components.AMBARI_SERVER.name(); - Alert alert1 = new Alert(definitionName, null, serviceName, componentName, HOST1, + Alert alert = new Alert(definitionName, null, serviceName, componentName, HOST1, AlertState.OK); - alert1.setCluster(m_cluster.getClusterName()); - alert1.setLabel(ALERT_LABEL); - alert1.setText(serviceName + " " + componentName + " is OK"); - alert1.setTimestamp(1L); + alert.setCluster(m_cluster.getClusterName()); + alert.setLabel(ALERT_LABEL); + alert.setText(serviceName + " " + componentName + " is OK"); + alert.setTimestamp(1L); // verify that the listener works with a regular alert AlertReceivedListener listener = m_injector.getInstance(AlertReceivedListener.class); - AlertReceivedEvent event1 = new AlertReceivedEvent(m_cluster.getClusterId(), alert1); - listener.onAlertEvent(event1); + AlertReceivedEvent event = new AlertReceivedEvent(m_cluster.getClusterId(), alert); + listener.onAlertEvent(event); List<AlertCurrentEntity> allCurrent = m_dao.findCurrent(); assertEquals(1, allCurrent.size()); // invalid host, invalid cluster - alert1.setHostName("INVALID"); - alert1.setCluster("INVALID"); + alert.setHostName("INVALID"); + alert.setCluster("INVALID"); // remove all m_dao.removeCurrentByHost(HOST1); @@ -383,7 +382,7 @@ public class AlertReceivedListenerTest { assertEquals(0, allCurrent.size()); // verify that the alert was still received - listener.onAlertEvent(event1); + listener.onAlertEvent(event); allCurrent = m_dao.findCurrent(); assertEquals(1, allCurrent.size()); } @@ -408,8 +407,8 @@ public class AlertReceivedListenerTest { // verify that the listener works with a regular alert AlertReceivedListener listener = m_injector.getInstance(AlertReceivedListener.class); - AlertReceivedEvent event1 = new AlertReceivedEvent(m_cluster.getClusterId(), alert1); - listener.onAlertEvent(event1); + AlertReceivedEvent event = new AlertReceivedEvent(m_cluster.getClusterId(), alert1); + listener.onAlertEvent(event); List<AlertCurrentEntity> allCurrent = m_dao.findCurrent(); assertEquals(1, allCurrent.size()); @@ -424,8 +423,81 @@ public class AlertReceivedListenerTest { assertEquals(0, allCurrent.size()); // verify no new alerts received - listener.onAlertEvent(event1); + listener.onAlertEvent(event); allCurrent = m_dao.findCurrent(); assertEquals(0, allCurrent.size()); } + + /** + * Tests that receiving and alert with {@link AlertState#SKIPPED} does not create an entry + * if there is currently no current alert. + */ + @Test + public void testSkippedAlertWithNoCurrentAlert() { + String definitionName = ALERT_DEFINITION + "1"; + String serviceName = "HDFS"; + String componentName = "NAMENODE"; + + Alert alert = new Alert(definitionName, null, serviceName, componentName, HOST1, AlertState.SKIPPED); + + alert.setCluster(m_cluster.getClusterName()); + alert.setLabel(ALERT_LABEL); + alert.setText(serviceName + " " + componentName + " is OK"); + alert.setTimestamp(1L); + + // fire the alert, and check that nothing gets created + AlertReceivedListener listener = m_injector.getInstance(AlertReceivedListener.class); + AlertReceivedEvent event = new AlertReceivedEvent(m_cluster.getClusterId(), alert); + listener.onAlertEvent(event); + + List<AlertCurrentEntity> allCurrent = m_dao.findCurrent(); + assertEquals(0, allCurrent.size()); + } + + /** + * Tests that receiving and alert with {@link AlertState#SKIPPED} does not + * create an entry if there is currently no current alert. + */ + @Test + public void testSkippedAlertUpdatesTimestamp() { + String definitionName = ALERT_DEFINITION + "1"; + String serviceName = "HDFS"; + String componentName = "NAMENODE"; + String text = serviceName + " " + componentName + " is OK"; + + Alert alert = new Alert(definitionName, null, serviceName, componentName, HOST1, AlertState.OK); + + alert.setCluster(m_cluster.getClusterName()); + alert.setLabel(ALERT_LABEL); + alert.setText(text); + alert.setTimestamp(1L); + + // fire the alert, and check that the new entry was created with the right + // timestamp + AlertReceivedListener listener = m_injector.getInstance(AlertReceivedListener.class); + AlertReceivedEvent event = new AlertReceivedEvent(m_cluster.getClusterId(), alert); + listener.onAlertEvent(event); + + List<AlertCurrentEntity> allCurrent = m_dao.findCurrent(); + assertEquals(1, allCurrent.size()); + + // check timestamp + assertEquals(1L, (long) allCurrent.get(0).getOriginalTimestamp()); + assertEquals(1L, (long) allCurrent.get(0).getLatestTimestamp()); + + // update the timestamp and the state + alert.setState(AlertState.SKIPPED); + alert.setTimestamp(2L); + + // the logic we have does NOT update the text, so make sure this does not + // change + alert.setText("INVALID"); + + // get the current make sure the fields were updated + listener.onAlertEvent(event); + allCurrent = m_dao.findCurrent(); + assertEquals(1L, (long) allCurrent.get(0).getOriginalTimestamp()); + assertEquals(2L, (long) allCurrent.get(0).getLatestTimestamp()); + assertEquals(text, allCurrent.get(0).getLatestText()); + } }
