Repository: ambari Updated Branches: refs/heads/trunk 3b62427a4 -> b94eb716b
AMBARI-6421. Add capability to report alert data from agents to Nagios (ncole) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/b94eb716 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/b94eb716 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/b94eb716 Branch: refs/heads/trunk Commit: b94eb716ba3228be6d39f0fc9009f8e63d146c89 Parents: 3b62427 Author: Nate Cole <nc...@hortonworks.com> Authored: Tue Jul 8 13:24:43 2014 -0400 Committer: Nate Cole <nc...@hortonworks.com> Committed: Wed Jul 9 16:21:48 2014 -0400 ---------------------------------------------------------------------- .../ambari/server/agent/HeartbeatMonitor.java | 41 ++++++++- .../ambari/server/agent/NagiosAlertCommand.java | 44 ++++++++++ .../nagios/NagiosPropertyProvider.java | 11 --- .../NAGIOS/package/files/check_ambari_alerts.py | 83 ++++++++++++++++++ .../NAGIOS/package/scripts/nagios_server.py | 20 ++--- .../package/scripts/nagios_server_config.py | 1 + .../NAGIOS/package/scripts/nagios_service.py | 36 +++++++- .../services/NAGIOS/package/scripts/params.py | 4 +- .../NAGIOS/package/scripts/status_params.py | 3 + .../package/templates/hadoop-commands.cfg.j2 | 5 ++ .../templates/hadoop-servicegroups.cfg.j2 | 7 ++ .../package/templates/hadoop-services.cfg.j2 | 15 ++++ .../server/agent/TestHeartbeatMonitor.java | 92 ++++++++++++++++++++ .../nagios/NagiosPropertyProviderTest.java | 78 ----------------- .../stacks/2.0.6/NAGIOS/test_nagios_server.py | 5 ++ 15 files changed, 340 insertions(+), 105 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java index 5fa4062..959ee5a 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java @@ -17,26 +17,51 @@ */ package org.apache.ambari.server.agent; +import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.COMMAND_TIMEOUT; +import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.GLOBAL; +import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.HOOKS_FOLDER; +import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.JDK_LOCATION; +import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.SCRIPT; +import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.SCRIPT_TYPE; +import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.SERVICE_PACKAGE_FOLDER; +import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.STACK_NAME; +import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.STACK_VERSION; + import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.TreeMap; -import com.google.inject.Injector; import org.apache.ambari.server.AmbariException; import org.apache.ambari.server.actionmanager.ActionManager; import org.apache.ambari.server.api.services.AmbariMetaInfo; import org.apache.ambari.server.configuration.Configuration; import org.apache.ambari.server.controller.AmbariManagementController; import org.apache.ambari.server.controller.MaintenanceStateHelper; -import org.apache.ambari.server.state.*; +import org.apache.ambari.server.state.Alert; +import org.apache.ambari.server.state.Cluster; +import org.apache.ambari.server.state.Clusters; +import org.apache.ambari.server.state.CommandScriptDefinition; +import org.apache.ambari.server.state.ComponentInfo; +import org.apache.ambari.server.state.Config; +import org.apache.ambari.server.state.ConfigHelper; +import org.apache.ambari.server.state.Host; +import org.apache.ambari.server.state.HostState; +import org.apache.ambari.server.state.Service; +import org.apache.ambari.server.state.ServiceComponent; +import org.apache.ambari.server.state.ServiceComponentHost; +import org.apache.ambari.server.state.ServiceInfo; +import org.apache.ambari.server.state.StackId; +import org.apache.ambari.server.state.StackInfo; +import org.apache.ambari.server.state.State; import org.apache.ambari.server.state.fsm.InvalidStateTransitionException; import org.apache.ambari.server.state.host.HostHeartbeatLostEvent; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.*; +import com.google.inject.Injector; /** * Monitors the node state and heartbeats. @@ -251,6 +276,16 @@ public class HeartbeatMonitor implements Runnable { } StatusCommand statusCmd = new StatusCommand(); + if (sch.getServiceComponentName().equals("NAGIOS_SERVER")) { + // this requires special treatment + + Collection<Alert> alerts = cluster.getAlerts(); + if (null != alerts && alerts.size() > 0) { + statusCmd = new NagiosAlertCommand(); + ((NagiosAlertCommand) statusCmd).setAlerts(alerts); + } + } + statusCmd.setClusterName(cluster.getClusterName()); statusCmd.setServiceName(serviceName); statusCmd.setComponentName(componentName); http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/java/org/apache/ambari/server/agent/NagiosAlertCommand.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/NagiosAlertCommand.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/NagiosAlertCommand.java new file mode 100644 index 0000000..f8e2f26 --- /dev/null +++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/NagiosAlertCommand.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ambari.server.agent; + +import java.util.Collection; + +import org.apache.ambari.server.state.Alert; + +/** + * Specialized command that updates Nagios with alert data + */ +public class NagiosAlertCommand extends StatusCommand { + private Collection<Alert> alerts = null; + + /** + * @param alerts + */ + public void setAlerts(Collection<Alert> alertData) { + alerts = alertData; + } + + /** + * @return the alerts + */ + public Collection<Alert> getAlerts() { + return alerts; + } + +} http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java index ce1dc2f..7c40ed1 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java @@ -21,7 +21,6 @@ import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashSet; @@ -50,7 +49,6 @@ import org.apache.ambari.server.controller.spi.Request; import org.apache.ambari.server.controller.spi.Resource; import org.apache.ambari.server.controller.spi.SystemException; import org.apache.ambari.server.controller.utilities.StreamProvider; -import org.apache.ambari.server.state.Alert; import org.apache.ambari.server.state.Cluster; import org.apache.ambari.server.state.Clusters; import org.apache.ambari.server.state.Service; @@ -372,15 +370,6 @@ public class NagiosPropertyProvider extends BaseProvider implements PropertyProv if (!hosts.isEmpty()) nagiosHost = hosts.keySet().iterator().next(); - // !!! use the cluster to retrieve alerts that are not from Nagios, but - // from agents themselves. - Collection<Alert> currentAlerts = cluster.getAlerts(); - if (null != currentAlerts) { - for (Alert alert : currentAlerts) { - results.add(new NagiosAlert(alert)); - } - } - } catch (AmbariException e) { LOG.debug("Cannot find a nagios service. Skipping alerts."); } http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_ambari_alerts.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_ambari_alerts.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_ambari_alerts.py new file mode 100644 index 0000000..912a209 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_ambari_alerts.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# + +import os +import optparse +import json +import traceback + +def main(): + + parser = optparse.OptionParser() + + parser.add_option("-H", "--host", dest="host", default="localhost", help="NameNode host") + parser.add_option("-n", "--name", dest="alert_name", help="Alert name to check") + parser.add_option("-f", "--file", dest="alert_file", help="File containing the alert structure") + + (options, args) = parser.parse_args() + + if options.alert_name is None: + print "Alert name is required (--name or -n)" + exit(-1) + + if options.alert_file is None: + print "Alert file is required (--file or -f)" + exit(-1) + + if not os.path.exists(options.alert_file): + print "Alert file must exist" + exit(-1) + + try: + with open(options.alert_file, 'r') as f: + data = json.load(f) + + first = True + buf = '' + + for_hosts = data[options.alert_name] + if for_hosts.has_key(options.host): + for host_entry in for_hosts[options.host]: + alert_state = host_entry['state'] + alert_text = host_entry['text'] + if alert_state == 'CRITICAL': + print str(alert_text) + exit(2) + elif alert_state == 'WARNING': + print str(alert_text) + exit(1) + else: + if not first: + buf = buf + ', ' + buf = buf + alert_text + first = False + + print buf + exit(0) + + except Exception: + traceback.print_exc() + exit(3) + +if __name__ == "__main__": + main() + http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server.py index ed7cebc..da35b34 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server.py +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server.py @@ -24,6 +24,7 @@ import sys from resource_management import * from nagios import nagios from nagios_service import nagios_service +from nagios_service import update_active_alerts class NagiosServer(Script): @@ -59,21 +60,20 @@ class NagiosServer(Script): import status_params env.set_params(status_params) check_process_status(status_params.nagios_pid_file) + + # check for alert structures + update_active_alerts() + def remove_conflicting_packages(): - Package( 'hdp_mon_nagios_addons', - action = "remove" - ) + Package('hdp_mon_nagios_addons', action = "remove") - Package( 'nagios-plugins', - action = "remove" - ) + Package('nagios-plugins', action = "remove") if System.get_instance().os_family in ["redhat","suse"]: - Execute( "rpm -e --allmatches --nopostun nagios", - path = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", - ignore_failures = True - ) + Execute("rpm -e --allmatches --nopostun nagios", + path = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + ignore_failures = True) def update_ignorable(params): if not params.config.has_key('passiveInfo'): http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py index 564f78e..40a1bb6 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py @@ -69,6 +69,7 @@ def nagios_server_config(): nagios_server_check( 'hdp_nagios_init.php') nagios_server_check( 'check_checkpoint_time.py' ) nagios_server_check( 'sys_logger.py' ) + nagios_server_check( 'check_ambari_alerts.py' ) def nagios_server_configfile( name, http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_service.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_service.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_service.py index 8f47702..b7f512b 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_service.py +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_service.py @@ -20,6 +20,7 @@ Ambari Agent """ +import json import os import signal @@ -66,4 +67,37 @@ def nagios_service(action='start'): # start or stop if isfile( nagios_pid_file ): Execute(format("rm -f {nagios_pid_file}")) - MonitorWebserver("restart") \ No newline at end of file + MonitorWebserver("restart") + +def update_active_alerts(): + import status_params + + alerts = None + if 'alerts' in status_params.config and status_params.config['alerts'] is not None: + alerts = status_params.config['alerts'] + + if alerts is None: + return + + output = {} + + for a in alerts: + alert_name = a['name'] + alert_text = a['text'] + alert_state = a['state'] + alert_host = a['host'] + if not output.has_key(alert_name): + output[alert_name] = {} + + if not output[alert_name].has_key(alert_host): + output[alert_name][alert_host] = [] + + host_items = output[alert_name][alert_host] + alert_out = {} + alert_out['state'] = alert_state + alert_out['text'] = alert_text + host_items.append(alert_out) + + with open(os.path.join(status_params.nagios_var_dir, 'ambari.json'), 'w') as f: + json.dump(output, f) + http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py index 136255d..edb9b04 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py @@ -34,8 +34,8 @@ else: conf_dir = format("/etc/{nagios_service_name}") nagios_obj_dir = format("{conf_dir}/objects") -nagios_var_dir = "/var/nagios" -nagios_rw_dir = "/var/nagios/rw" +nagios_var_dir = status_params.nagios_var_dir +nagios_rw_dir = status_params.nagios_rw_dir if System.get_instance().os_family == "debian": host_template = "generic-host" http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/status_params.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/status_params.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/status_params.py index 33b35fe..11d4aa9 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/status_params.py +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/status_params.py @@ -24,3 +24,6 @@ config = Script.get_config() nagios_pid_dir = "/var/run/nagios" nagios_pid_file = format("{nagios_pid_dir}/nagios.pid") + +nagios_var_dir = "/var/nagios" +nagios_rw_dir = "/var/nagios/rw" http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 index caf0ff4..bc5b2df 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 @@ -149,3 +149,8 @@ define command{ command_name check_tcp_wrapper_sasl command_line $USER1$/check_wrapper.sh $USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ $ARG2$ -s \"$ARG3$\" } + +define command{ + command_name check_ambari + command_line $USER1$/check_wrapper.sh /var/lib/ambari-agent/ambari-python-wrap $USER1$/check_ambari_alerts.py -H $HOSTADDRESS$ -f $ARG1$ -n $ARG2$ + } http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2 ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2 b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2 index 3173072..00f0740 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2 +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2 @@ -110,3 +110,10 @@ define servicegroup { alias FALCON Checks } {% endif %} + +{%if hostgroup_defs['flume-servers'] %} +define servicegroup { + servicegroup_name FLUME + alias FLUME Checks +} +{% endif %} http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2 ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2 b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2 index aa626bf..2b2df1e 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2 +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2 @@ -777,3 +777,18 @@ define service { max_check_attempts 3 } {% endif %} + +{% if hostgroup_defs['flume-servers'] %} +# FLUME Checks +define service { + hostgroup_name flume-servers + use hadoop-service + service_description FLUME::Flume Agent process + servicegroups FLUME + check_command check_ambari!/var/nagios/ambari.json!flume_agent + normal_check_interval 1 + retry_check_interval 0.5 + max_check_attempts 3 +} +{% endif %} + http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java b/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java index 65d5a96..856baee 100644 --- a/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java +++ b/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java @@ -25,6 +25,7 @@ import static org.mockito.Mockito.atLeast; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.verify; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -37,6 +38,8 @@ import org.apache.ambari.server.actionmanager.ActionManager; import org.apache.ambari.server.api.services.AmbariMetaInfo; import org.apache.ambari.server.orm.GuiceJpaInitializer; import org.apache.ambari.server.orm.InMemoryDefaultTestModule; +import org.apache.ambari.server.state.Alert; +import org.apache.ambari.server.state.AlertState; import org.apache.ambari.server.state.Cluster; import org.apache.ambari.server.state.Clusters; import org.apache.ambari.server.state.Config; @@ -501,4 +504,93 @@ public class TestHeartbeatMonitor { sch.getState()); } } + + @Test + public void testStateCommandsWithAlertsGeneration() throws AmbariException, InterruptedException, + InvalidStateTransitionException { + Clusters clusters = injector.getInstance(Clusters.class); + + clusters.addHost(hostname1); + setOsFamily(clusters.getHost(hostname1), "redhat", "6.3"); + clusters.getHost(hostname1).persist(); + + clusters.addHost(hostname2); + setOsFamily(clusters.getHost(hostname2), "redhat", "6.3"); + clusters.getHost(hostname2).persist(); + clusters.addCluster(clusterName); + + Cluster cluster = clusters.getCluster(clusterName); + cluster.setDesiredStackVersion(new StackId("HDP-2.0.7")); + Set<String> hostNames = new HashSet<String>(){{ + add(hostname1); + add(hostname2); + }}; + + clusters.mapHostsToCluster(hostNames, clusterName); + + Service hdfs = cluster.addService(serviceName); + Service nagios = cluster.addService("NAGIOS"); + + hdfs.persist(); + nagios.persist(); + + hdfs.addServiceComponent(Role.DATANODE.name()).persist(); + hdfs.getServiceComponent(Role.DATANODE.name()).addServiceComponentHost(hostname1).persist(); + hdfs.addServiceComponent(Role.NAMENODE.name()).persist(); + hdfs.getServiceComponent(Role.NAMENODE.name()).addServiceComponentHost(hostname1).persist(); + hdfs.addServiceComponent(Role.SECONDARY_NAMENODE.name()).persist(); + hdfs.getServiceComponent(Role.SECONDARY_NAMENODE.name()).addServiceComponentHost(hostname1).persist(); + nagios.addServiceComponent(Role.NAGIOS_SERVER.name()).persist(); + nagios.getServiceComponent(Role.NAGIOS_SERVER.name()).addServiceComponentHost(hostname1).persist(); + + hdfs.getServiceComponent(Role.DATANODE.name()).getServiceComponentHost(hostname1).setState(State.INSTALLED); + hdfs.getServiceComponent(Role.NAMENODE.name()).getServiceComponentHost(hostname1).setState(State.INSTALLED); + hdfs.getServiceComponent(Role.SECONDARY_NAMENODE.name()).getServiceComponentHost(hostname1).setState(State.INSTALLED); + nagios.getServiceComponent(Role.NAGIOS_SERVER.name()).getServiceComponentHost(hostname1).setState(State.INSTALLED); + + + Alert alert = new Alert("datanode_madeup", null, "HDFS", "DATANODE", + hostname1, AlertState.CRITICAL); + cluster.addAlerts(Collections.singleton(alert)); + + ActionQueue aq = new ActionQueue(); + ActionManager am = mock(ActionManager.class); + HeartbeatMonitor hm = new HeartbeatMonitor(clusters, aq, am, + heartbeatMonitorWakeupIntervalMS, injector); + HeartBeatHandler handler = new HeartBeatHandler(clusters, aq, am, injector); + Register reg = new Register(); + reg.setHostname(hostname1); + reg.setResponseId(12); + reg.setTimestamp(System.currentTimeMillis() - 300); + reg.setAgentVersion(ambariMetaInfo.getServerVersion()); + HostInfo hi = new HostInfo(); + hi.setOS("Centos5"); + reg.setHardwareProfile(hi); + handler.handleRegistration(reg); + + HeartBeat hb = new HeartBeat(); + hb.setHostname(hostname1); + hb.setNodeStatus(new HostStatus(HostStatus.Status.HEALTHY, "cool")); + hb.setTimestamp(System.currentTimeMillis()); + hb.setResponseId(12); + handler.handleHeartBeat(hb); + + List<StatusCommand> cmds = hm.generateStatusCommands(hostname1); + assertEquals("HeartbeatMonitor should generate StatusCommands for host1", 4, cmds.size()); + assertEquals("HDFS", cmds.get(0).getServiceName()); + + boolean containsNAGIOSStatus = false; + for (StatusCommand cmd : cmds) { + if (cmd.getComponentName().equals(Role.NAGIOS_SERVER.name())) { + containsNAGIOSStatus = true; + assertTrue(cmd.getClass().equals(NagiosAlertCommand.class)); + assertEquals(1, ((NagiosAlertCommand) cmd).getAlerts().size()); + } + + } + assertTrue(containsNAGIOSStatus); + + cmds = hm.generateStatusCommands(hostname2); + assertTrue("HeartbeatMonitor should not generate StatusCommands for host2 because it has no services", cmds.isEmpty()); + } } http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/test/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProviderTest.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProviderTest.java b/ambari-server/src/test/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProviderTest.java index 92d3c40..cc45ef0 100644 --- a/ambari-server/src/test/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProviderTest.java +++ b/ambari-server/src/test/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProviderTest.java @@ -38,7 +38,6 @@ import org.apache.ambari.server.controller.spi.Resource; import org.apache.ambari.server.controller.spi.TemporalInfo; import org.apache.ambari.server.controller.utilities.PropertyHelper; import org.apache.ambari.server.state.Alert; -import org.apache.ambari.server.state.AlertState; import org.apache.ambari.server.state.Cluster; import org.apache.ambari.server.state.Clusters; import org.apache.ambari.server.state.Service; @@ -578,81 +577,4 @@ public class NagiosPropertyProviderTest { } } - @Test - public void testNagiosServiceAlertsWithAgentAlerts() throws Exception { - Injector inj = Guice.createInjector(new GuiceModule()); - - Clusters clusters = inj.getInstance(Clusters.class); - Cluster cluster = createMock(Cluster.class); - - Alert alert = new Alert("ganglia_madeup", null, "GANGLIA", "GANGLIA_MYSTERY", - "h1", AlertState.CRITICAL); - - expect(cluster.getAlerts()).andReturn(Collections.singleton(alert)).anyTimes(); - expect(clusters.getCluster("c1")).andReturn(cluster); - - Service nagiosService = createMock(Service.class); - expect(cluster.getService("NAGIOS")).andReturn(nagiosService); - - ServiceComponent nagiosServiceComponent = createMock(ServiceComponent.class); - expect(nagiosService.getServiceComponent("NAGIOS_SERVER")).andReturn(nagiosServiceComponent); - - ServiceComponentHost nagiosScHost = createMock(ServiceComponentHost.class); - Map<String, ServiceComponentHost> map1 = new HashMap<String, ServiceComponentHost>(); - map1.put(HOST, nagiosScHost); - expect(nagiosServiceComponent.getServiceComponentHosts()).andReturn(map1); - - replay(clusters, cluster, nagiosService, nagiosServiceComponent); - - - TestStreamProvider streamProvider = new TestStreamProvider("nagios_alerts.txt"); - - NagiosPropertyProvider npp = new NagiosPropertyProvider(Resource.Type.Service, - streamProvider, - "ServiceInfo/cluster_name", - "ServiceInfo/service_name"); - npp.forceReset(); - NagiosPropertyProvider.init(inj); - - Resource resource = new ResourceImpl(Resource.Type.Service); - resource.setProperty("ServiceInfo/cluster_name", "c1"); - resource.setProperty("ServiceInfo/service_name", "GANGLIA"); - - // request with an empty set should get all supported properties - Request request = PropertyHelper.getReadRequest(Collections.<String>emptySet(), new HashMap<String, TemporalInfo>()); - - Set<Resource> set = npp.populateResources(Collections.singleton(resource), request, null); - Assert.assertEquals(1, set.size()); - - Resource res = set.iterator().next(); - - Map<String, Map<String, Object>> values = res.getPropertiesMap(); - - Assert.assertTrue(values.containsKey("alerts")); - Assert.assertTrue(values.containsKey("alerts/summary")); - Assert.assertTrue(values.get("alerts").containsKey("detail")); - Assert.assertTrue(List.class.isInstance(values.get("alerts").get("detail"))); - - List<?> list = (List<?>) values.get("alerts").get("detail"); - // removed an additional one - Assert.assertEquals(Integer.valueOf(5), Integer.valueOf(list.size())); - for (Object o : list) { - Assert.assertTrue(Map.class.isInstance(o)); - Map<?, ?> map = (Map<?, ?>) o; - Assert.assertTrue(map.containsKey("service_name")); - String serviceName = map.get("service_name").toString(); - Assert.assertEquals(serviceName, "GANGLIA"); - } - - Map<String, Object> summary = values.get("alerts/summary"); - Assert.assertTrue(summary.containsKey("OK")); - Assert.assertTrue(summary.containsKey("WARNING")); - Assert.assertTrue(summary.containsKey("CRITICAL")); - Assert.assertTrue(summary.containsKey("PASSIVE")); - - Assert.assertEquals(Integer.valueOf(3), summary.get("OK")); - Assert.assertEquals(Integer.valueOf(0), summary.get("WARNING")); - Assert.assertEquals(Integer.valueOf(1), summary.get("CRITICAL")); - Assert.assertEquals(Integer.valueOf(1), summary.get("PASSIVE")); - } } http://git-wip-us.apache.org/repos/asf/ambari/blob/b94eb716/ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py b/ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py index 51f89df..f51169c 100644 --- a/ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py +++ b/ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py @@ -268,6 +268,11 @@ class TestNagiosServer(RMFTestCase): content = StaticFile('sys_logger.py'), mode = 0755, ) + self.assertResourceCalled('File', + '/usr/lib64/nagios/plugins/check_ambari_alerts.py', + content=StaticFile('check_ambari_alerts.py'), + mode=0755 + ) self.assertResourceCalled('Execute', 'htpasswd2 -c -b /etc/nagios/htpasswd.users nagiosadmin \'!`"\'"\'"\' 1\'', not_if="grep nagiosadmin /etc/nagios/htpasswd.users"