Repository: ambari Updated Branches: refs/heads/trunk d89501c40 -> ec74a4b14
AMBARI-5784. Nagios read timeout on 1000 node cluster (ncole) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/ec74a4b1 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/ec74a4b1 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/ec74a4b1 Branch: refs/heads/trunk Commit: ec74a4b14b4214d25601c5c6584361e93885552f Parents: d89501c Author: Nate Cole <[email protected]> Authored: Thu May 15 19:06:02 2014 -0400 Committer: Nate Cole <[email protected]> Committed: Thu May 15 20:38:53 2014 -0400 ---------------------------------------------------------------------- .../internal/AbstractProviderModule.java | 12 +++- .../controller/internal/URLStreamProvider.java | 22 +++++-- .../nagios/NagiosPropertyProvider.java | 20 +++++-- .../src/addOns/nagios/scripts/nagios_alerts.php | 63 +++++++++++++++----- 4 files changed, 88 insertions(+), 29 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/ec74a4b1/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/AbstractProviderModule.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/AbstractProviderModule.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/AbstractProviderModule.java index 3fa0dff..0ff3e62 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/AbstractProviderModule.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/AbstractProviderModule.java @@ -60,6 +60,8 @@ public abstract class AbstractProviderModule implements ProviderModule, Resource private static final int PROPERTY_REQUEST_CONNECT_TIMEOUT = 5000; private static final int PROPERTY_REQUEST_READ_TIMEOUT = 10000; + // nagios can take longer on big clusters + private static final int NAGIOS_READ_TIMEOUT = 30000; private static final String CLUSTER_NAME_PROPERTY_ID = PropertyHelper.getPropertyId("Clusters", "cluster_name"); private static final String HOST_COMPONENT_CLUSTER_NAME_PROPERTY_ID = PropertyHelper.getPropertyId("HostRoles", "cluster_name"); @@ -389,7 +391,7 @@ public abstract class AbstractProviderModule implements ProviderModule, Resource ComponentSSLConfiguration configuration = ComponentSSLConfiguration.instance(); URLStreamProvider streamProvider = new URLStreamProvider( PROPERTY_REQUEST_CONNECT_TIMEOUT, PROPERTY_REQUEST_READ_TIMEOUT, - configuration.getTruststorePath(), configuration.getTruststorePassword(), configuration.getTruststoreType()); + configuration); if (type.isInternalType()) { switch (type.getInternalType()) { @@ -403,7 +405,9 @@ public abstract class AbstractProviderModule implements ProviderModule, Resource break; case Service: providers.add(new NagiosPropertyProvider(type, - streamProvider, + new URLStreamProvider( + PROPERTY_REQUEST_CONNECT_TIMEOUT, NAGIOS_READ_TIMEOUT, + configuration), "ServiceInfo/cluster_name", "ServiceInfo/service_name")); break; @@ -417,7 +421,9 @@ public abstract class AbstractProviderModule implements ProviderModule, Resource PropertyHelper.getPropertyId("Hosts", "host_name") )); providers.add(new NagiosPropertyProvider(type, - streamProvider, + new URLStreamProvider( + PROPERTY_REQUEST_CONNECT_TIMEOUT, NAGIOS_READ_TIMEOUT, + configuration), "Hosts/cluster_name", "Hosts/host_name")); break; http://git-wip-us.apache.org/repos/asf/ambari/blob/ec74a4b1/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/URLStreamProvider.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/URLStreamProvider.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/URLStreamProvider.java index e1c228d..2b32d11 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/URLStreamProvider.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/URLStreamProvider.java @@ -18,8 +18,6 @@ package org.apache.ambari.server.controller.internal; -import com.google.gson.Gson; - import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -30,14 +28,13 @@ import java.net.URLConnection; import java.security.KeyStore; import java.util.List; import java.util.Map; -import static javax.ws.rs.core.MediaType.APPLICATION_JSON; -import static javax.ws.rs.core.MediaType.APPLICATION_FORM_URLENCODED; import javax.net.ssl.HttpsURLConnection; import javax.net.ssl.SSLContext; import javax.net.ssl.SSLSocketFactory; import javax.net.ssl.TrustManagerFactory; +import org.apache.ambari.server.configuration.ComponentSSLConfiguration; import org.apache.ambari.server.controller.utilities.StreamProvider; import org.apache.commons.io.IOUtils; import org.apache.commons.logging.Log; @@ -52,7 +49,6 @@ public class URLStreamProvider implements StreamProvider { private static final String COOKIE = "Cookie"; private static final String WWW_AUTHENTICATE = "WWW-Authenticate"; private static final String NEGOTIATE = "Negotiate"; - private static final String CONTENT_TYPE_HEADER_PARAMETER = "Content-Type"; private static Log LOG = LogFactory.getLog(URLStreamProvider.class); private final int connTimeout; @@ -70,6 +66,22 @@ public class URLStreamProvider implements StreamProvider { * time, in milliseconds, to attempt a connection * @param readTimeout * the read timeout in milliseconds + * @param configuration configuration holding TrustStore information + */ + public URLStreamProvider(int connectionTimeout, int readTimeout, + ComponentSSLConfiguration configuration) { + this(connectionTimeout, readTimeout, + configuration.getTruststorePath(), + configuration.getTruststorePassword(), + configuration.getTruststoreType()); + } + /** + * Provide the connection timeout for the underlying connection. + * + * @param connectionTimeout + * time, in milliseconds, to attempt a connection + * @param readTimeout + * the read timeout in milliseconds */ public URLStreamProvider(int connectionTimeout, int readTimeout, String path, String password, String type) { http://git-wip-us.apache.org/repos/asf/ambari/blob/ec74a4b1/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java index c275157..a0785f8 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java @@ -109,6 +109,7 @@ public class NagiosPropertyProvider extends BaseProvider implements PropertyProv private String clusterNameProperty; private String resourceTypeProperty; private StreamProvider urlStreamProvider; + private boolean waitOnFirstCall = false; @Inject public static void init(Injector injector) { @@ -154,7 +155,7 @@ public class NagiosPropertyProvider extends BaseProvider implements PropertyProv } } } - }, 0L, 20L, TimeUnit.SECONDS); + }, 0L, 30L, TimeUnit.SECONDS); } /** @@ -163,6 +164,7 @@ public class NagiosPropertyProvider extends BaseProvider implements PropertyProv public void forceReset() { CLUSTER_NAMES.clear(); CLUSTER_ALERTS.clear(); + waitOnFirstCall = true; } @Override @@ -186,6 +188,9 @@ public class NagiosPropertyProvider extends BaseProvider implements PropertyProv continue; if (!CLUSTER_ALERTS.containsKey(clusterName)) { + // prevent endless looping for the first-time collection + CLUSTER_ALERTS.put(clusterName, Collections.<NagiosAlert>emptyList()); + Future<List<NagiosAlert>> f = scheduler.submit(new Callable<List<NagiosAlert>>() { @Override public List<NagiosAlert> call() throws Exception { @@ -193,11 +198,13 @@ public class NagiosPropertyProvider extends BaseProvider implements PropertyProv } }); - try { - CLUSTER_ALERTS.put(clusterName, f.get()); - } catch (Exception e) { - LOG.error("Could not load metrics - Executor exception" + - " (" + e.getMessage() + ")"); + if (waitOnFirstCall) { + try { + CLUSTER_ALERTS.put(clusterName, f.get()); + } catch (Exception e) { + LOG.error("Could not load metrics - Executor exception" + + " (" + e.getMessage() + ")"); + } } } @@ -354,6 +361,7 @@ public class NagiosPropertyProvider extends BaseProvider implements PropertyProv String url = String.format(template, nagiosHost); InputStream in = null; + try { in = urlStreamProvider.readFrom(url); http://git-wip-us.apache.org/repos/asf/ambari/blob/ec74a4b1/contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php ---------------------------------------------------------------------- diff --git a/contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php b/contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php index 388ce94..8b9ccae 100644 --- a/contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php +++ b/contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php @@ -161,6 +161,7 @@ function hdp_mon_generate_response( $response_data ) $services_object = array (); $services_object["PUPPET"] = 0; foreach ($matches[0] as $object) { + if (getParameter($object, "service_description") == HDFS_SERVICE_CHECK) { $services_object["HDFS"] = getParameter($object, "last_hard_state"); if ($services_object["HDFS"] >= 1) { @@ -234,6 +235,7 @@ function hdp_mon_generate_response( $response_data ) $hostcounts_object = array (); $up_hosts = 0; $down_hosts = 0; + foreach ($matches[0] as $object) { if (getParameter($object, "last_hard_state") != ok) { $down_hosts++; @@ -294,13 +296,14 @@ function hdp_mon_generate_response( $response_data ) #echo $matches[1][0] . ", " . $matches[1][1] . "\n"; $services_objects = array (); $i = 0; - foreach ($matches[0] as $object) { + foreach ($matches[1] as $object) { + $map = getParameterMap($object); $servicestatus = array (); switch ($alert_type) { case "all": - if (empty($host) || getParameter($object, "host_name") == $host) { + if (empty($host) || getParameterMapValue($map, "host_name") == $host) { foreach ($servicestatus_attributes as $attrib) { - $servicestatus[$attrib] = htmlentities(getParameter($object, $attrib), ENT_COMPAT); + $servicestatus[$attrib] = htmlentities(getParameterMapValue($map, $attrib), ENT_COMPAT); } $servicestatus['service_type'] = get_service_type($servicestatus['service_description']); $srv_desc = explode ("::",$servicestatus['service_description'],2); @@ -309,10 +312,10 @@ function hdp_mon_generate_response( $response_data ) } break; case "nok": - if (getParameter($object, "last_hard_state") != ok && - (empty($host) || getParameter($object, "host_name") == $host)) { + if (getParameterMapValue($map, "last_hard_state") != ok && + (empty($host) || getParameterMapValue($map, "host_name") == $host)) { foreach ($servicestatus_attributes as $attrib) { - $servicestatus[$attrib] = htmlentities(getParameter($object, $attrib), ENT_COMPAT); + $servicestatus[$attrib] = htmlentities(getParameterMapValue($map, $attrib), ENT_COMPAT); } $servicestatus['service_type'] = get_service_type($servicestatus['service_description']); $srv_desc = explode ("::",$servicestatus['service_description'],2); @@ -320,10 +323,10 @@ function hdp_mon_generate_response( $response_data ) } break; case "ok": - if (getParameter($object, "last_hard_state") == ok && - (empty($host) || getParameter($object, "host_name") == $host)) { + if (getParameterMapValue($map, "last_hard_state") == ok && + (empty($host) || getParameterMapValue($map, "host_name") == $host)) { foreach ($servicestatus_attributes as $attrib) { - $servicestatus[$attrib] = htmlentities(getParameter($object, $attrib), ENT_COMPAT); + $servicestatus[$attrib] = htmlentities(getParameterMapValue($map, $attrib), ENT_COMPAT); } $servicestatus['service_type'] = get_service_type($servicestatus['service_description']); $srv_desc = explode ("::",$servicestatus['service_description'],2); @@ -331,10 +334,10 @@ function hdp_mon_generate_response( $response_data ) } break; case "warn": - if (getParameter($object, "last_hard_state") == warn && - (empty($host) || getParameter($object, "host_name") == $host)) { + if (getParameterMapValue($map, "last_hard_state") == warn && + (empty($host) || getParameterMapValue($map, "host_name") == $host)) { foreach ($servicestatus_attributes as $attrib) { - $servicestatus[$attrib] = htmlentities(getParameter($object, $attrib), ENT_COMPAT); + $servicestatus[$attrib] = htmlentities(getParameterMapValue($map, $attrib), ENT_COMPAT); } $servicestatus['service_type'] = get_service_type($servicestatus['service_description']); $srv_desc = explode ("::",$servicestatus['service_description'],2); @@ -342,10 +345,10 @@ function hdp_mon_generate_response( $response_data ) } break; case "critical": - if (getParameter($object, "last_hard_state") == critical && - (empty($host) || getParameter($object, "host_name") == $host)) { + if (getParameterMapValue($map, "last_hard_state") == critical && + (empty($host) || getParameterMapValue($map, "host_name") == $host)) { foreach ($servicestatus_attributes as $attrib) { - $servicestatus[$attrib] = htmlentities(getParameter($object, $attrib), ENT_COMPAT); + $servicestatus[$attrib] = htmlentities(getParameterMapValue($map, $attrib), ENT_COMPAT); } $servicestatus['service_type'] = get_service_type($servicestatus['service_description']); $srv_desc = explode ("::",$servicestatus['service_description'],2); @@ -436,6 +439,36 @@ function hdp_mon_generate_response( $response_data ) return $value; } + function getParameterMapValue($map, $key) { + $value = $map[$key]; + + if (!is_null($value)) + return "" . $value; + + return ""; + } + + function getParameterMap($object) { + $map = array (); + + $long_key = "long_plugin_output"; + $found_long = false; + foreach (preg_split("/\n/", trim($object)) as $line) { + $arr = explode("=", $line, 2); + $key = trim($arr[0]); + if ($found_long) { + $map[$long_key] = trim($line); + $found_long = false; + } else { + $map[$key] = $arr[1]; + if ($key == $long_key) + $found_long = true; + } + } + + return $map; + } + function indent($json) { $result = '';
