AMBARI-20071 : Hadoop metrics sink prints lots of logs if collector is unavailable. (avijayan)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/64389cca Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/64389cca Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/64389cca Branch: refs/heads/branch-feature-AMBARI-20053 Commit: 64389cca8c6ebb15a46874aab4efba7e6352f25c Parents: bfaf613 Author: Aravindan Vijayan <[email protected]> Authored: Tue Feb 21 10:28:39 2017 -0800 Committer: Aravindan Vijayan <[email protected]> Committed: Tue Feb 21 10:28:39 2017 -0800 ---------------------------------------------------------------------- .../timeline/AbstractTimelineMetricsSink.java | 22 +++++++++++++------- .../timeline/HadoopTimelineMetricsSink.java | 11 +++++++--- 2 files changed, 23 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/64389cca/ambari-metrics/ambari-metrics-common/src/main/java/org/apache/hadoop/metrics2/sink/timeline/AbstractTimelineMetricsSink.java ---------------------------------------------------------------------- diff --git a/ambari-metrics/ambari-metrics-common/src/main/java/org/apache/hadoop/metrics2/sink/timeline/AbstractTimelineMetricsSink.java b/ambari-metrics/ambari-metrics-common/src/main/java/org/apache/hadoop/metrics2/sink/timeline/AbstractTimelineMetricsSink.java index f5a02e4..a1fd008 100644 --- a/ambari-metrics/ambari-metrics-common/src/main/java/org/apache/hadoop/metrics2/sink/timeline/AbstractTimelineMetricsSink.java +++ b/ambari-metrics/ambari-metrics-common/src/main/java/org/apache/hadoop/metrics2/sink/timeline/AbstractTimelineMetricsSink.java @@ -82,6 +82,8 @@ public abstract class AbstractTimelineMetricsSink { protected static final AtomicInteger failedCollectorConnectionsCounter = new AtomicInteger(0); public static int NUMBER_OF_SKIPPED_COLLECTOR_EXCEPTIONS = 100; + protected static final AtomicInteger nullCollectorCounter = new AtomicInteger(0); + public static int NUMBER_OF_NULL_COLLECTOR_EXCEPTIONS = 20; public int ZK_CONNECT_TRY_COUNT = 10; public int ZK_SLEEP_BETWEEN_RETRY_TIME = 2000; public boolean shardExpired = true; @@ -214,7 +216,7 @@ public abstract class AbstractTimelineMetricsSink { collectorHost = targetCollectorHostSupplier.get(); // Last X attempts have failed - force refresh if (failedCollectorConnectionsCounter.get() > RETRY_COUNT_BEFORE_COLLECTOR_FAILOVER) { - LOG.info("Removing collector " + collectorHost + " from allKnownLiveCollectors."); + LOG.debug("Removing collector " + collectorHost + " from allKnownLiveCollectors."); allKnownLiveCollectors.remove(collectorHost); targetCollectorHostSupplier = null; collectorHost = findPreferredCollectHost(); @@ -224,8 +226,15 @@ public abstract class AbstractTimelineMetricsSink { } if (collectorHost == null) { - LOG.warn("No live collector to send metrics to. Metrics to be sent will be discarded."); + if (nullCollectorCounter.getAndIncrement() == 0) { + LOG.info("No live collector to send metrics to. Metrics to be sent will be discarded. " + + "This message will be skipped for the next " + NUMBER_OF_NULL_COLLECTOR_EXCEPTIONS + " times."); + } else { + nullCollectorCounter.compareAndSet(NUMBER_OF_NULL_COLLECTOR_EXCEPTIONS, 0); + } return false; + } else { + nullCollectorCounter.set(0); } String connectUrl = getCollectorUri(collectorHost); @@ -356,7 +365,7 @@ public abstract class AbstractTimelineMetricsSink { if (allKnownLiveCollectors.size() == 0 && getZookeeperQuorum() != null && (currentTime - lastFailedZkRequestTime) > zookeeperBackoffTimeMillis) { - LOG.info("No live collectors from configuration. Requesting zookeeper..."); + LOG.debug("No live collectors from configuration. Requesting zookeeper..."); allKnownLiveCollectors.addAll(collectorHAHelper.findLiveCollectorHostsFromZNode()); boolean noNewCollectorFromZk = true; for (String collectorHostFromZk : allKnownLiveCollectors) { @@ -366,7 +375,7 @@ public abstract class AbstractTimelineMetricsSink { } } if (noNewCollectorFromZk) { - LOG.info("No new collector was found from Zookeeper. Will not request zookeeper for " + zookeeperBackoffTimeMillis + " millis"); + LOG.debug("No new collector was found from Zookeeper. Will not request zookeeper for " + zookeeperBackoffTimeMillis + " millis"); lastFailedZkRequestTime = System.currentTimeMillis(); } } @@ -396,7 +405,7 @@ public abstract class AbstractTimelineMetricsSink { shardExpired = true; return collectorHost; } - LOG.warn("Couldn't find any live collectors. Returning null"); + LOG.debug("Couldn't find any live collectors. Returning null"); shardExpired = true; return null; } @@ -416,7 +425,7 @@ public abstract class AbstractTimelineMetricsSink { } break; // Found at least 1 live collector } catch (MetricCollectorUnavailableException e) { - LOG.info("Collector " + hostStr + " is not longer live. Removing " + + LOG.debug("Collector " + hostStr + " is not longer live. Removing " + "it from list of know live collector hosts : " + allKnownLiveCollectors); allKnownLiveCollectors.remove(hostStr); } @@ -473,7 +482,6 @@ public abstract class AbstractTimelineMetricsSink { LOG.debug(errorMessage); LOG.debug(ioe); String warnMsg = "Unable to connect to collector to find live nodes."; - LOG.warn(warnMsg); throw new MetricCollectorUnavailableException(warnMsg); } return collectors; http://git-wip-us.apache.org/repos/asf/ambari/blob/64389cca/ambari-metrics/ambari-metrics-hadoop-sink/src/main/java/org/apache/hadoop/metrics2/sink/timeline/HadoopTimelineMetricsSink.java ---------------------------------------------------------------------- diff --git a/ambari-metrics/ambari-metrics-hadoop-sink/src/main/java/org/apache/hadoop/metrics2/sink/timeline/HadoopTimelineMetricsSink.java b/ambari-metrics/ambari-metrics-hadoop-sink/src/main/java/org/apache/hadoop/metrics2/sink/timeline/HadoopTimelineMetricsSink.java index 14df30c..6e0eba5 100644 --- a/ambari-metrics/ambari-metrics-hadoop-sink/src/main/java/org/apache/hadoop/metrics2/sink/timeline/HadoopTimelineMetricsSink.java +++ b/ambari-metrics/ambari-metrics-hadoop-sink/src/main/java/org/apache/hadoop/metrics2/sink/timeline/HadoopTimelineMetricsSink.java @@ -116,10 +116,15 @@ public class HadoopTimelineMetricsSink extends AbstractTimelineMetricsSink imple String preferredCollectorHost = findPreferredCollectHost(); collectorUri = constructTimelineMetricUri(protocol, preferredCollectorHost, port); containerMetricsUri = constructContainerMetricUri(protocol, preferredCollectorHost, port); + + if (StringUtils.isNotEmpty(preferredCollectorHost)) { + LOG.info("Collector Uri: " + collectorUri); + LOG.info("Container Metrics Uri: " + containerMetricsUri); + } else { + LOG.info("No suitable collector found."); + } } - LOG.info("Collector Uri: " + collectorUri); - LOG.info("Container Metrics Uri: " + containerMetricsUri); timeoutSeconds = conf.getInt(METRICS_POST_TIMEOUT_SECONDS, DEFAULT_POST_TIMEOUT_SECONDS); @@ -462,7 +467,7 @@ public class HadoopTimelineMetricsSink extends AbstractTimelineMetricsSink imple executorService.submit(new Runnable() { @Override public void run() { - LOG.info("Closing HadoopTimelineMetricSink. Flushing metrics to collector..."); + LOG.debug("Closing HadoopTimelineMetricSink. Flushing metrics to collector..."); TimelineMetrics metrics = metricsCache.getAllMetrics(); if (metrics != null) { emitMetrics(metrics);
