AMBARI-20071 : Hadoop metrics sink prints lots of logs if collector is unavailable. (avijayan)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/64389cca
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/64389cca
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/64389cca

Branch: refs/heads/branch-feature-AMBARI-20053
Commit: 64389cca8c6ebb15a46874aab4efba7e6352f25c
Parents: bfaf613
Author: Aravindan Vijayan <[email protected]>
Authored: Tue Feb 21 10:28:39 2017 -0800
Committer: Aravindan Vijayan <[email protected]>
Committed: Tue Feb 21 10:28:39 2017 -0800

----------------------------------------------------------------------
 .../timeline/AbstractTimelineMetricsSink.java   | 22 +++++++++++++-------
 .../timeline/HadoopTimelineMetricsSink.java     | 11 +++++++---
 2 files changed, 23 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/64389cca/ambari-metrics/ambari-metrics-common/src/main/java/org/apache/hadoop/metrics2/sink/timeline/AbstractTimelineMetricsSink.java
----------------------------------------------------------------------
diff --git a/ambari-metrics/ambari-metrics-common/src/main/java/org/apache/hadoop/metrics2/sink/timeline/AbstractTimelineMetricsSink.java b/ambari-metrics/ambari-metrics-common/src/main/java/org/apache/hadoop/metrics2/sink/timeline/AbstractTimelineMetricsSink.java
index f5a02e4..a1fd008 100644
--- a/ambari-metrics/ambari-metrics-common/src/main/java/org/apache/hadoop/metrics2/sink/timeline/AbstractTimelineMetricsSink.java
+++ b/ambari-metrics/ambari-metrics-common/src/main/java/org/apache/hadoop/metrics2/sink/timeline/AbstractTimelineMetricsSink.java
@@ -82,6 +82,8 @@ public abstract class AbstractTimelineMetricsSink {
 
   protected static final AtomicInteger failedCollectorConnectionsCounter = new 
AtomicInteger(0);
   public static int NUMBER_OF_SKIPPED_COLLECTOR_EXCEPTIONS = 100;
+  protected static final AtomicInteger nullCollectorCounter = new 
AtomicInteger(0);
+  public static int NUMBER_OF_NULL_COLLECTOR_EXCEPTIONS = 20;
   public int ZK_CONNECT_TRY_COUNT = 10;
   public int ZK_SLEEP_BETWEEN_RETRY_TIME = 2000;
   public boolean shardExpired = true;
@@ -214,7 +216,7 @@ public abstract class AbstractTimelineMetricsSink {
       collectorHost = targetCollectorHostSupplier.get();
       // Last X attempts have failed - force refresh
       if (failedCollectorConnectionsCounter.get() > 
RETRY_COUNT_BEFORE_COLLECTOR_FAILOVER) {
-        LOG.info("Removing collector " + collectorHost + " from 
allKnownLiveCollectors.");
+        LOG.debug("Removing collector " + collectorHost + " from 
allKnownLiveCollectors.");
         allKnownLiveCollectors.remove(collectorHost);
         targetCollectorHostSupplier = null;
         collectorHost = findPreferredCollectHost();
@@ -224,8 +226,15 @@ public abstract class AbstractTimelineMetricsSink {
     }
 
     if (collectorHost == null) {
-      LOG.warn("No live collector to send metrics to. Metrics to be sent will 
be discarded.");
+      if (nullCollectorCounter.getAndIncrement() == 0) {
+        LOG.info("No live collector to send metrics to. Metrics to be sent 
will be discarded. " +
+          "This message will be skipped for the next " + 
NUMBER_OF_NULL_COLLECTOR_EXCEPTIONS + " times.");
+      } else {
+        
nullCollectorCounter.compareAndSet(NUMBER_OF_NULL_COLLECTOR_EXCEPTIONS, 0);
+      }
       return false;
+    } else {
+      nullCollectorCounter.set(0);
     }
 
     String connectUrl = getCollectorUri(collectorHost);
@@ -356,7 +365,7 @@ public abstract class AbstractTimelineMetricsSink {
     if (allKnownLiveCollectors.size() == 0 && getZookeeperQuorum() != null
       && (currentTime - lastFailedZkRequestTime) > zookeeperBackoffTimeMillis) 
{
 
-      LOG.info("No live collectors from configuration. Requesting 
zookeeper...");
+      LOG.debug("No live collectors from configuration. Requesting 
zookeeper...");
       
allKnownLiveCollectors.addAll(collectorHAHelper.findLiveCollectorHostsFromZNode());
       boolean noNewCollectorFromZk = true;
       for (String collectorHostFromZk : allKnownLiveCollectors) {
@@ -366,7 +375,7 @@ public abstract class AbstractTimelineMetricsSink {
         }
       }
       if (noNewCollectorFromZk) {
-        LOG.info("No new collector was found from Zookeeper. Will not request 
zookeeper for " + zookeeperBackoffTimeMillis + " millis");
+        LOG.debug("No new collector was found from Zookeeper. Will not request 
zookeeper for " + zookeeperBackoffTimeMillis + " millis");
         lastFailedZkRequestTime = System.currentTimeMillis();
       }
     }
@@ -396,7 +405,7 @@ public abstract class AbstractTimelineMetricsSink {
       shardExpired = true;
       return collectorHost;
     }
-    LOG.warn("Couldn't find any live collectors. Returning null");
+    LOG.debug("Couldn't find any live collectors. Returning null");
     shardExpired = true;
     return null;
   }
@@ -416,7 +425,7 @@ public abstract class AbstractTimelineMetricsSink {
             }
             break; // Found at least 1 live collector
           } catch (MetricCollectorUnavailableException e) {
-            LOG.info("Collector " + hostStr + " is not longer live. Removing " 
+
+            LOG.debug("Collector " + hostStr + " is not longer live. Removing 
" +
               "it from list of know live collector hosts : " + 
allKnownLiveCollectors);
             allKnownLiveCollectors.remove(hostStr);
           }
@@ -473,7 +482,6 @@ public abstract class AbstractTimelineMetricsSink {
       LOG.debug(errorMessage);
       LOG.debug(ioe);
       String warnMsg = "Unable to connect to collector to find live nodes.";
-      LOG.warn(warnMsg);
       throw new MetricCollectorUnavailableException(warnMsg);
     }
     return collectors;

http://git-wip-us.apache.org/repos/asf/ambari/blob/64389cca/ambari-metrics/ambari-metrics-hadoop-sink/src/main/java/org/apache/hadoop/metrics2/sink/timeline/HadoopTimelineMetricsSink.java
----------------------------------------------------------------------
diff --git a/ambari-metrics/ambari-metrics-hadoop-sink/src/main/java/org/apache/hadoop/metrics2/sink/timeline/HadoopTimelineMetricsSink.java b/ambari-metrics/ambari-metrics-hadoop-sink/src/main/java/org/apache/hadoop/metrics2/sink/timeline/HadoopTimelineMetricsSink.java
index 14df30c..6e0eba5 100644
--- a/ambari-metrics/ambari-metrics-hadoop-sink/src/main/java/org/apache/hadoop/metrics2/sink/timeline/HadoopTimelineMetricsSink.java
+++ b/ambari-metrics/ambari-metrics-hadoop-sink/src/main/java/org/apache/hadoop/metrics2/sink/timeline/HadoopTimelineMetricsSink.java
@@ -116,10 +116,15 @@ public class HadoopTimelineMetricsSink extends 
AbstractTimelineMetricsSink imple
       String preferredCollectorHost = findPreferredCollectHost();
       collectorUri = constructTimelineMetricUri(protocol, 
preferredCollectorHost, port);
       containerMetricsUri = constructContainerMetricUri(protocol, 
preferredCollectorHost, port);
+
+      if (StringUtils.isNotEmpty(preferredCollectorHost)) {
+        LOG.info("Collector Uri: " + collectorUri);
+        LOG.info("Container Metrics Uri: " + containerMetricsUri);
+      } else {
+        LOG.info("No suitable collector found.");
+      }
     }
 
-    LOG.info("Collector Uri: " + collectorUri);
-    LOG.info("Container Metrics Uri: " + containerMetricsUri);
 
     timeoutSeconds = conf.getInt(METRICS_POST_TIMEOUT_SECONDS, 
DEFAULT_POST_TIMEOUT_SECONDS);
 
@@ -462,7 +467,7 @@ public class HadoopTimelineMetricsSink extends 
AbstractTimelineMetricsSink imple
     executorService.submit(new Runnable() {
       @Override
       public void run() {
-        LOG.info("Closing HadoopTimelineMetricSink. Flushing metrics to 
collector...");
+        LOG.debug("Closing HadoopTimelineMetricSink. Flushing metrics to 
collector...");
         TimelineMetrics metrics = metricsCache.getAllMetrics();
         if (metrics != null) {
           emitMetrics(metrics);

Reply via email to