Repository: ambari Updated Branches: refs/heads/branch-2.5 cec36313c -> ee67387bf
AMBARI-20179 : AMS Collector shuts down with Helix-Zk related exception if partial /ambari-metrics-cluster znode exists. (avijayan) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/ee67387b Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/ee67387b Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/ee67387b Branch: refs/heads/branch-2.5 Commit: ee67387bf0ca5b7a3012108dfa67d63524f53618 Parents: cec3631 Author: Aravindan Vijayan <[email protected]> Authored: Fri Feb 24 12:55:19 2017 -0800 Committer: Aravindan Vijayan <[email protected]> Committed: Fri Feb 24 12:55:26 2017 -0800 ---------------------------------------------------------------------- .../availability/MetricCollectorHAController.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/ee67387b/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/availability/MetricCollectorHAController.java ---------------------------------------------------------------------- diff --git a/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/availability/MetricCollectorHAController.java b/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/availability/MetricCollectorHAController.java index 12c255e..53e6304 100644 --- a/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/availability/MetricCollectorHAController.java +++ b/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/availability/MetricCollectorHAController.java @@ -18,6 +18,8 @@ package org.apache.hadoop.yarn.server.applicationhistoryservice.metrics.timeline.availability; import com.google.common.base.Joiner; +import org.I0Itec.zkclient.exception.ZkNoNodeException; +import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -37,6 +39,7 @@ import org.apache.helix.model.OnlineOfflineSMD; import org.apache.helix.model.StateModelDefinition; import org.apache.helix.tools.StateModelConfigGenerator;; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.TreeSet; @@ -123,8 +126,15 @@ public class MetricCollectorHAController { admin.addCluster(clusterName, false); // Adding host to the cluster - List<String> nodes = admin.getInstancesInCluster(clusterName); - if (nodes == null || !nodes.contains(instanceConfig.getInstanceName())) { + List<String> nodes = Collections.EMPTY_LIST; + try { + nodes = admin.getInstancesInCluster(clusterName); + } catch (ZkNoNodeException ex) { + LOG.warn("Child znode under /" + CLUSTER_NAME + " not found.Recreating the cluster."); + admin.addCluster(clusterName, true); + } + + if (CollectionUtils.isEmpty(nodes) || !nodes.contains(instanceConfig.getInstanceName())) { LOG.info("Adding participant instance " + instanceConfig); admin.addInstance(clusterName, instanceConfig); }
