Repository: ambari Updated Branches: refs/heads/branch-2.1 f8efd01a3 -> 0d5b3ce5b
AMBARI-12924 - Upgrade Orchestration To Skip Unhealthy Hosts (jonathanhurley) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/0d5b3ce5 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/0d5b3ce5 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/0d5b3ce5 Branch: refs/heads/branch-2.1 Commit: 0d5b3ce5b327b6851996268faad77eefbf5d1148 Parents: f8efd01 Author: Jonathan Hurley <[email protected]> Authored: Fri Aug 28 15:19:58 2015 -0400 Committer: Jonathan Hurley <[email protected]> Committed: Mon Aug 31 10:47:33 2015 -0400 ---------------------------------------------------------------------- .../apache/ambari/server/stack/HostsType.java | 7 +++ .../ambari/server/stack/MasterHostResolver.java | 59 ++++++++++++-------- .../state/stack/upgrade/ClusterGrouping.java | 12 +++- .../ambari/server/state/UpgradeHelperTest.java | 44 +++++++++++++++ 4 files changed, 96 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/0d5b3ce5/ambari-server/src/main/java/org/apache/ambari/server/stack/HostsType.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/stack/HostsType.java b/ambari-server/src/main/java/org/apache/ambari/server/stack/HostsType.java index 55313d5..9c953f2 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/stack/HostsType.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/stack/HostsType.java @@ -49,6 +49,13 @@ public class HostsType { */ public LinkedHashSet<String> hosts = new LinkedHashSet<String>(); + /** + * Unhealthy hosts are those which are explicitly put into maintenance mode. 
+ * If there is a host which is not heartbeating (or is generally unhealthy) + * but not in maintenance mode, then the prerequisite upgrade checks will let + * the administrator know that it must be put into maintenance mode before an + * upgrade can begin. + */ public List<ServiceComponentHost> unhealthy = new ArrayList<ServiceComponentHost>(); } http://git-wip-us.apache.org/repos/asf/ambari/blob/0d5b3ce5/ambari-server/src/main/java/org/apache/ambari/server/stack/MasterHostResolver.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/stack/MasterHostResolver.java b/ambari-server/src/main/java/org/apache/ambari/server/stack/MasterHostResolver.java index ef75d38..62613ff 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/stack/MasterHostResolver.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/stack/MasterHostResolver.java @@ -31,7 +31,8 @@ import java.util.Set; import org.apache.ambari.server.AmbariException; import org.apache.ambari.server.state.Cluster; import org.apache.ambari.server.state.ConfigHelper; -import org.apache.ambari.server.state.HostState; +import org.apache.ambari.server.state.Host; +import org.apache.ambari.server.state.MaintenanceState; import org.apache.ambari.server.state.ServiceComponent; import org.apache.ambari.server.state.ServiceComponentHost; import org.apache.ambari.server.utils.HTTPUtils; @@ -134,7 +135,7 @@ public class MasterHostResolver { case HDFS: if (componentName.equalsIgnoreCase("NAMENODE")) { if (componentHosts.size() != 2) { - return filterSameVersion(hostsType, serviceName, componentName); + return filterHosts(hostsType, serviceName, componentName); } Map<Status, String> pair = getNameNodePair(); @@ -163,43 +164,55 @@ public class MasterHostResolver { LOG.error("Unable to get master and hosts for Component " + componentName + ". 
Error: " + err.getMessage(), err); } - hostsType = filterSameVersion(hostsType, serviceName, componentName); + hostsType = filterHosts(hostsType, serviceName, componentName); return hostsType; } /** - * Compares the versions of a HostComponent to the version for the resolver. - * If version is unspecified for the object, the {@link HostsType} object is - * returned without change. + * Filters the supplied list of hosts in the following ways: + * <ul> + * <li>Compares the versions of a HostComponent to the version for the + * resolver. Only versions that do not match are retained.</li> + * <li>Removes unhealthy hosts in maintenance mode from the list of healthy + * hosts</li> + * </ul> * - * @param hostsType the hosts to resolve - * @param service the service name - * @param component the component name - * @return the modified hosts instance with filtered and unhealthy hosts filled + * @param hostsType + * the hosts to resolve + * @param service + * the service name + * @param component + * the component name + * @return the modified hosts instance with filtered and unhealthy hosts + * filled */ - private HostsType filterSameVersion(HostsType hostsType, String service, String component) { - + private HostsType filterHosts(HostsType hostsType, String service, String component) { try { org.apache.ambari.server.state.Service svc = m_cluster.getService(service); ServiceComponent sc = svc.getServiceComponent(component); // !!! 
not really a fan of passing these around - List<ServiceComponentHost> unhealthy = new ArrayList<ServiceComponentHost>(); - LinkedHashSet<String> toUpgrade = new LinkedHashSet<String>(); - - for (String host : hostsType.hosts) { - ServiceComponentHost sch = sc.getServiceComponentHost(host); - - if (HostState.HEALTHY != sch.getHostState() && !sc.isMasterComponent()) { - unhealthy.add(sch); + List<ServiceComponentHost> unhealthyHosts = new ArrayList<ServiceComponentHost>(); + LinkedHashSet<String> upgradeHosts = new LinkedHashSet<String>(); + + for (String hostName : hostsType.hosts) { + ServiceComponentHost sch = sc.getServiceComponentHost(hostName); + Host host = sch.getHost(); + MaintenanceState maintenanceState = host.getMaintenanceState(sch.getClusterId()); + + // !!! FIXME: only rely on maintenance state once the upgrade endpoint + // is using the pre-req endpoint for determining if an upgrade is + // possible + if (maintenanceState != MaintenanceState.OFF && !sc.isMasterComponent()) { + unhealthyHosts.add(sch); } else if (null == m_version || null == sch.getVersion() || !sch.getVersion().equals(m_version)) { - toUpgrade.add(host); + upgradeHosts.add(hostName); } } - hostsType.unhealthy = unhealthy; - hostsType.hosts = toUpgrade; + hostsType.unhealthy = unhealthyHosts; + hostsType.hosts = upgradeHosts; return hostsType; } catch (AmbariException e) { http://git-wip-us.apache.org/repos/asf/ambari/blob/0d5b3ce5/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/ClusterGrouping.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/ClusterGrouping.java b/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/ClusterGrouping.java index ad84210..cf58511 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/ClusterGrouping.java +++ 
b/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/ClusterGrouping.java @@ -35,7 +35,9 @@ import javax.xml.bind.annotation.XmlTransient; import javax.xml.bind.annotation.XmlType; import org.apache.ambari.server.stack.HostsType; +import org.apache.ambari.server.state.Cluster; import org.apache.ambari.server.state.Host; +import org.apache.ambari.server.state.MaintenanceState; import org.apache.ambari.server.state.UpgradeContext; import org.apache.ambari.server.state.stack.UpgradePack.ProcessingComponent; @@ -224,11 +226,15 @@ public class ClusterGrouping extends Grouping { new TaskWrapper(service, component, realHosts, et)); } } else if (null == service && null == component) { - // no service, no component goes to all hosts - + // no service and no component will distribute the task to all healthy + // hosts not in maintenance mode + Cluster cluster = ctx.getCluster(); Set<String> hostNames = new HashSet<String>(); for (Host host : ctx.getCluster().getHosts()) { - hostNames.add(host.getHostName()); + MaintenanceState maintenanceState = host.getMaintenanceState(cluster.getClusterId()); + if (maintenanceState == MaintenanceState.OFF) { + hostNames.add(host.getHostName()); + } } return new StageWrapper( http://git-wip-us.apache.org/repos/asf/ambari/blob/0d5b3ce5/ambari-server/src/test/java/org/apache/ambari/server/state/UpgradeHelperTest.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/java/org/apache/ambari/server/state/UpgradeHelperTest.java b/ambari-server/src/test/java/org/apache/ambari/server/state/UpgradeHelperTest.java index 6267f53..7077f4c 100644 --- a/ambari-server/src/test/java/org/apache/ambari/server/state/UpgradeHelperTest.java +++ b/ambari-server/src/test/java/org/apache/ambari/server/state/UpgradeHelperTest.java @@ -194,6 +194,50 @@ public class UpgradeHelperTest { } /** + * Tests that hosts in MM are not included in the upgrade. 
+ * + * @throws Exception + */ + @Test + public void testUpgradeOrchestrationWithHostsInMM() throws Exception { + Map<String, UpgradePack> upgrades = ambariMetaInfo.getUpgradePacks("foo", "bar"); + assertTrue(upgrades.isEmpty()); + + upgrades = ambariMetaInfo.getUpgradePacks("HDP", "2.1.1"); + + ServiceInfo si = ambariMetaInfo.getService("HDP", "2.1.1", "ZOOKEEPER"); + si.setDisplayName("Zk"); + + ComponentInfo ci = si.getComponentByName("ZOOKEEPER_SERVER"); + ci.setDisplayName("ZooKeeper1 Server2"); + + assertTrue(upgrades.containsKey("upgrade_test")); + UpgradePack upgrade = upgrades.get("upgrade_test"); + assertNotNull(upgrade); + + // turn on MM for the first host + Cluster cluster = makeCluster(); + Host hostInMaintenanceMode = cluster.getHosts().iterator().next(); + hostInMaintenanceMode.setMaintenanceState(cluster.getClusterId(), MaintenanceState.ON); + + // use a "real" master host resolver here so that we can actually test MM + MasterHostResolver masterHostResolver = new MasterHostResolver(null, cluster, ""); + + UpgradeContext context = new UpgradeContext(masterHostResolver, HDP_21, HDP_21, + UPGRADE_VERSION, Direction.UPGRADE); + + List<UpgradeGroupHolder> groups = m_upgradeHelper.createSequence(upgrade, context); + assertEquals(6, groups.size()); + + for (UpgradeGroupHolder group : groups) { + for (StageWrapper stageWrapper : group.items) { + Set<String> hosts = stageWrapper.getHosts(); + assertFalse(hosts.contains(hostInMaintenanceMode.getHostName())); + } + } + } + + /** * Verify that a Rolling Upgrades restarts the NameNodes in the following order: standby, active. * @throws Exception */
