Updated Branches: refs/heads/trunk 2707ef036 -> e17554c99
AMBARI-2651. If there is at least one host that is not heartbeating with host components in INSTALL_FAILED state, service operations fail. (smohanty) Project: http://git-wip-us.apache.org/repos/asf/incubator-ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-ambari/commit/e17554c9 Tree: http://git-wip-us.apache.org/repos/asf/incubator-ambari/tree/e17554c9 Diff: http://git-wip-us.apache.org/repos/asf/incubator-ambari/diff/e17554c9 Branch: refs/heads/trunk Commit: e17554c998da191d713b9ce8a07ae0a1111d366e Parents: 2707ef0 Author: Sumit Mohanty <[email protected]> Authored: Tue Jul 16 13:26:30 2013 -0700 Committer: Sumit Mohanty <[email protected]> Committed: Tue Jul 16 13:26:30 2013 -0700 ---------------------------------------------------------------------- .../AmbariManagementControllerImpl.java | 17 +- .../AmbariManagementControllerTest.java | 251 ++++++++++++++++--- 2 files changed, 238 insertions(+), 30 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/e17554c9/ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariManagementControllerImpl.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariManagementControllerImpl.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariManagementControllerImpl.java index cf23ee8..180ebf8 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariManagementControllerImpl.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariManagementControllerImpl.java @@ -1935,7 +1935,7 @@ public class AmbariManagementControllerImpl implements for (ServiceComponentHost potentialSch : sc .getServiceComponentHosts().values()) { if (!potentialSch.getHostState().equals(HostState - .HEARTBEAT_LOST)) { + .HEARTBEAT_LOST)) { potentialHosts.add(potentialSch); } } @@ -2068,6 +2068,20 @@ public class AmbariManagementControllerImpl implements for (State newState : changedScHosts.get(compName).keySet()) { for (ServiceComponentHost scHost : changedScHosts.get(compName).get(newState)) { + + // Do not create role command for hosts that are not responding + if (scHost.getHostState().equals(HostState.HEARTBEAT_LOST)) { + LOG.info("Command is not created for servicecomponenthost " + + ", clusterName=" + cluster.getClusterName() + + ", clusterId=" + cluster.getClusterId() + + ", serviceName=" + scHost.getServiceName() + + ", componentName=" + scHost.getServiceComponentName() + + ", hostname=" + scHost.getHostName() + + ", hostState=" + scHost.getHostState() + + ", targetNewState=" + newState); + continue; + } + RoleCommand roleCommand; State oldSchState = scHost.getState(); ServiceComponentHostEvent event; @@ -2888,6 +2902,7 @@ public class AmbariManagementControllerImpl implements } continue; } + if (newState == oldSchState) { sch.setDesiredState(newState); if (LOG.isDebugEnabled()) { http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/e17554c9/ambari-server/src/test/java/org/apache/ambari/server/controller/AmbariManagementControllerTest.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/java/org/apache/ambari/server/controller/AmbariManagementControllerTest.java b/ambari-server/src/test/java/org/apache/ambari/server/controller/AmbariManagementControllerTest.java index db855cb..b6e08b3 100644 --- a/ambari-server/src/test/java/org/apache/ambari/server/controller/AmbariManagementControllerTest.java +++ b/ambari-server/src/test/java/org/apache/ambari/server/controller/AmbariManagementControllerTest.java @@ -268,58 +268,63 @@ public class AmbariManagementControllerTest { } private long startService(String clusterName, String serviceName, - boolean runSmokeTests, boolean reconfigureClients) throws - AmbariException { + boolean runSmokeTests, boolean reconfigureClients) throws + AmbariException { ServiceRequest r = new ServiceRequest(clusterName, serviceName, null, - State.STARTED.toString()); + State.STARTED.toString()); Set<ServiceRequest> requests = new HashSet<ServiceRequest>(); requests.add(r); Map<String, String> mapRequestProps = new HashMap<String, String>(); mapRequestProps.put("context", "Called from a test"); RequestStatusResponse resp = controller.updateServices(requests, - mapRequestProps, runSmokeTests, reconfigureClients); + mapRequestProps, runSmokeTests, reconfigureClients); Assert.assertEquals(State.STARTED, - clusters.getCluster(clusterName).getService(serviceName) - .getDesiredState()); - - // manually change live state to started as no running action manager - for (ServiceComponent sc : - clusters.getCluster(clusterName).getService(serviceName) - .getServiceComponents().values()) { - for (ServiceComponentHost sch : sc.getServiceComponentHosts().values()) { - sch.setState(State.STARTED); + clusters.getCluster(clusterName).getService(serviceName) + .getDesiredState()); + + if (resp != null) { + // manually change live state to stopped as no running action manager + List<HostRoleCommand> commands = actionDB.getRequestTasks(resp.getRequestId()); + for (HostRoleCommand cmd : commands) { + if (!cmd.getRole().toString().endsWith("CHECK")) { + clusters.getCluster(clusterName).getService(serviceName).getServiceComponent(cmd.getRole().name()) + .getServiceComponentHost(cmd.getHostName()).setState(State.STARTED); + } } + return resp.getRequestId(); + } else { + return -1; } - - return resp.getRequestId(); } private long installService(String clusterName, String serviceName, - boolean runSmokeTests, boolean reconfigureClients) throws - AmbariException { + boolean runSmokeTests, boolean reconfigureClients) throws + AmbariException { ServiceRequest r = new ServiceRequest(clusterName, serviceName, null, - State.INSTALLED.toString()); + State.INSTALLED.toString()); Set<ServiceRequest> requests = new HashSet<ServiceRequest>(); requests.add(r); Map<String, String> mapRequestProps = new HashMap<String, String>(); mapRequestProps.put("context", "Called from a test"); RequestStatusResponse resp = controller.updateServices(requests, - mapRequestProps, runSmokeTests, reconfigureClients); + mapRequestProps, runSmokeTests, reconfigureClients); Assert.assertEquals(State.INSTALLED, - clusters.getCluster(clusterName).getService(serviceName) - .getDesiredState()); - // manually change live state to stopped as no running action manager - for (ServiceComponent sc : - clusters.getCluster(clusterName).getService(serviceName) - .getServiceComponents().values()) { - for (ServiceComponentHost sch : sc.getServiceComponentHosts().values()) { - sch.setState(State.INSTALLED); + clusters.getCluster(clusterName).getService(serviceName) + .getDesiredState()); + + if (resp != null) { + // manually change live state to stopped as no running action manager + List<HostRoleCommand> commands = actionDB.getRequestTasks(resp.getRequestId()); + for (HostRoleCommand cmd : commands) { + clusters.getCluster(clusterName).getService(serviceName).getServiceComponent(cmd.getRole().name()) + .getServiceComponentHost(cmd.getHostName()).setState(State.INSTALLED); } + return resp.getRequestId(); + } else { + return -1; } - - return resp.getRequestId(); } @Test @@ -861,10 +866,12 @@ public class AmbariManagementControllerTest { String host1 = "h1"; clusters.addHost(host1); clusters.getHost("h1").setOsType("centos5"); + clusters.getHost("h1").setState(HostState.HEALTHY); clusters.getHost("h1").persist(); String host2 = "h2"; clusters.addHost(host2); clusters.getHost("h2").setOsType("centos5"); + clusters.getHost("h2").setState(HostState.HEALTHY); clusters.getHost("h2").persist(); clusters.getHost("h1").setOsType("centos5"); @@ -2458,10 +2465,12 @@ public class AmbariManagementControllerTest { String host1 = "h1"; clusters.addHost(host1); clusters.getHost("h1").setOsType("centos5"); + clusters.getHost("h1").setState(HostState.HEALTHY); clusters.getHost("h1").persist(); String host2 = "h2"; clusters.addHost(host2); clusters.getHost("h2").setOsType("centos5"); + clusters.getHost("h2").setState(HostState.HEALTHY); clusters.getHost("h2").persist(); clusters.mapHostToCluster(host1, clusterName); clusters.mapHostToCluster(host2, clusterName); @@ -2686,10 +2695,12 @@ public class AmbariManagementControllerTest { String host1 = "h1"; clusters.addHost(host1); clusters.getHost("h1").setOsType("centos5"); + clusters.getHost("h1").setState(HostState.HEALTHY); clusters.getHost("h1").persist(); String host2 = "h2"; clusters.addHost(host2); clusters.getHost("h2").setOsType("centos5"); + clusters.getHost("h2").setState(HostState.HEALTHY); clusters.getHost("h2").persist(); clusters.mapHostToCluster(host1, clusterName); clusters.mapHostToCluster(host2, clusterName); @@ -2882,14 +2893,17 @@ public class AmbariManagementControllerTest { String host1 = "h1"; clusters.addHost(host1); clusters.getHost("h1").setOsType("centos5"); + clusters.getHost("h1").setState(HostState.HEALTHY); clusters.getHost("h1").persist(); String host2 = "h2"; clusters.addHost(host2); clusters.getHost("h2").setOsType("centos5"); + clusters.getHost("h2").setState(HostState.HEALTHY); clusters.getHost("h2").persist(); clusters.mapHostToCluster(host1, clusterName); clusters.mapHostToCluster(host2, clusterName); + Set<ServiceComponentHostRequest> set1 = new HashSet<ServiceComponentHostRequest>(); ServiceComponentHostRequest r1 = @@ -3059,10 +3073,12 @@ public class AmbariManagementControllerTest { String host1 = "h1"; clusters.addHost(host1); clusters.getHost("h1").setOsType("centos5"); + clusters.getHost("h1").setState(HostState.HEALTHY); clusters.getHost("h1").persist(); String host2 = "h2"; clusters.addHost(host2); clusters.getHost("h2").setOsType("centos5"); + clusters.getHost("h2").setState(HostState.HEALTHY); clusters.getHost("h2").persist(); clusters.mapHostToCluster(host1, clusterName); clusters.mapHostToCluster(host2, clusterName); @@ -3657,10 +3673,12 @@ public class AmbariManagementControllerTest { String host1 = "h1"; clusters.addHost(host1); clusters.getHost("h1").setOsType("centos5"); + clusters.getHost("h1").setState(HostState.HEALTHY); clusters.getHost("h1").persist(); String host2 = "h2"; clusters.addHost(host2); clusters.getHost("h2").setOsType("centos6"); + clusters.getHost("h2").setState(HostState.HEALTHY); clusters.getHost("h2").persist(); clusters.mapHostToCluster(host1, clusterName); @@ -4463,10 +4481,13 @@ public class AmbariManagementControllerTest { clusters.addHost(host2); clusters.addHost(host3); clusters.getHost("h1").setOsType("centos6"); + clusters.getHost("h1").setState(HostState.HEALTHY); clusters.getHost("h1").persist(); clusters.getHost("h2").setOsType("centos6"); + clusters.getHost("h2").setState(HostState.HEALTHY); clusters.getHost("h2").persist(); clusters.getHost("h3").setOsType("centos6"); + clusters.getHost("h3").setState(HostState.HEALTHY); clusters.getHost("h3").persist(); clusters.mapHostToCluster(host1, clusterName); @@ -4629,10 +4650,12 @@ public class AmbariManagementControllerTest { String host1 = "h1"; clusters.addHost(host1); clusters.getHost("h1").setOsType("centos5"); + clusters.getHost("h1").setState(HostState.HEALTHY); clusters.getHost("h1").persist(); String host2 = "h2"; clusters.addHost(host2); clusters.getHost("h2").setOsType("centos6"); + clusters.getHost("h2").setState(HostState.HEALTHY); clusters.getHost("h2").persist(); clusters.mapHostToCluster(host1, clusterName); @@ -4718,9 +4741,11 @@ public class AmbariManagementControllerTest { String host1 = "h1"; clusters.addHost(host1); + clusters.getHost("h1").setState(HostState.HEALTHY); clusters.getHost("h1").persist(); String host2 = "h2"; clusters.addHost(host2); + clusters.getHost("h2").setState(HostState.HEALTHY); clusters.getHost("h2").persist(); clusters.getHost("h1").setOsType("centos5"); @@ -4805,6 +4830,153 @@ public class AmbariManagementControllerTest { } @Test + public void testSkipTaskOnUnhealthyHosts() throws AmbariException { + String clusterName = "foo1"; + createCluster(clusterName); + clusters.getCluster(clusterName) + .setDesiredStackVersion(new StackId("HDP-0.1")); + String serviceName = "HDFS"; + createService(clusterName, serviceName, null); + String componentName1 = "NAMENODE"; + String componentName2 = "DATANODE"; + + createServiceComponent(clusterName, serviceName, componentName1, + State.INIT); + createServiceComponent(clusterName, serviceName, componentName2, + State.INIT); + + String host1 = "h1"; + clusters.addHost(host1); + clusters.getHost("h1").setOsType("centos5"); + clusters.getHost("h1").setState(HostState.HEALTHY); + clusters.getHost("h1").persist(); + String host2 = "h2"; + clusters.addHost(host2); + clusters.getHost("h2").setOsType("centos6"); + clusters.getHost("h2").setState(HostState.HEALTHY); + clusters.getHost("h2").persist(); + String host3 = "h3"; + clusters.addHost(host3); + clusters.getHost("h3").setOsType("centos6"); + clusters.getHost("h3").setState(HostState.HEALTHY); + clusters.getHost("h3").persist(); + + clusters.mapHostToCluster(host1, clusterName); + clusters.mapHostToCluster(host2, clusterName); + clusters.mapHostToCluster(host3, clusterName); + + createServiceComponentHost(clusterName, serviceName, componentName1, + host1, null); + createServiceComponentHost(clusterName, serviceName, componentName2, + host2, null); + createServiceComponentHost(clusterName, serviceName, componentName2, + host3, null); + + // Install + installService(clusterName, serviceName, false, false); + + // h1=HEALTHY, h2=HEARTBEAT_LOST, h3=WAITING_FOR_HOST_STATUS_UPDATES + clusters.getHost("h1").setState(HostState.HEALTHY); + clusters.getHost("h2").setState(HostState.HEALTHY); + clusters.getHost("h3").setState(HostState.HEARTBEAT_LOST); + + long requestId = startService(clusterName, serviceName, true, false); + List<HostRoleCommand> commands = actionDB.getRequestTasks(requestId); + Assert.assertEquals(3, commands.size()); + int commandCount = 0; + for (HostRoleCommand command : commands) { + if (command.getRoleCommand() == RoleCommand.START) { + Assert.assertTrue(command.getHostName().equals("h1") || command.getHostName().equals("h2")); + commandCount++; + } + } + Assert.assertEquals("Expect only two task.", 2, commandCount); + + stopService(clusterName, serviceName, false, false); + + // h1=HEARTBEAT_LOST, h2=HEARTBEAT_LOST, h3=HEALTHY + clusters.getHost("h1").setState(HostState.HEARTBEAT_LOST); + clusters.getHost("h2").setState(HostState.HEARTBEAT_LOST); + clusters.getHost("h3").setState(HostState.HEALTHY); + + requestId = startService(clusterName, serviceName, true, false); + commands = actionDB.getRequestTasks(requestId); + commandCount = 0; + for (HostRoleCommand command : commands) { + if (command.getRoleCommand() == RoleCommand.START) { + Assert.assertTrue(command.getHostName().equals("h3")); + commandCount++; + } + } + Assert.assertEquals("Expect only one task.", 1, commandCount); + + stopService(clusterName, serviceName, false, false); + + // h1=HEALTHY, h2=HEALTHY, h3=HEALTHY + clusters.getHost("h1").setState(HostState.HEALTHY); + clusters.getHost("h2").setState(HostState.HEALTHY); + clusters.getHost("h3").setState(HostState.HEALTHY); + + requestId = startService(clusterName, serviceName, true, false); + commands = actionDB.getRequestTasks(requestId); + commandCount = 0; + for (HostRoleCommand command : commands) { + if (command.getRoleCommand() == RoleCommand.START) { + Assert.assertTrue(command.getHostName().equals("h3") || + command.getHostName().equals("h2") || + command.getHostName().equals("h1")); + commandCount++; + } + } + Assert.assertEquals("Expect all three task.", 3, commandCount); + + // h1=HEALTHY, h2=HEARTBEAT_LOST, h3=HEALTHY + clusters.getHost("h2").setState(HostState.HEARTBEAT_LOST); + requestId = stopService(clusterName, serviceName, false, false); + commands = actionDB.getRequestTasks(requestId); + Assert.assertEquals(2, commands.size()); + commandCount = 0; + for (HostRoleCommand command : commands) { + if (command.getRoleCommand() == RoleCommand.STOP) { + Assert.assertTrue(command.getHostName().equals("h3") || + command.getHostName().equals("h1")); + commandCount++; + } + } + Assert.assertEquals("Expect only two task.", 2, commandCount); + + // Force a sch into INSTALL_FAILED + Cluster cluster = clusters.getCluster(clusterName); + Service s = cluster.getService(serviceName); + ServiceComponent sc3 = s.getServiceComponent(componentName2); + for (ServiceComponentHost sch : sc3.getServiceComponentHosts().values()) { + if (sch.getHostName().equals("h3")) { + sch.setState(State.INSTALL_FAILED); + } + } + + // h1=HEALTHY, h2=HEALTHY, h3=HEARTBEAT_LOST + clusters.getHost("h3").setState(HostState.HEARTBEAT_LOST); + clusters.getHost("h2").setState(HostState.HEALTHY); + requestId = installService(clusterName, serviceName, false, false); + Assert.assertEquals(-1, requestId); + + // All healthy, INSTALL should succeed + clusters.getHost("h3").setState(HostState.HEALTHY); + requestId = installService(clusterName, serviceName, false, false); + commands = actionDB.getRequestTasks(requestId); + Assert.assertEquals(1, commands.size()); + commandCount = 0; + for (HostRoleCommand command : commands) { + if (command.getRoleCommand() == RoleCommand.INSTALL) { + Assert.assertTrue(command.getHostName().equals("h3")); + commandCount++; + } + } + Assert.assertEquals("Expect only one task.", 1, commandCount); + } + + @Test public void testServiceCheckWhenHostIsUnhealthy() throws AmbariException { String clusterName = "foo1"; createCluster(clusterName); @@ -4826,14 +4998,17 @@ public class AmbariManagementControllerTest { String host1 = "h1"; clusters.addHost(host1); clusters.getHost("h1").setOsType("centos5"); + clusters.getHost("h1").setState(HostState.HEALTHY); clusters.getHost("h1").persist(); String host2 = "h2"; clusters.addHost(host2); clusters.getHost("h2").setOsType("centos6"); + clusters.getHost("h2").setState(HostState.HEALTHY); clusters.getHost("h2").persist(); String host3 = "h3"; clusters.addHost(host3); clusters.getHost("h3").setOsType("centos6"); + clusters.getHost("h3").setState(HostState.HEALTHY); clusters.getHost("h3").persist(); clusters.mapHostToCluster(host1, clusterName); @@ -4940,14 +5115,17 @@ public class AmbariManagementControllerTest { String host1 = "h1"; clusters.addHost(host1); clusters.getHost("h1").setOsType("centos5"); + clusters.getHost("h1").setState(HostState.HEALTHY); clusters.getHost("h1").persist(); String host2 = "h2"; clusters.addHost(host2); clusters.getHost("h2").setOsType("centos6"); + clusters.getHost("h2").setState(HostState.HEALTHY); clusters.getHost("h2").persist(); String host3 = "h3"; clusters.addHost(host3); clusters.getHost("h3").setOsType("centos6"); + clusters.getHost("h3").setState(HostState.HEALTHY); clusters.getHost("h3").persist(); clusters.mapHostToCluster(host1, clusterName); @@ -5016,9 +5194,11 @@ public class AmbariManagementControllerTest { String host2 = "h2"; clusters.addHost(host1); clusters.getHost("h1").setOsType("centos5"); + clusters.getHost("h1").setState(HostState.HEALTHY); clusters.getHost("h1").persist(); clusters.addHost(host2); clusters.getHost("h2").setOsType("centos5"); + clusters.getHost("h2").setState(HostState.HEALTHY); clusters.getHost("h2").persist(); clusters.mapHostToCluster(host1, clusterName); clusters.mapHostToCluster(host2, clusterName); @@ -5090,10 +5270,12 @@ public class AmbariManagementControllerTest { String host1 = "h1"; clusters.addHost(host1); clusters.getHost("h1").setOsType("centos5"); + clusters.getHost("h1").setState(HostState.HEALTHY); clusters.getHost("h1").persist(); String host2 = "h2"; clusters.addHost(host2); clusters.getHost("h2").setOsType("centos6"); + clusters.getHost("h2").setState(HostState.HEALTHY); clusters.getHost("h2").persist(); clusters.mapHostToCluster(host1, clusterName); @@ -5188,10 +5370,12 @@ public class AmbariManagementControllerTest { String host1 = "h1"; clusters.addHost(host1); clusters.getHost("h1").setOsType("centos5"); + clusters.getHost("h1").setState(HostState.HEALTHY); clusters.getHost("h1").persist(); String host2 = "h2"; clusters.addHost(host2); clusters.getHost("h2").setOsType("centos6"); + clusters.getHost("h2").setState(HostState.HEALTHY); clusters.getHost("h2").persist(); clusters.mapHostToCluster(host1, clusterName); @@ -5271,10 +5455,12 @@ public class AmbariManagementControllerTest { String host1 = "h1"; clusters.addHost(host1); clusters.getHost("h1").setOsType("centos5"); + clusters.getHost("h1").setState(HostState.HEALTHY); clusters.getHost("h1").persist(); String host2 = "h2"; clusters.addHost(host2); clusters.getHost("h2").setOsType("centos6"); + clusters.getHost("h2").setState(HostState.HEALTHY); clusters.getHost("h2").persist(); clusters.mapHostToCluster(host1, clusterName); @@ -5349,9 +5535,11 @@ public class AmbariManagementControllerTest { String host1 = "h1"; clusters.addHost(host1); + clusters.getHost("h1").setState(HostState.HEALTHY); clusters.getHost("h1").persist(); String host2 = "h2"; clusters.addHost(host2); + clusters.getHost("h2").setState(HostState.HEALTHY); clusters.getHost("h2").persist(); clusters.getHost("h1").setOsType("centos5"); @@ -5631,6 +5819,9 @@ public class AmbariManagementControllerTest { clusters.getHost(host1).setOsType("centos5"); clusters.getHost(host2).setOsType("centos6"); + clusters.getHost(host1).setState(HostState.HEALTHY); + clusters.getHost(host2).setState(HostState.HEALTHY); + clusters.mapHostToCluster(host1, clusterName); clusters.mapHostToCluster(host2, clusterName); @@ -5807,8 +5998,10 @@ public class AmbariManagementControllerTest { createServiceComponent(clusterName, pigServiceName, pigComponentName, null); clusters.addHost(host1); + clusters.getHost(host1).setState(HostState.HEALTHY); clusters.getHost(host1).persist(); clusters.addHost(host2); + clusters.getHost(host2).setState(HostState.HEALTHY); clusters.getHost(host2).persist(); clusters.getHost(host1).setOsType("centos5");
