Updated Branches: refs/heads/branch-1.4.3 3f6e460e4 -> 3c3402a36
AMBARI 4206. Significant lag between host status update and slave/master component start/stop Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/3c3402a3 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/3c3402a3 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/3c3402a3 Branch: refs/heads/branch-1.4.3 Commit: 3c3402a3685fc629b15fd83fc1b005a66b442955 Parents: 3f6e460 Author: Sumit Mohanty <[email protected]> Authored: Thu Jan 2 10:06:56 2014 -0800 Committer: Sumit Mohanty <[email protected]> Committed: Thu Jan 2 10:06:56 2014 -0800 ---------------------------------------------------------------------- .../test/python/ambari_agent/TestHardware.py | 20 ++++-- .../ambari/server/agent/HeartBeatHandler.java | 74 ++++++++++++-------- .../internal/HostResourceProvider.java | 62 ---------------- .../ambari/server/state/host/HostImpl.java | 13 ++-- .../apache/ambari/server/utils/StageUtils.java | 2 - .../server/agent/TestHeartbeatHandler.java | 58 ++++++++++++++- 6 files changed, 122 insertions(+), 107 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/3c3402a3/ambari-agent/src/test/python/ambari_agent/TestHardware.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/ambari_agent/TestHardware.py b/ambari-agent/src/test/python/ambari_agent/TestHardware.py index e4937ec..2e82a8a 100644 --- a/ambari-agent/src/test/python/ambari_agent/TestHardware.py +++ b/ambari-agent/src/test/python/ambari_agent/TestHardware.py @@ -22,6 +22,8 @@ from unittest import TestCase from ambari_agent.Hardware import Hardware from mock.mock import patch from ambari_agent.Facter import Facter +import unittest +import socket class TestHardware(TestCase): def test_build(self): @@ -75,14 +77,18 @@ class TestHardware(TestCase): self.assertEquals(result, None) - @patch.object(Facter, "getFqdn") - def test_fqdnDomainHostname(self, facter_getFqdn_mock): - facter_getFqdn_mock.return_value = "ambari.apache.org" + ''' + This test validates the current behavior where hostname and fqdn are + the same. + ''' + @patch.object(socket, "getfqdn") + def test_fqdnDomainHostname(self, socket_getFqdn_mock): + socket_getFqdn_mock.return_value = "ambari.apache.org" result = Facter().facterInfo() - self.assertEquals(result['hostname'], "ambari") - self.assertEquals(result['domain'], "apache.org") - self.assertEquals(result['fqdn'], (result['hostname'] + '.' + result['domain'])) + self.assertEquals(result['hostname'], "ambari.apache.org") + self.assertEquals(result['domain'], "") + self.assertEquals(result['fqdn'], (result['hostname'] + result['domain'])) @patch.object(Facter, "setDataUpTimeOutput") def test_uptimeSecondsHoursDays(self, facter_setDataUpTimeOutput_mock): @@ -154,4 +160,6 @@ lo Link encap:Local Loopback self.assertEquals(result['netmask'], '255.255.255.0') self.assertEquals(result['interfaces'], 'eth0,eth1,lo') +if __name__ == "__main__": + unittest.main() http://git-wip-us.apache.org/repos/asf/ambari/blob/3c3402a3/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java index 2501f61..b421bff 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java @@ -17,7 +17,6 @@ */ package org.apache.ambari.server.agent; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -182,7 +181,8 @@ public class HeartBeatHandler { // Examine heartbeart for component live status reports processStatusReports(heartbeat, hostname, clusterFsm); - //Calculate host status + // Calculate host status + // NOTE: This step must be after processing command/status reports processHostStatus(heartbeat, hostname); // Send commands if node is active @@ -196,44 +196,61 @@ public class HeartBeatHandler { protected void processHostStatus(HeartBeat heartbeat, String hostname) throws AmbariException { Host host = clusterFsm.getHost(hostname); - - HealthStatus healthStatus = host.getHealthStatus().getHealthStatus(); - + if (!healthStatus.equals(HostHealthStatus.HealthStatus.UNKNOWN)) { List<ComponentStatus> componentStatuses = heartbeat.getComponentStatus(); //Host status info could be calculated only if agent returned statuses in heartbeat + //Or, if a command is executed that can change component status + boolean calculateHostStatus = false; + String clusterName = null; if (componentStatuses.size() > 0) { + calculateHostStatus = true; + for (ComponentStatus componentStatus : componentStatuses) { + clusterName = componentStatus.getClusterName(); + break; + } + } + + if (!calculateHostStatus) { + List<CommandReport> reports = heartbeat.getReports(); + for (CommandReport report : reports) { + if (RoleCommand.ACTIONEXECUTE.toString().equals(report.getRoleCommand())) { + continue; + } + + String service = report.getServiceName(); + if (actionMetadata.getActions(service.toLowerCase()).contains(report.getRole())) { + continue; + } + if (report.getStatus().equals("COMPLETED")) { + calculateHostStatus = true; + clusterName = report.getClusterName(); + break; + } + } + } + if (calculateHostStatus) { + //Use actual component status to compute the host status int masterCount = 0; int mastersRunning = 0; int slaveCount = 0; int slavesRunning = 0; - Map<String, StackId> stackIdsByClusters = - new HashMap<String, StackId>(); - - for (ComponentStatus componentStatus : componentStatuses) { - - String clusterName = componentStatus.getClusterName(); - - StackId stackId; - if (stackIdsByClusters.containsKey(clusterName)) { - stackId = stackIdsByClusters.get(clusterName); - - } else { - Cluster cluster = clusterFsm.getCluster(clusterName); - stackId = cluster.getDesiredStackVersion(); - stackIdsByClusters.put(clusterName, stackId); - } + StackId stackId; + Cluster cluster = clusterFsm.getCluster(clusterName); + stackId = cluster.getDesiredStackVersion(); + List<ServiceComponentHost> scHosts = cluster.getServiceComponentHosts(heartbeat.getHostname()); + for (ServiceComponentHost scHost : scHosts) { ComponentInfo componentInfo = ambariMetaInfo.getComponent(stackId.getStackName(), - stackId.getStackVersion(), componentStatus.getServiceName(), - componentStatus.getComponentName()); + stackId.getStackVersion(), scHost.getServiceName(), + scHost.getServiceComponentName()); - String status = componentStatus.getStatus(); + String status = scHost.getState().name(); String category = componentInfo.getCategory(); @@ -249,7 +266,7 @@ public class HeartBeatHandler { } } } - + if (masterCount == mastersRunning && slaveCount == slavesRunning) { healthStatus = HostHealthStatus.HealthStatus.HEALTHY; } else if (masterCount > 0 && mastersRunning < masterCount) { @@ -257,18 +274,17 @@ public class HeartBeatHandler { } else { healthStatus = HostHealthStatus.HealthStatus.ALERT; } - + host.setStatus(healthStatus.name()); host.persist(); } - + //If host doesn't belongs to any cluster if ((clusterFsm.getClustersForHost(host.getHostName())).size() == 0) { healthStatus = HostHealthStatus.HealthStatus.HEALTHY; host.setStatus(healthStatus.name()); host.persist(); - } - + } } } http://git-wip-us.apache.org/repos/asf/ambari/blob/3c3402a3/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/HostResourceProvider.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/HostResourceProvider.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/HostResourceProvider.java index 0c9c1f6..1759826 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/HostResourceProvider.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/HostResourceProvider.java @@ -664,66 +664,4 @@ public class HostResourceProvider extends AbstractControllerResourceProvider { } } } - - // calculate the host status, accounting for the state of the host components - private String calculateHostStatus(HostResponse response) throws AmbariException { - HostHealthStatus.HealthStatus healthStatus = response.getHealthStatus().getHealthStatus(); - - if (!healthStatus.equals(HostHealthStatus.HealthStatus.UNKNOWN)) { - AmbariManagementController controller = getManagementController(); - AmbariMetaInfo ambariMetaInfo = controller.getAmbariMetaInfo(); - Clusters clusters = controller.getClusters(); - String clusterName = response.getClusterName(); - - if (clusterName != null && clusterName.length() > 0) { - Cluster cluster = clusters.getCluster(clusterName); - if (cluster != null) { - StackId stackId = cluster.getDesiredStackVersion(); - - ServiceComponentHostRequest request = new ServiceComponentHostRequest(clusterName, - null, null, response.getHostname(), null); - - Set<ServiceComponentHostResponse> hostComponentResponses = - controller.getHostComponents(Collections.singleton(request)); - - int masterCount = 0; - int mastersRunning = 0; - int slaveCount = 0; - int slavesRunning = 0; - - for (ServiceComponentHostResponse hostComponentResponse : hostComponentResponses ) { - ComponentInfo componentInfo = ambariMetaInfo.getComponentCategory(stackId.getStackName(), - stackId.getStackVersion(), hostComponentResponse.getServiceName(), - hostComponentResponse.getComponentName()); - - if (componentInfo != null) { - String category = componentInfo.getCategory(); - String state = hostComponentResponse.getLiveState(); - - if (category.equals("MASTER")) { - ++masterCount; - if (state.equals("STARTED")) { - ++mastersRunning; - } - } else if (category.equals("SLAVE")) { - ++slaveCount; - if (state.equals("STARTED")) { - ++slavesRunning; - } - } - } - } - - if (masterCount == mastersRunning && slaveCount == slavesRunning) { - healthStatus = HostHealthStatus.HealthStatus.HEALTHY; - } else if (masterCount > 0 && mastersRunning < masterCount ) { - healthStatus = HostHealthStatus.HealthStatus.UNHEALTHY; - } else { - healthStatus = HostHealthStatus.HealthStatus.ALERT; - } - } - } - } - return healthStatus.toString(); - } } http://git-wip-us.apache.org/repos/asf/ambari/blob/3c3402a3/ambari-server/src/main/java/org/apache/ambari/server/state/host/HostImpl.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/state/host/HostImpl.java b/ambari-server/src/main/java/org/apache/ambari/server/state/host/HostImpl.java index d2ac9c0..78f18fa 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/state/host/HostImpl.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/state/host/HostImpl.java @@ -1009,12 +1009,13 @@ public class HostImpl implements Host { @Override public void setStatus(String status) { - try { - writeLock.lock(); - this.status = status; - } - finally { - writeLock.unlock(); + if (status != null && !status.equals(this.status)) { + try { + writeLock.lock(); + this.status = status; + } finally { + writeLock.unlock(); + } } } http://git-wip-us.apache.org/repos/asf/ambari/blob/3c3402a3/ambari-server/src/main/java/org/apache/ambari/server/utils/StageUtils.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/utils/StageUtils.java b/ambari-server/src/main/java/org/apache/ambari/server/utils/StageUtils.java index f696357..4ea8fa6 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/utils/StageUtils.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/utils/StageUtils.java @@ -250,8 +250,6 @@ public class StageUtils { //Add index of host to current host role hostsForComponentsHost.add(hostIndex); } - else - LOG.warn("Component " + componentName + " doesn't have mapped role name for cluster host info"); } } } http://git-wip-us.apache.org/repos/asf/ambari/blob/3c3402a3/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java b/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java index 4b91179..de772f5 100644 --- a/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java +++ b/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java @@ -1317,6 +1317,14 @@ public class TestHeartbeatHandler { Cluster cluster = getDummyCluster(); Host hostObject = clusters.getHost(DummyHostname1); clusters.mapHostToCluster(hostObject.getHostName(), cluster.getClusterName()); + Service hdfs = cluster.addService(HDFS); + hdfs.persist(); + hdfs.addServiceComponent(DATANODE).persist(); + hdfs.getServiceComponent(DATANODE).addServiceComponentHost(DummyHostname1).persist(); + hdfs.addServiceComponent(NAMENODE).persist(); + hdfs.getServiceComponent(NAMENODE).addServiceComponentHost(DummyHostname1).persist(); + hdfs.getServiceComponent(NAMENODE).getServiceComponentHost(DummyHostname1).setState(State.STARTED); + hdfs.getServiceComponent(DATANODE).getServiceComponentHost(DummyHostname1).setState(State.STARTED); ActionQueue aq = new ActionQueue(); @@ -1400,8 +1408,54 @@ public class TestHeartbeatHandler { hb3.setComponentStatus(componentStatus); handler.handleHeartBeat(hb3); assertEquals(HostHealthStatus.HealthStatus.UNHEALTHY.name(), hostObject.getStatus()); - - + + //All are up + hb1.setResponseId(3); + handler.handleHeartBeat(hb1); + assertEquals(HostHealthStatus.HealthStatus.HEALTHY.name(), hostObject.getStatus()); + + //Only one component reported status + hdfs.getServiceComponent(NAMENODE).getServiceComponentHost(DummyHostname1).setState(State.INSTALLED); + HeartBeat hb4 = new HeartBeat(); + hb4.setResponseId(4); + hb4.setNodeStatus(new HostStatus(Status.HEALTHY, DummyHostStatus)); + hb4.setHostname(DummyHostname1); + componentStatus = new ArrayList<ComponentStatus>(); + dataNodeStatus = new ComponentStatus(); + dataNodeStatus.setClusterName(cluster.getClusterName()); + dataNodeStatus.setServiceName(HDFS); + dataNodeStatus.setComponentName(DATANODE); + dataNodeStatus.setStatus("STARTED"); + componentStatus.add(dataNodeStatus); + hb4.setComponentStatus(componentStatus); + handler.handleHeartBeat(hb4); + assertEquals(HostHealthStatus.HealthStatus.UNHEALTHY.name(), hostObject.getStatus()); + + hb1.setResponseId(5); + handler.handleHeartBeat(hb1); + assertEquals(HostHealthStatus.HealthStatus.HEALTHY.name(), hostObject.getStatus()); + + //Some command reports + HeartBeat hb5 = new HeartBeat(); + hb5.setResponseId(6); + hb5.setNodeStatus(new HostStatus(Status.HEALTHY, DummyHostStatus)); + hb5.setHostname(DummyHostname1); + CommandReport cr1 = new CommandReport(); + cr1.setActionId(StageUtils.getActionId(requestId, stageId)); + cr1.setServiceName(HDFS); + cr1.setTaskId(1); + cr1.setRole(DATANODE); + cr1.setStatus("COMPLETED"); + cr1.setStdErr(""); + cr1.setStdOut(""); + cr1.setExitCode(215); + cr1.setRoleCommand("STOP"); + cr1.setClusterName(DummyCluster); + ArrayList<CommandReport> reports = new ArrayList<CommandReport>(); + reports.add(cr1); + hb5.setReports(reports); + handler.handleHeartBeat(hb5); + assertEquals(HostHealthStatus.HealthStatus.ALERT.name(), hostObject.getStatus()); } private ActionManager getMockActionManager() {
