YARN-4055. Report node resource utilization in heartbeat. (Inigo Goiri via kasha)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/13604bd5 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/13604bd5 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/13604bd5 Branch: refs/heads/HDFS-7285 Commit: 13604bd5f119fc81b9942190dfa366afad61bc92 Parents: def1293 Author: Karthik Kambatla <ka...@apache.org> Authored: Sun Aug 16 15:08:53 2015 -0700 Committer: Karthik Kambatla <ka...@apache.org> Committed: Sun Aug 16 15:08:53 2015 -0700 ---------------------------------------------------------------------- hadoop-yarn-project/CHANGES.txt | 3 +++ .../yarn/client/TestResourceTrackerOnHA.java | 2 +- .../yarn/server/api/records/NodeStatus.java | 18 +++++++++++++++- .../api/records/impl/pb/NodeStatusPBImpl.java | 22 ++++++++++++++++++++ .../main/proto/yarn_server_common_protos.proto | 1 + .../hadoop/yarn/server/nodemanager/Context.java | 2 ++ .../yarn/server/nodemanager/NodeManager.java | 16 ++++++++++++-- .../nodemanager/NodeStatusUpdaterImpl.java | 13 +++++++++++- 8 files changed, 72 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/13604bd5/hadoop-yarn-project/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 287a913..66978a0 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -172,6 +172,9 @@ Release 2.8.0 - UNRELEASED YARN-3534. Collect memory/cpu usage on the node. (Inigo Goiri via kasha) + YARN-4055. Report node resource utilization in heartbeat. + (Inigo Goiri via kasha) + IMPROVEMENTS YARN-644. Basic null check is not performed on passed in arguments before http://git-wip-us.apache.org/repos/asf/hadoop/blob/13604bd5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java index c51570c..6cdf87f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java @@ -68,7 +68,7 @@ public class TestResourceTrackerOnHA extends ProtocolHATestBase{ failoverThread = createAndStartFailoverThread(); NodeStatus status = NodeStatus.newInstance(NodeId.newInstance("localhost", 0), 0, null, - null, null, null); + null, null, null, null); NodeHeartbeatRequest request2 = NodeHeartbeatRequest.newInstance(status, null, null,null); resourceTracker.nodeHeartbeat(request2); http://git-wip-us.apache.org/repos/asf/hadoop/blob/13604bd5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java index 38b0381..24391bf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java @@ -47,13 +47,15 @@ public abstract class NodeStatus { * @param keepAliveApplications Applications to keep alive. * @param nodeHealthStatus Health status of the node. * @param containersUtilizations Utilization of the containers in this node. + * @param nodeUtilization Utilization of the node. * @return New {@code NodeStatus} with the provided information. */ public static NodeStatus newInstance(NodeId nodeId, int responseId, List<ContainerStatus> containerStatuses, List<ApplicationId> keepAliveApplications, NodeHealthStatus nodeHealthStatus, - ResourceUtilization containersUtilization) { + ResourceUtilization containersUtilization, + ResourceUtilization nodeUtilization) { NodeStatus nodeStatus = Records.newRecord(NodeStatus.class); nodeStatus.setResponseId(responseId); nodeStatus.setNodeId(nodeId); @@ -61,6 +63,7 @@ public abstract class NodeStatus { nodeStatus.setKeepAliveApplications(keepAliveApplications); nodeStatus.setNodeHealthStatus(nodeHealthStatus); nodeStatus.setContainersUtilization(containersUtilization); + nodeStatus.setNodeUtilization(nodeUtilization); return nodeStatus; } @@ -92,4 +95,17 @@ public abstract class NodeStatus { @Unstable public abstract void setContainersUtilization( ResourceUtilization containersUtilization); + + /** + * Get the <em>resource utilization</em> of the node. + * @return <em>resource utilization</em> of the node + */ + @Public + @Stable + public abstract ResourceUtilization getNodeUtilization(); + + @Private + @Unstable + public abstract void setNodeUtilization( + ResourceUtilization nodeUtilization); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/13604bd5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java index fffd6a9..2d139fe 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java @@ -314,6 +314,28 @@ public class NodeStatusPBImpl extends NodeStatus { .setContainersUtilization(convertToProtoFormat(containersUtilization)); } + @Override + public ResourceUtilization getNodeUtilization() { + NodeStatusProtoOrBuilder p = + this.viaProto ? this.proto : this.builder; + if (!p.hasNodeUtilization()) { + return null; + } + return convertFromProtoFormat(p.getNodeUtilization()); + } + + @Override + public void setNodeUtilization( + ResourceUtilization nodeUtilization) { + maybeInitBuilder(); + if (nodeUtilization == null) { + this.builder.clearNodeUtilization(); + return; + } + this.builder + .setNodeUtilization(convertToProtoFormat(nodeUtilization)); + } + private NodeIdProto convertToProtoFormat(NodeId nodeId) { return ((NodeIdPBImpl)nodeId).getProto(); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/13604bd5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto index a810813..901051f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto @@ -37,6 +37,7 @@ message NodeStatusProto { optional NodeHealthStatusProto nodeHealthStatus = 4; repeated ApplicationIdProto keep_alive_applications = 5; optional ResourceUtilizationProto containers_utilization = 6; + optional ResourceUtilizationProto node_utilization = 7; } message MasterKeyProto { http://git-wip-us.apache.org/repos/asf/hadoop/blob/13604bd5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java index 42a4234..52d937b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java @@ -70,6 +70,8 @@ public interface Context { ContainerManagementProtocol getContainerManager(); + NodeResourceMonitor getNodeResourceMonitor(); + LocalDirsHandlerService getLocalDirsHandler(); ApplicationACLsManager getApplicationACLsManager(); http://git-wip-us.apache.org/repos/asf/hadoop/blob/13604bd5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index a06293d..327171b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -93,7 +93,8 @@ public class NodeManager extends CompositeService private AsyncDispatcher dispatcher; private ContainerManagerImpl containerManager; private NodeStatusUpdater nodeStatusUpdater; - private static CompositeServiceShutdownHook nodeManagerShutdownHook; + private NodeResourceMonitor nodeResourceMonitor; + private static CompositeServiceShutdownHook nodeManagerShutdownHook; private NMStateStoreService nmStore = null; private AtomicBoolean isStopping = new AtomicBoolean(false); @@ -292,8 +293,9 @@ public class NodeManager extends CompositeService nodeLabelsProvider); } - NodeResourceMonitor nodeResourceMonitor = createNodeResourceMonitor(); + nodeResourceMonitor = createNodeResourceMonitor(); addService(nodeResourceMonitor); + ((NMContext) context).setNodeResourceMonitor(nodeResourceMonitor); containerManager = createContainerManager(context, exec, del, nodeStatusUpdater, @@ -413,6 +415,7 @@ public class NodeManager extends CompositeService private final NMContainerTokenSecretManager containerTokenSecretManager; private final NMTokenSecretManagerInNM nmTokenSecretManager; private ContainerManagementProtocol containerManager; + private NodeResourceMonitor nodeResourceMonitor; private final LocalDirsHandlerService dirsHandler; private final ApplicationACLsManager aclsManager; private WebServer webServer; @@ -478,6 +481,15 @@ public class NodeManager extends CompositeService } @Override + public NodeResourceMonitor getNodeResourceMonitor() { + return this.nodeResourceMonitor; + } + + public void setNodeResourceMonitor(NodeResourceMonitor nodeResourceMonitor) { + this.nodeResourceMonitor = nodeResourceMonitor; + } + + @Override public ContainerManagementProtocol getContainerManager() { return this.containerManager; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/13604bd5/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index 7c5c28b..0680ea3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -431,10 +431,11 @@ public class NodeStatusUpdaterImpl extends AbstractService implements } List<ContainerStatus> containersStatuses = getContainerStatuses(); ResourceUtilization containersUtilization = getContainersUtilization(); + ResourceUtilization nodeUtilization = getNodeUtilization(); NodeStatus nodeStatus = NodeStatus.newInstance(nodeId, responseId, containersStatuses, createKeepAliveApplicationList(), nodeHealthStatus, - containersUtilization); + containersUtilization, nodeUtilization); return nodeStatus; } @@ -451,6 +452,16 @@ public class NodeStatusUpdaterImpl extends AbstractService implements return containersMonitor.getContainersUtilization(); } + /** + * Get the utilization of the node. This includes the containers. + * @return Resource utilization of the node. + */ + private ResourceUtilization getNodeUtilization() { + NodeResourceMonitorImpl nodeResourceMonitor = + (NodeResourceMonitorImpl) this.context.getNodeResourceMonitor(); + return nodeResourceMonitor.getUtilization(); + } + // Iterate through the NMContext and clone and get all the containers' // statuses. If it's a completed container, add into the // recentlyStoppedContainers collections.