YARN-3790. usedResource from rootQueue metrics may get stale data for FS scheduler after recovering the container (Zhihai Xu via rohithsharmaks)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/1d6bcc98
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/1d6bcc98
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/1d6bcc98

Branch: refs/heads/YARN-2928
Commit: 1d6bcc9871ddae465b21db16467c67f8689ac65c
Parents: f756240
Author: rohithsharmaks <rohithsharm...@apache.org>
Authored: Wed Jun 24 23:00:14 2015 +0530
Committer: Zhijie Shen <zjs...@apache.org>
Committed: Mon Jun 29 10:28:24 2015 -0700

----------------------------------------------------------------------
 hadoop-yarn-project/CHANGES.txt                       |  3 +++
 .../resourcemanager/scheduler/fair/FairScheduler.java | 11 +++++++----
 2 files changed, 10 insertions(+), 4 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1d6bcc98/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index a67af12..0c0fda1 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -808,6 +808,9 @@ Release 2.7.1 - UNRELEASED
     YARN-3832. Resource Localization fails on a cluster due to existing cache
     directories (Brahma Reddy Battula via jlowe)
 
+    YARN-3790. usedResource from rootQueue metrics may get stale data for FS
+    scheduler after recovering the container (Zhihai Xu via rohithsharmaks)
+
 Release 2.7.0 - 2015-04-20
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1d6bcc98/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
index 2ed3b2a..cbc10e7 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
@@ -45,6 +45,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes;
+import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
 import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationConstants;
@@ -841,11 +842,11 @@ public class FairScheduler extends
         + " with event: " + event);
   }
 
-  private synchronized void addNode(RMNode node) {
+  private synchronized void addNode(List<NMContainerStatus> containerReports,
+      RMNode node) {
     FSSchedulerNode schedulerNode = new FSSchedulerNode(node, usePortForNodeName);
     nodes.put(node.getNodeID(), schedulerNode);
     Resources.addTo(clusterResource, node.getTotalCapability());
-    updateRootQueueMetrics();
     updateMaximumAllocation(schedulerNode, true);
 
     triggerUpdate();
@@ -854,6 +855,9 @@ public class FairScheduler extends
     queueMgr.getRootQueue().recomputeSteadyShares();
     LOG.info("Added node " + node.getNodeAddress() +
         " cluster capacity: " + clusterResource);
+
+    recoverContainersOnNode(containerReports, node);
+    updateRootQueueMetrics();
   }
 
   private synchronized void removeNode(RMNode rmNode) {
@@ -1147,8 +1151,7 @@ public class FairScheduler extends
         throw new RuntimeException("Unexpected event type: " + event);
       }
       NodeAddedSchedulerEvent nodeAddedEvent = (NodeAddedSchedulerEvent)event;
-      addNode(nodeAddedEvent.getAddedRMNode());
-      recoverContainersOnNode(nodeAddedEvent.getContainerReports(),
+      addNode(nodeAddedEvent.getContainerReports(),
           nodeAddedEvent.getAddedRMNode());
       break;
     case NODE_REMOVED: