Author: vinodkv
Date: Thu Sep 5 01:21:48 2013
New Revision: 1520189
URL: http://svn.apache.org/r1520189
Log:
YARN-957. Fixed a bug in CapacityScheduler because of which requests that need
more than a node's total capability were incorrectly allocated on that node
causing apps to hang. Contributed by Omkar Vinit Joshi.
svn merge --ignore-ancestry -c 1520187 ../../trunk/
Added:
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java
- copied unchanged from r1520187,
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java
Modified:
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/CHANGES.txt
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerNode.java
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java
Modified: hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/CHANGES.txt?rev=1520189&r1=1520188&r2=1520189&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/CHANGES.txt
(original)
+++ hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/CHANGES.txt Thu
Sep 5 01:21:48 2013
@@ -116,6 +116,10 @@ Release 2.1.1-beta - UNRELEASED
YARN-1077. Fixed TestContainerLaunch test failure on Windows. (Chuan Liu
via
vinodkv)
+ YARN-957. Fixed a bug in CapacityScheduler because of which requests that
+ need more than a node's total capability were incorrectly allocated on that
+ node causing apps to hang. (Omkar Vinit Joshi via vinodkv)
+
Release 2.1.0-beta - 2013-08-22
INCOMPATIBLE CHANGES
Modified:
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java?rev=1520189&r1=1520188&r2=1520189&view=diff
==============================================================================
---
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java
(original)
+++
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java
Thu Sep 5 01:21:48 2013
@@ -67,4 +67,9 @@ public abstract class SchedulerNode {
*/
public abstract int getNumContainers();
+ /**
+ * Get total resources on the node.
+ * @return total resources on the node.
+ */
+ public abstract Resource getTotalResource();
}
Modified:
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java?rev=1520189&r1=1520188&r2=1520189&view=diff
==============================================================================
---
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
(original)
+++
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
Thu Sep 5 01:21:48 2013
@@ -1308,9 +1308,15 @@ public class LeafQueue implements CSQueu
+ " request=" + request + " type=" + type);
}
Resource capability = request.getCapability();
-
Resource available = node.getAvailableResource();
+ Resource totalResource = node.getTotalResource();
+ if (!Resources.fitsIn(capability, totalResource)) {
+ LOG.warn("Node : " + node.getNodeID()
+ + " does not have sufficient resource for request : " + request
+ + " node total capability : " + node.getTotalResource());
+ return Resources.none();
+ }
assert Resources.greaterThan(
resourceCalculator, clusterResource, available, Resources.none());
Modified:
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java?rev=1520189&r1=1520188&r2=1520189&view=diff
==============================================================================
---
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java
(original)
+++
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java
Thu Sep 5 01:21:48 2013
@@ -49,6 +49,7 @@ public class FiCaSchedulerNode extends S
private Resource availableResource =
recordFactory.newRecordInstance(Resource.class);
private Resource usedResource =
recordFactory.newRecordInstance(Resource.class);
+ private Resource totalResourceCapability;
private volatile int numContainers;
@@ -65,6 +66,9 @@ public class FiCaSchedulerNode extends S
this.rmNode = node;
this.availableResource.setMemory(node.getTotalCapability().getMemory());
this.availableResource.setVirtualCores(node.getTotalCapability().getVirtualCores());
+ totalResourceCapability =
+ Resource.newInstance(node.getTotalCapability().getMemory(), node
+ .getTotalCapability().getVirtualCores());
if (usePortForNodeName) {
nodeName = rmNode.getHostName() + ":" + node.getNodeID().getPort();
} else {
@@ -126,6 +130,11 @@ public class FiCaSchedulerNode extends S
return this.usedResource;
}
+ @Override
+ public Resource getTotalResource() {
+ return this.totalResourceCapability;
+ }
+
private synchronized boolean isValidContainer(Container c) {
if (launchedContainers.containsKey(c.getId()))
return true;
Modified:
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerNode.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerNode.java?rev=1520189&r1=1520188&r2=1520189&view=diff
==============================================================================
---
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerNode.java
(original)
+++
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerNode.java
Thu Sep 5 01:21:48 2013
@@ -52,6 +52,7 @@ public class FSSchedulerNode extends Sch
private Resource availableResource;
private Resource usedResource =
recordFactory.newRecordInstance(Resource.class);
+ private Resource totalResourceCapability;
private volatile int numContainers;
@@ -68,6 +69,9 @@ public class FSSchedulerNode extends Sch
public FSSchedulerNode(RMNode node, boolean usePortForNodeName) {
this.rmNode = node;
this.availableResource = Resources.clone(node.getTotalCapability());
+ totalResourceCapability =
+ Resource.newInstance(node.getTotalCapability().getMemory(), node
+ .getTotalCapability().getVirtualCores());
if (usePortForNodeName) {
nodeName = rmNode.getHostName() + ":" + node.getNodeID().getPort();
} else {
@@ -173,6 +177,11 @@ public class FSSchedulerNode extends Sch
Resources.subtractFrom(usedResource, resource);
}
+ @Override
+ public Resource getTotalResource() {
+ return this.totalResourceCapability;
+ }
+
private synchronized void deductAvailableResource(Resource resource) {
if (resource == null) {
LOG.error("Invalid deduction of null resource for "
Modified:
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java?rev=1520189&r1=1520188&r2=1520189&view=diff
==============================================================================
---
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java
(original)
+++
hadoop/common/branches/branch-2.1-beta/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java
Thu Sep 5 01:21:48 2013
@@ -232,6 +232,14 @@ public class MockRM extends ResourceMana
return nm;
}
+ public MockNM registerNode(String nodeIdStr, int memory, int vCores)
+ throws Exception {
+ MockNM nm =
+ new MockNM(nodeIdStr, memory, vCores, getResourceTrackerService());
+ nm.registerNode();
+ return nm;
+ }
+
public void sendNodeStarted(MockNM nm) throws Exception {
RMNodeImpl node = (RMNodeImpl) getRMContext().getRMNodes().get(
nm.getNodeId());