[
https://issues.apache.org/jira/browse/YARN-11000?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17739970#comment-17739970
]
ASF GitHub Bot commented on YARN-11000:
---------------------------------------
szilard-nemeth commented on code in PR #5783:
URL: https://github.com/apache/hadoop/pull/5783#discussion_r1252202346
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java:
##########
@@ -384,6 +388,38 @@ protected void setupQueueConfigs(Resource clusterResource)
throws
.parseConfiguredMaximumCapacityVector(queuePath.getFullPath(),
this.queueNodeLabelsSettings.getConfiguredNodeLabels(),
QueueCapacityVector.newInstance());
+
+ // Preserving the capacities set by Entitlements
+ if (this instanceof ReservationQueue ||
Review Comment:
Nit: Conditions can fit in one line.
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java:
##########
@@ -384,6 +388,38 @@ protected void setupQueueConfigs(Resource clusterResource)
throws
.parseConfiguredMaximumCapacityVector(queuePath.getFullPath(),
this.queueNodeLabelsSettings.getConfiguredNodeLabels(),
QueueCapacityVector.newInstance());
+
+ // Preserving the capacities set by Entitlements
Review Comment:
Can you add a more verbose explanation for this one?
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestAbsoluteResourceConfiguration.java:
##########
@@ -571,9 +572,13 @@ public void testComplexValidateAbsoluteResourceConfig()
throws Exception {
CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
try {
cs.reinitialize(csConf, rm.getRMContext());
- Assert.fail();
+ if (csConf.isLegacyQueueMode()) {
+ Assert.fail();
Review Comment:
Please add a failure message or a comment.
Applies to all Assert.fail() calls below.
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesForCSWithPartitions.java:
##########
@@ -204,6 +207,36 @@ public void setUp() throws Exception {
super.setUp();
GuiceServletConfig.setInjector(
Guice.createInjector(new WebServletModule()));
+
+ rm.start();
+ rm.getRMContext().getNodeLabelManager().addLabelsToNode(ImmutableMap
+ .of(NodeId.newInstance("127.0.0.1", 0), Sets.newHashSet(LABEL_LX)));
+
+ nm1 = new MockNM("127.0.0.1:1234", 2 * 1024,
+ rm.getResourceTrackerService());
+ MockNM nm2 = new MockNM("127.0.0.2:1234", 2 * 1024,
+ rm.getResourceTrackerService());
Review Comment:
Nit: Can fit in previous line.
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestAutoCreatedQueueDeletionPolicy.java:
##########
@@ -17,33 +17,71 @@
*/
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
+import java.io.IOException;
+
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.Time;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
+import
org.apache.hadoop.yarn.server.resourcemanager.nodelabels.NullRMNodeLabelsManager;
+import
org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import
org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
+import
org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import
org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent;
import
org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent;
+import org.junit.After;
import org.junit.Assert;
+import org.junit.Before;
import org.junit.Test;
+import static
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestCapacitySchedulerNewQueueAutoCreation.MAX_MEMORY;
+
public class TestAutoCreatedQueueDeletionPolicy
- extends TestCapacitySchedulerNewQueueAutoCreation {
+ extends TestCapacitySchedulerAutoCreatedQueueBase {
+ private CapacitySchedulerConfiguration csConf;
private CapacityScheduler cs;
- private AutoCreatedQueueDeletionPolicy policy;
+ private final AutoCreatedQueueDeletionPolicy policy = new
+ AutoCreatedQueueDeletionPolicy();
- public void prepareForSchedule() throws Exception{
- super.startScheduler();
+ private CapacitySchedulerQueueManager autoQueueHandler;
- policy = getPolicy();
- cs = getCs();
+ /*
+ Create the following structure:
+ root
+ / \
+ a b
+ /
+ a1
+ */
+ @Before
+ public void setUp() throws Exception {
+ csConf = new CapacitySchedulerConfiguration();
+ csConf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
+ ResourceScheduler.class);
- policy.editSchedule();
- // There are no queues should be scheduled
- Assert.assertEquals(policy.getMarkedForDeletion().size(), 0);
- Assert.assertEquals(policy.getSentForDeletion().size(), 0);
+ // By default, set 3 queues, a/b, and a.a1
+ csConf.setQueues("root", new String[]{"a", "b"});
+ csConf.setNonLabeledQueueWeight("root", 1f);
+ csConf.setNonLabeledQueueWeight("root.a", 1f);
+ csConf.setNonLabeledQueueWeight("root.b", 1f);
+ csConf.setQueues("root.a", new String[]{"a1"});
+ csConf.setNonLabeledQueueWeight("root.a.a1", 1f);
+ csConf.setAutoQueueCreationV2Enabled("root", true);
+ csConf.setAutoQueueCreationV2Enabled("root.a", true);
+ csConf.setAutoQueueCreationV2Enabled("root.e", true);
Review Comment:
What is queue "root.e" here? This was not mentioned in the comment about the
queue structure above.
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java:
##########
@@ -384,6 +388,38 @@ protected void setupQueueConfigs(Resource clusterResource)
throws
.parseConfiguredMaximumCapacityVector(queuePath.getFullPath(),
this.queueNodeLabelsSettings.getConfiguredNodeLabels(),
QueueCapacityVector.newInstance());
+
+ // Preserving the capacities set by Entitlements
+ if (this instanceof ReservationQueue ||
+ this instanceof PlanQueue) {
+ for (final String label :
queueNodeLabelsSettings.getConfiguredNodeLabels()) {
+ setConfiguredMinCapacityVector(label,
+ QueueCapacityVector.of(queueCapacities.getCapacity(label) * 100,
+ QueueCapacityVector.ResourceUnitCapacityType.PERCENTAGE));
+ setConfiguredMaxCapacityVector(label,
+ QueueCapacityVector.of(queueCapacities.getMaximumCapacity(label)
* 100,
+ QueueCapacityVector.ResourceUnitCapacityType.PERCENTAGE));
+ }
+ }
+
+ // Re-adjust weight when mixed capacity type is used. 5w == [memory=5w,
vcores=5w]
Review Comment:
Nit: Is this covered by unit tests?
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/helper/CapacitySchedulerInfoHelper.java:
##########
@@ -64,22 +69,23 @@ public class CapacitySchedulerInfoHelper {
private CapacitySchedulerInfoHelper() {}
- public static String getMode(CSQueue queue) throws YarnRuntimeException {
- if (queue.getCapacityConfigType() ==
- AbstractCSQueue.CapacityConfigType.ABSOLUTE_RESOURCE) {
- return "absolute";
- } else if (queue.getCapacityConfigType() ==
- AbstractCSQueue.CapacityConfigType.PERCENTAGE) {
- float weight = queue.getQueueCapacities().getWeight();
- if (weight == -1) {
- //-1 indicates we are not in weight mode
+ public static String getMode(CSQueue queue) {
+ final Set<QueueCapacityVector.ResourceUnitCapacityType>
definedCapacityTypes =
+ queue.getConfiguredCapacityVector(NO_LABEL).getDefinedCapacityTypes();
+ if (definedCapacityTypes.size() == 1) {
Review Comment:
A comment would be useful why this is required here.
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbsoluteResourceCapacityCalculator.java:
##########
@@ -59,7 +59,8 @@ public double calculateMaximumResource(
public void updateCapacitiesAfterCalculation(
ResourceCalculationDriver resourceCalculationDriver, CSQueue queue,
String label) {
CapacitySchedulerQueueCapacityHandler.setQueueCapacities(
-
resourceCalculationDriver.getUpdateContext().getUpdatedClusterResource(label),
queue, label);
Review Comment:
Nit: This is a whitespace only change in this file. Could be removed from
change.
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractParentQueue.java:
##########
@@ -1136,11 +1137,43 @@ public void refreshAfterResourceCalculation(Resource
clusterResource,
CSQueueUtils.updateConfiguredCapacityMetrics(resourceCalculator,
labelManager.getResourceByLabel(null, clusterResource),
RMNodeLabelsManager.NO_LABEL, this);
+
+ LOG.info("Refresh after resource calculation (PARENT) {}\n"
Review Comment:
How frequently this is gonna be printed? Shouldn't we rather use DEBUG level?
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAutoCreatedQueueBase.java:
##########
@@ -487,6 +487,20 @@ public static CapacitySchedulerConfiguration
setupQueueConfiguration(
return conf;
}
+ public static void
setupQueueConfigurationForSingleFlexibleAutoCreatedLeafQueue(
+ CapacitySchedulerConfiguration conf) {
+
+ //setup new queues with one of them auto enabled
+ // Define top-level queues
+ // Set childQueue for root
+ conf.setQueues(CapacitySchedulerConfiguration.ROOT,
+ new String[] {"c"});
Review Comment:
Nit: can fit into a single line.
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java:
##########
@@ -422,10 +458,20 @@ protected void setDynamicQueueProperties() {
.getConfiguredNodeLabelsForAllQueues()
.getLabelsByQueue(parentTemplate);
- if (parentNodeLabels != null && parentNodeLabels.size() > 1) {
- queueContext.getQueueManager()
- .getConfiguredNodeLabelsForAllQueues()
- .setLabelsByQueue(getQueuePath(), new HashSet<>(parentNodeLabels));
+ if (parentNodeLabels != null) {
+ if (parentNodeLabels.size() > 1) {
+ queueContext.getQueueManager().getConfiguredNodeLabelsForAllQueues()
+ .setLabelsByQueue(getQueuePath(), new
HashSet<>(parentNodeLabels));
+ }
+ // Default to weight 1
+ for (String label : parentNodeLabels) {
+ float weightByLabel = queueContext.getConfiguration()
+ .getLabeledQueueWeight(queuePath, label);
+ if (weightByLabel == -1) {
+ queueContext.getConfiguration()
+ .setLabeledQueueWeight(queuePath.getFullPath(), label, 1);
+ }
+ }
Review Comment:
Can you add a more verbose explanation for this one?
Is this covered by unit tests?
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAutoQueueCreation.java:
##########
@@ -141,7 +141,9 @@ public void testAutoCreateLeafQueueCreation() throws
Exception {
validateInitialQueueEntitlement(parentQueue, USER0,
expectedChildQueueAbsCapacity, accessibleNodeLabelsOnC);
- validateUserAndAppLimits(autoCreatedLeafQueue, 4000, 4000);
+ // 6553/16384=0.3999633789 vs 0.4
+ final int maxApps = cs.getConfiguration().isLegacyQueueMode() ? 4000 :
3999;
Review Comment:
Is this about rounding/precision? This comment is unclear for me.
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerNewQueueAutoCreation.java:
##########
@@ -311,6 +319,7 @@ public void
testAutoCreateQueueShouldFailWhenNonParentQueue()
public void testAutoCreateQueueWhenSiblingsNotInWeightMode()
throws Exception {
startScheduler();
+ csConf.setLegacyQueueModeEnabled(true);
Review Comment:
Can you please elaborate / comment why this flag is set to true for this
testcase in specific?
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservationQueue.java:
##########
@@ -73,7 +82,7 @@ public void setup() throws IOException,
SchedulerDynamicEditException {
when(csContext.getMaximumResourceCapability()).thenReturn(
Resources.createResource(16 * GB, 32));
when(csContext.getClusterResource()).thenReturn(
- Resources.createResource(100 * 16 * GB, 100 * 32));
+ clusterResource);
Review Comment:
Nit: Can fit in the previous line.
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservationQueue.java:
##########
@@ -100,9 +109,9 @@ public void testAddSubtractCapacity() throws Exception {
autoCreatedLeafQueue.setCapacity(1.0F);
autoCreatedLeafQueue.setMaxCapacity(1.0F);
+
planQueue.updateClusterResource(
- Resources.createResource(100 * 16 * GB, 100 * 32),
- new ResourceLimits(Resources.createResource(100 * 16 * GB, 100 * 32)));
+ clusterResource, new ResourceLimits(clusterResource));
Review Comment:
Nit: Can fit in the previous line.
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java:
##########
@@ -297,11 +297,16 @@ private Configuration getSchedulerDynamicConfiguration()
throws IOException {
private CapacitySchedulerConfiguration
getSchedulerAutoCreatedQueueConfiguration(
- boolean overrideWithQueueMappings) throws IOException {
+ boolean overrideWithQueueMappings, boolean useFlexibleAQC) throws
IOException {
Review Comment:
Nit; IOException is not thrown anymore.
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesForCSWithPartitions.java:
##########
@@ -204,6 +207,36 @@ public void setUp() throws Exception {
super.setUp();
GuiceServletConfig.setInjector(
Guice.createInjector(new WebServletModule()));
+
+ rm.start();
+ rm.getRMContext().getNodeLabelManager().addLabelsToNode(ImmutableMap
+ .of(NodeId.newInstance("127.0.0.1", 0), Sets.newHashSet(LABEL_LX)));
+
+ nm1 = new MockNM("127.0.0.1:1234", 2 * 1024,
+ rm.getResourceTrackerService());
Review Comment:
Nit: Can fit in previous line.
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesForCSWithPartitions.java:
##########
@@ -204,6 +207,36 @@ public void setUp() throws Exception {
super.setUp();
GuiceServletConfig.setInjector(
Guice.createInjector(new WebServletModule()));
+
+ rm.start();
+ rm.getRMContext().getNodeLabelManager().addLabelsToNode(ImmutableMap
+ .of(NodeId.newInstance("127.0.0.1", 0), Sets.newHashSet(LABEL_LX)));
+
+ nm1 = new MockNM("127.0.0.1:1234", 2 * 1024,
+ rm.getResourceTrackerService());
+ MockNM nm2 = new MockNM("127.0.0.2:1234", 2 * 1024,
+ rm.getResourceTrackerService());
+ nm1.registerNode();
+ nm2.registerNode();
+
+ rm.getRMContext().getNodeLabelManager().addLabelsToNode(ImmutableMap
+ .of(NodeId.newInstance("127.0.0.2", 0), Sets.newHashSet(LABEL_LY)));
+
+ MockNM nm3 = new MockNM("127.0.0.2:1234", 128 * 1024,
+ rm.getResourceTrackerService());
+ nm3.registerNode();
+
+ // Default partition
+ MockNM nm4 = new MockNM("127.0.0.3:1234", 128 * 1024,
+ rm.getResourceTrackerService());
Review Comment:
Nit: Can fit in previous line.
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesForCSWithPartitions.java:
##########
@@ -553,20 +571,20 @@ private void verifyPartitionCapacityInfoXML(Element
partitionInfo,
float absoluteCapacity, float absoluteUsedCapacity,
float absoluteMaxCapacity) {
assertEquals("capacity doesn't match", capacity,
- WebServicesTestUtils.getXmlFloat(partitionInfo, "capacity"), 1e-3f);
+ WebServicesTestUtils.getXmlFloat(partitionInfo, "capacity"), 1e-1f);
Review Comment:
Can you extract 1e-1f to a static final?
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java:
##########
@@ -5137,19 +5137,35 @@ public void
testSetupQueueConfigsWithSpecifiedConfiguration()
assertEquals(0, conf.size());
conf.setNodeLocalityDelay(60);
+
+ csConf.setQueues(ROOT, new String[] {leafQueueName, B});
csConf.setCapacity(ROOT + DOT + leafQueueName, 10);
- csConf.setMaximumCapacity(ROOT + DOT + leafQueueName, 100);
- csConf.setUserLimitFactor(ROOT + DOT +leafQueueName, 0.1f);
+ csConf.setMaximumCapacity(ROOT + DOT + leafQueueName,
+ 100);
+ csConf.setUserLimitFactor(ROOT + DOT + leafQueueName, 0.1f);
+
+ csConf.setCapacity(ROOT + DOT + B, 90);
+ csConf.setMaximumCapacity(ROOT + DOT + B,
+ 100);
Review Comment:
Nit: Can fit in previous line.
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesForCSWithPartitions.java:
##########
@@ -204,6 +207,36 @@ public void setUp() throws Exception {
super.setUp();
GuiceServletConfig.setInjector(
Guice.createInjector(new WebServletModule()));
+
+ rm.start();
+ rm.getRMContext().getNodeLabelManager().addLabelsToNode(ImmutableMap
+ .of(NodeId.newInstance("127.0.0.1", 0), Sets.newHashSet(LABEL_LX)));
+
+ nm1 = new MockNM("127.0.0.1:1234", 2 * 1024,
+ rm.getResourceTrackerService());
+ MockNM nm2 = new MockNM("127.0.0.2:1234", 2 * 1024,
+ rm.getResourceTrackerService());
+ nm1.registerNode();
+ nm2.registerNode();
+
+ rm.getRMContext().getNodeLabelManager().addLabelsToNode(ImmutableMap
+ .of(NodeId.newInstance("127.0.0.2", 0), Sets.newHashSet(LABEL_LY)));
+
+ MockNM nm3 = new MockNM("127.0.0.2:1234", 128 * 1024,
+ rm.getResourceTrackerService());
Review Comment:
Nit: Can fit in previous line.
> Replace queue resource calculation logic in updateClusterResource
> -----------------------------------------------------------------
>
> Key: YARN-11000
> URL: https://issues.apache.org/jira/browse/YARN-11000
> Project: Hadoop YARN
> Issue Type: Sub-task
> Components: capacity scheduler
> Reporter: Andras Gyori
> Assignee: Andras Gyori
> Priority: Major
> Labels: pull-request-available
> Time Spent: 2h 10m
> Remaining Estimate: 0h
>
> YARN-10965 introduces a brand new queue calculation system. In order to
> simplify the review process, this issue replaces the current logic with the
> newly introduced one.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]