This is an automated email from the ASF dual-hosted git repository.

snemeth pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push: new 3653754 YARN-10590. Consider legacy auto queue creation absolute resource template to avoid rounding errors. Contributed by Andras Gyori 3653754 is described below commit 365375412fe5eea82549630ee8c5598502b95caf Author: Szilard Nemeth <snem...@apache.org> AuthorDate: Tue Feb 22 12:26:35 2022 +0100 YARN-10590. Consider legacy auto queue creation absolute resource template to avoid rounding errors. Contributed by Andras Gyori --- .../scheduler/capacity/AutoCreatedLeafQueue.java | 23 +++++++++--- .../capacity/AutoCreatedLeafQueueConfig.java | 15 ++++++++ .../scheduler/capacity/ManagedParentQueue.java | 41 ++++++++++++++-------- .../GuaranteedOrZeroCapacityOverTimePolicy.java | 10 +++--- .../TestAbsoluteResourceWithAutoQueue.java | 8 ++--- .../TestCapacitySchedulerAutoCreatedQueueBase.java | 28 +++++++++------ 6 files changed, 83 insertions(+), 42 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedLeafQueue.java index 384a652..910d8de 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedLeafQueue.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueResourceQuotas; import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerDynamicEditException; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.QueueEntitlement; @@ -30,6 +31,8 @@ import java.io.IOException; import java.util.HashSet; import java.util.Set; +import static org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.AbstractCSQueue.CapacityConfigType.ABSOLUTE_RESOURCE; + /** * Leaf queues which are auto created by an underlying implementation of * AbstractManagedParentQueue. Eg: PlanQueue for reservations or @@ -81,14 +84,14 @@ public class AutoCreatedLeafQueue extends AbstractAutoCreatedLeafQueue { QueueCapacities capacities = leafQueueTemplate.getQueueCapacities(); //reset capacities for the leaf queue - mergeCapacities(capacities); + mergeCapacities(capacities, leafQueueTemplate.getResourceQuotas()); } finally { writeLock.unlock(); } } - public void mergeCapacities(QueueCapacities capacities) { + public void mergeCapacities(QueueCapacities capacities, QueueResourceQuotas resourceQuotas) { for ( String nodeLabel : capacities.getExistingNodeLabels()) { queueCapacities.setCapacity(nodeLabel, capacities.getCapacity(nodeLabel)); @@ -101,9 +104,19 @@ public class AutoCreatedLeafQueue extends AbstractAutoCreatedLeafQueue { Resource resourceByLabel = labelManager.getResourceByLabel(nodeLabel, queueContext.getClusterResource()); - getQueueResourceQuotas().setEffectiveMinResource(nodeLabel, - Resources.multiply(resourceByLabel, - queueCapacities.getAbsoluteCapacity(nodeLabel))); + // Update effective resource from template due to rounding errors. + // However, we need to consider deactivation as well, in which case we fall back to + // Percentage calculation (as absolute capacity will be 0, resource will be zero as well). 
+ if (getCapacityConfigType().equals(ABSOLUTE_RESOURCE) + && queueCapacities.getAbsoluteCapacity(nodeLabel) > 0) { + getQueueResourceQuotas().setEffectiveMinResource(nodeLabel, + resourceQuotas.getConfiguredMinResource(nodeLabel)); + } else { + getQueueResourceQuotas().setEffectiveMinResource(nodeLabel, + Resources.multiply(resourceByLabel, + queueCapacities.getAbsoluteCapacity(nodeLabel))); + } + getQueueResourceQuotas().setEffectiveMaxResource(nodeLabel, Resources.multiply(resourceByLabel, queueCapacities .getAbsoluteMaximumCapacity(nodeLabel))); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedLeafQueueConfig.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedLeafQueueConfig.java index 87ef1c0..78e8a8c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedLeafQueueConfig.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedLeafQueueConfig.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueResourceQuotas; + /** * Auto Created Leaf queue configurations, capacity */ @@ -31,15 +33,19 @@ public class AutoCreatedLeafQueueConfig { private CapacitySchedulerConfiguration leafQueueConfigs; + private final QueueResourceQuotas resourceQuotas; + public AutoCreatedLeafQueueConfig(Builder builder) { this.queueCapacities = builder.queueCapacities; this.leafQueueConfigs = builder.leafQueueConfigs; + this.resourceQuotas = 
builder.queueResourceQuotas; } public static class Builder { private QueueCapacities queueCapacities; private CapacitySchedulerConfiguration leafQueueConfigs; + private QueueResourceQuotas queueResourceQuotas; public Builder capacities(QueueCapacities capacities) { this.queueCapacities = capacities; @@ -54,6 +60,11 @@ public class AutoCreatedLeafQueueConfig { public AutoCreatedLeafQueueConfig build() { return new AutoCreatedLeafQueueConfig(this); } + + public Builder resourceQuotas(QueueResourceQuotas queueResourceQuotas) { + this.queueResourceQuotas = queueResourceQuotas; + return this; + } } public QueueCapacities getQueueCapacities() { @@ -64,6 +75,10 @@ public class AutoCreatedLeafQueueConfig { return leafQueueConfigs; } + public QueueResourceQuotas getResourceQuotas() { + return resourceQuotas; + } + @Override public String toString() { return "AutoCreatedLeafQueueConfig{" + "queueCapacities=" + queueCapacities + ", leafQueueConfigs=" + leafQueueConfigs + '}'; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ManagedParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ManagedParentQueue.java index 7b019d9..6867c6b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ManagedParentQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ManagedParentQueue.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.exceptions.YarnException; 
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueResourceQuotas; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; import org.apache.hadoop.yarn.server.resourcemanager.scheduler .SchedulerDynamicEditException; @@ -161,25 +162,14 @@ public class ManagedParentQueue extends AbstractManagedParentQueue { CapacitySchedulerConfiguration autoCreatedTemplateConfig = super.initializeLeafQueueConfigs(leafQueueTemplateConfPrefix); builder.configuration(autoCreatedTemplateConfig); + QueueResourceQuotas queueResourceQuotas = new QueueResourceQuotas(); + setAbsoluteResourceTemplates(configuration, queueResourceQuotas); + QueuePath templateQueuePath = configuration .getAutoCreatedQueueObjectTemplateConfPrefix(getQueuePath()); - Set<String> templateConfiguredNodeLabels = queueContext .getQueueManager().getConfiguredNodeLabelsForAllQueues() .getLabelsByQueue(templateQueuePath.getFullPath()); - for (String nodeLabel : templateConfiguredNodeLabels) { - Resource templateMinResource = autoCreatedTemplateConfig.getMinimumResourceRequirement( - nodeLabel, configuration - .getAutoCreatedQueueTemplateConfPrefix(getQueuePath()), - resourceTypes); - - if (this.capacityConfigType.equals(CapacityConfigType.PERCENTAGE) - && !templateMinResource.equals(Resources.none())) { - throw new IOException("Managed Parent Queue " + this.getQueuePath() - + " config type is different from leaf queue template config type"); - } - } - //Load template capacities QueueCapacities queueCapacities = new QueueCapacities(false); CSQueueUtils.loadCapacitiesByLabelsFromConf(templateQueuePath, @@ -187,7 +177,6 @@ public class ManagedParentQueue extends AbstractManagedParentQueue { configuration, templateConfiguredNodeLabels); - /** * Populate leaf queue template (of Parent resources configured in * ABSOLUTE_RESOURCE) capacities with actual values for which configured has @@ -198,9 +187,31 @@ public class ManagedParentQueue extends AbstractManagedParentQueue { 
updateQueueCapacities(queueCapacities); } builder.capacities(queueCapacities); + builder.resourceQuotas(queueResourceQuotas); return builder; } + private void setAbsoluteResourceTemplates(CapacitySchedulerConfiguration configuration, + QueueResourceQuotas queueResourceQuotas) throws IOException { + QueuePath templateQueuePath = configuration + .getAutoCreatedQueueObjectTemplateConfPrefix(getQueuePath()); + Set<String> templateConfiguredNodeLabels = queueContext + .getQueueManager().getConfiguredNodeLabelsForAllQueues() + .getLabelsByQueue(templateQueuePath.getFullPath()); + + for (String nodeLabel : templateConfiguredNodeLabels) { + Resource templateMinResource = configuration.getMinimumResourceRequirement( + nodeLabel, templateQueuePath.getFullPath(), resourceTypes); + queueResourceQuotas.setConfiguredMinResource(nodeLabel, templateMinResource); + + if (this.capacityConfigType.equals(CapacityConfigType.PERCENTAGE) + && !templateMinResource.equals(Resources.none())) { + throw new IOException("Managed Parent Queue " + this.getQueuePath() + + " config type is different from leaf queue template config type"); + } + } + } + private void updateQueueCapacities(QueueCapacities queueCapacities) { CapacitySchedulerConfiguration configuration = queueContext.getConfiguration(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/queuemanagement/GuaranteedOrZeroCapacityOverTimePolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/queuemanagement/GuaranteedOrZeroCapacityOverTimePolicy.java index 0d51983..46bb40a 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/queuemanagement/GuaranteedOrZeroCapacityOverTimePolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/queuemanagement/GuaranteedOrZeroCapacityOverTimePolicy.java @@ -592,19 +592,18 @@ public class GuaranteedOrZeroCapacityOverTimePolicy for (String nodeLabel : updatedQueueTemplate.getQueueCapacities() .getExistingNodeLabels()) { - if (updatedQueueTemplate.getQueueCapacities(). - getCapacity(nodeLabel) > 0) { + if (updatedQueueTemplate.getQueueCapacities().getCapacity(nodeLabel) > 0) { if (isActive(leafQueue, nodeLabel)) { LOG.debug("Queue is already active. Skipping activation : {}", leafQueue.getQueuePath()); } else{ activate(leafQueue, nodeLabel); } - } else{ + } else { if (!isActive(leafQueue, nodeLabel)) { LOG.debug("Queue is already de-activated. Skipping " + "de-activation : {}", leafQueue.getQueuePath()); - } else{ + } else { /** * While deactivating queues of type ABSOLUTE_RESOURCE, configured * min resource has to be set based on updated capacity (which is @@ -613,7 +612,7 @@ public class GuaranteedOrZeroCapacityOverTimePolicy * leads to incorrect results. 
*/ leafQueue - .mergeCapacities(updatedQueueTemplate.getQueueCapacities()); + .mergeCapacities(updatedQueueTemplate.getQueueCapacities(), leafQueueTemplate.getResourceQuotas()); leafQueue.getQueueResourceQuotas() .setConfiguredMinResource(Resources.multiply( managedParentQueue.getQueueContext().getClusterResource(), @@ -787,6 +786,7 @@ public class GuaranteedOrZeroCapacityOverTimePolicy AutoCreatedLeafQueueConfig.Builder templateBuilder = new AutoCreatedLeafQueueConfig.Builder(); templateBuilder.capacities(capacities); + templateBuilder.resourceQuotas(managedParentQueue.getLeafQueueTemplate().getResourceQuotas()); return new AutoCreatedLeafQueueConfig(templateBuilder); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestAbsoluteResourceWithAutoQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestAbsoluteResourceWithAutoQueue.java index 326b9d0..e826bcb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestAbsoluteResourceWithAutoQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestAbsoluteResourceWithAutoQueue.java @@ -148,8 +148,6 @@ public class TestAbsoluteResourceWithAutoQueue return csConf; } - // TODO: Wangda: I think this test case is not correct, Sunil could help look - // into details. 
@Test(timeout = 20000) public void testAutoCreateLeafQueueCreation() throws Exception { @@ -182,10 +180,8 @@ public class TestAbsoluteResourceWithAutoQueue ManagedParentQueue parentQueue = (ManagedParentQueue) cs.getQueue(QUEUED); assertEquals(parentQueue, autoCreatedLeafQueue.getParent()); - validateCapacities((AutoCreatedLeafQueue) autoCreatedLeafQueue, 0.4f, - 0.04f, 1f, 0.6f); - validateCapacitiesByLabel((ManagedParentQueue) parentQueue, - (AutoCreatedLeafQueue) autoCreatedLeafQueue, NO_LABEL); + validateCapacities(autoCreatedLeafQueue, 0.4f, 0.04f, 1f, 0.6f); + validateCapacitiesByLabel(parentQueue, autoCreatedLeafQueue, NO_LABEL); Map<String, Float> expectedChildQueueAbsCapacity = new HashMap<String, Float>() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAutoCreatedQueueBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAutoCreatedQueueBase.java index 5bd04d0..5df92e2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAutoCreatedQueueBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAutoCreatedQueueBase.java @@ -89,6 +89,7 @@ import java.util.concurrent.TimeUnit; import static org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager .NO_LABEL; +import static org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.AbstractCSQueue.CapacityConfigType.ABSOLUTE_RESOURCE; import static org.apache.hadoop.yarn.server.resourcemanager.scheduler 
.capacity.CSQueueUtils.EPSILON; import static org.apache.hadoop.yarn.server.resourcemanager.scheduler @@ -166,6 +167,7 @@ public class TestCapacitySchedulerAutoCreatedQueueBase { public static final float NODE_LABEL_GPU_TEMPLATE_CAPACITY = 30.0f; public static final float NODEL_LABEL_SSD_TEMPLATE_CAPACITY = 40.0f; + public static final ImmutableSet<String> RESOURCE_TYPES = ImmutableSet.of("memory", "vcores"); protected MockRM mockRM = null; protected MockNM nm1 = null; @@ -777,17 +779,21 @@ public class TestCapacitySchedulerAutoCreatedQueueBase { * parentQueue.getQueueCapacities().getAbsoluteCapacity(label)); assertEquals(effMinCapacity, Resources.multiply(resourceByLabel, leafQueue.getQueueCapacities().getAbsoluteCapacity(label))); - // TODO: Wangda, I think this is a wrong test, it doesn't consider rounding - // loss of multiplication, the right value should be <10240, 2>, but the - // test expects <10240, 1> - // fixme, address this in the future patch (auto queue creation). -// if (expectedQueueEntitlements.get(label).getCapacity() > EPSILON) { -// assertEquals(Resource.newInstance(10 * GB, 2), -// leafQueue.getEffectiveCapacity(label)); -// } else { -// assertEquals(Resource.newInstance(0, 0), -// leafQueue.getEffectiveCapacity(label)); -// } + + if (expectedQueueEntitlements.get(label).getCapacity() > EPSILON) { + if (leafQueue.getCapacityConfigType().equals(ABSOLUTE_RESOURCE)) { + String templatePrefix = cs.getConfiguration().getAutoCreatedQueueTemplateConfPrefix( + parentQueue.getQueuePath()); + Resource resourceTemplate = parentQueue.getLeafQueueTemplate().getLeafQueueConfigs() + .getMinimumResourceRequirement(label, templatePrefix, RESOURCE_TYPES); + assertEquals(resourceTemplate, leafQueue.getEffectiveCapacity(label)); + } else { + assertEquals(effMinCapacity, leafQueue.getEffectiveCapacity(label)); + } + } else { + assertEquals(Resource.newInstance(0, 0), + leafQueue.getEffectiveCapacity(label)); + } if 
(leafQueue.getQueueCapacities().getAbsoluteCapacity(label) > 0) { assertTrue(Resources.greaterThan(cs.getResourceCalculator(),

---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org