Repository: hadoop Updated Branches: refs/heads/branch-2 ba94c68e0 -> 7e643130f
YARN-6870. Fix floating point inaccuracies in resource availability check in AllocationBasedResourceUtilizationTracker. (Brook Zhou via asuresh) (cherry picked from commit 890e14c02a612c772cecd5dff2411060efd418a3) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/7e643130 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/7e643130 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/7e643130 Branch: refs/heads/branch-2 Commit: 7e643130f2739e3bbaa332b73f95fdb3f3a8eba9 Parents: ba94c68 Author: Arun Suresh <[email protected]> Authored: Fri Jul 28 16:32:43 2017 -0700 Committer: Arun Suresh <[email protected]> Committed: Fri Jul 28 16:33:42 2017 -0700 ---------------------------------------------------------------------- ...locationBasedResourceUtilizationTracker.java | 31 +++++-- .../scheduler/ContainerScheduler.java | 5 +- ...locationBasedResourceUtilizationTracker.java | 93 ++++++++++++++++++++ 3 files changed, 123 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/7e643130/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/AllocationBasedResourceUtilizationTracker.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/AllocationBasedResourceUtilizationTracker.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/AllocationBasedResourceUtilizationTracker.java index 9665e75..c7f1042 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/AllocationBasedResourceUtilizationTracker.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/AllocationBasedResourceUtilizationTracker.java @@ -118,19 +118,40 @@ public class AllocationBasedResourceUtilizationTracker implements return false; } - float vCores = (float) cpuVcores / - getContainersMonitor().getVCoresAllocatedForContainers(); if (LOG.isDebugEnabled()) { LOG.debug("before cpuCheck [asked={} > allowed={}]", - this.containersAllocation.getCPU(), vCores); + this.containersAllocation.getCPU(), + getContainersMonitor().getVCoresAllocatedForContainers()); } - // Check CPU. - if (this.containersAllocation.getCPU() + vCores > 1.0f) { + // Check CPU. Compare using integral values of cores to avoid decimal + // inaccuracies. + if (!hasEnoughCpu(this.containersAllocation.getCPU(), + getContainersMonitor().getVCoresAllocatedForContainers(), cpuVcores)) { return false; } return true; } + /** + * Returns whether there is enough space for coresRequested in totalCores. + * Converts currentAllocation usage to nearest integer count before comparing, + * as floats are inherently imprecise. NOTE: this calculation assumes that + * requested core counts must be integers, and currentAllocation core count + * must also be an integer. + * + * @param currentAllocation The current allocation, a float value from 0 to 1. + * @param totalCores The total cores in the system. + * @param coresRequested The number of cores requested. + * @return True if currentAllocationtotalCores*coresRequested <= + * totalCores. + */ + public boolean hasEnoughCpu(float currentAllocation, long totalCores, + int coresRequested) { + // Must not cast here, as it would truncate the decimal digits. + return Math.round(currentAllocation * totalCores) + + coresRequested <= totalCores; + } + public ContainersMonitor getContainersMonitor() { return this.scheduler.getContainersMonitor(); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/7e643130/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java index 11a8f3f..2413384 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/ContainerScheduler.java @@ -346,7 +346,10 @@ public class ContainerScheduler extends AbstractService implements ResourceUtilization resourcesToFreeUp) { return resourcesToFreeUp.getPhysicalMemory() <= 0 && resourcesToFreeUp.getVirtualMemory() <= 0 && - resourcesToFreeUp.getCPU() <= 0.0f; + // Convert the number of cores to nearest integral number, due to + // imprecision of direct float comparison. + Math.round(resourcesToFreeUp.getCPU() + * getContainersMonitor().getVCoresAllocatedForContainers()) <= 0; } private ResourceUtilization resourcesToFreeUp( http://git-wip-us.apache.org/repos/asf/hadoop/blob/7e643130/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestAllocationBasedResourceUtilizationTracker.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestAllocationBasedResourceUtilizationTracker.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestAllocationBasedResourceUtilizationTracker.java new file mode 100644 index 0000000..82c2147 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestAllocationBasedResourceUtilizationTracker.java @@ -0,0 +1,93 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.event.AsyncDispatcher; +import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; +import org.apache.hadoop.yarn.server.nodemanager.Context; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +/** + * Tests for the {@link AllocationBasedResourceUtilizationTracker} class. + */ +public class TestAllocationBasedResourceUtilizationTracker { + + private ContainerScheduler mockContainerScheduler; + + @Before + public void setup() { + mockContainerScheduler = mock(ContainerScheduler.class); + ContainersMonitor containersMonitor = + new ContainersMonitorImpl(mock(ContainerExecutor.class), + mock(AsyncDispatcher.class), mock(Context.class)); + YarnConfiguration conf = new YarnConfiguration(); + conf.setInt(YarnConfiguration.NM_PMEM_MB, 1024); + conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, true); + conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, true); + conf.setFloat(YarnConfiguration.NM_VMEM_PMEM_RATIO, 2.0f); + conf.setInt(YarnConfiguration.NM_VCORES, 8); + containersMonitor.init(conf); + when(mockContainerScheduler.getContainersMonitor()) + .thenReturn(containersMonitor); + } + + /** + * Node has capacity for 1024 MB and 8 cores. Saturate the node. When full the + * hasResourceAvailable should return false. + */ + @Test + public void testHasResourcesAvailable() { + AllocationBasedResourceUtilizationTracker tracker = + new AllocationBasedResourceUtilizationTracker(mockContainerScheduler); + Container testContainer = mock(Container.class); + when(testContainer.getResource()).thenReturn(Resource.newInstance(512, 4)); + for (int i = 0; i < 2; i++) { + Assert.assertTrue(tracker.hasResourcesAvailable(testContainer)); + tracker.addContainerResources(testContainer); + } + Assert.assertFalse(tracker.hasResourcesAvailable(testContainer)); + } + + /** + * Test the case where the current allocation has been truncated to 0.8888891 + * (8/9 cores used). Request 1 additional core - hasEnoughCpu should return + * true. + */ + @Test + public void testHasEnoughCpu() { + AllocationBasedResourceUtilizationTracker tracker = + new AllocationBasedResourceUtilizationTracker(mockContainerScheduler); + float currentAllocation = 0.8888891f; + long totalCores = 9; + int alreadyUsedCores = 8; + Assert.assertTrue(tracker.hasEnoughCpu(currentAllocation, totalCores, + (int) totalCores - alreadyUsedCores)); + Assert.assertFalse(tracker.hasEnoughCpu(currentAllocation, totalCores, + (int) totalCores - alreadyUsedCores + 1)); + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
