This is an automated email from the ASF dual-hosted git repository.
snemeth pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new 61b0c2b YARN-9337. GPU auto-discovery script runs even when the
resource is given by hand. Contributed by Adam Antal
61b0c2b is described below
commit 61b0c2bb7c0f18c4a666b96ca1603cbd4d27eb6d
Author: Szilard Nemeth <[email protected]>
AuthorDate: Fri Jul 12 17:28:14 2019 +0200
YARN-9337. GPU auto-discovery script runs even when the resource is given
by hand. Contributed by Adam Antal
---
.../resourceplugin/gpu/GpuDiscoverer.java | 60 +++++++++++++---------
.../resourceplugin/gpu/TestGpuDiscoverer.java | 19 ++++++-
2 files changed, 53 insertions(+), 26 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
index 0c55478..b52d767 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
@@ -69,6 +69,8 @@ public class GpuDiscoverer {
private int numOfErrorExecutionSinceLastSucceed = 0;
private GpuDeviceInformation lastDiscoveredGpuInformation = null;
+ private List<GpuDevice> gpuDevicesFromUser;
+
private void validateConfOrThrowException() throws YarnException {
if (conf == null) {
throw new YarnException("Please initialize (call initialize) before use "
@@ -141,6 +143,14 @@ public class GpuDiscoverer {
}
}
+ private boolean IsAutoDiscoveryEnabled() {
+ String allowedDevicesStr = conf.get(
+ YarnConfiguration.NM_GPU_ALLOWED_DEVICES,
+ YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
+ return allowedDevicesStr.equals(
+ YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
+ }
+
/**
* Get list of GPU devices usable by YARN.
*
@@ -151,15 +161,13 @@ public class GpuDiscoverer {
throws YarnException {
validateConfOrThrowException();
- String allowedDevicesStr = conf.get(
- YarnConfiguration.NM_GPU_ALLOWED_DEVICES,
- YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
-
- if (allowedDevicesStr.equals(
- YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES)) {
+ if (IsAutoDiscoveryEnabled()) {
return parseGpuDevicesFromAutoDiscoveredGpuInfo();
} else {
- return parseGpuDevicesFromUserDefinedValues(allowedDevicesStr);
+ if (gpuDevicesFromUser == null) {
+ gpuDevicesFromUser = parseGpuDevicesFromUserDefinedValues();
+ }
+ return gpuDevicesFromUser;
}
}
@@ -191,16 +199,16 @@ public class GpuDiscoverer {
}
/**
- * @param devices allowed devices coming from the config.
- * Individual devices should be separated by commas.
- * <br>The format of individual devices should be:
- * <index:><minorNumber>
* @return List of GpuDevices
* @throws YarnException when a GPU device is defined as a duplicate.
* The first duplicate GPU device will be added to the exception message.
*/
- private List<GpuDevice> parseGpuDevicesFromUserDefinedValues(String devices)
+ private List<GpuDevice> parseGpuDevicesFromUserDefinedValues()
throws YarnException {
+ String devices = conf.get(
+ YarnConfiguration.NM_GPU_ALLOWED_DEVICES,
+ YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
+
if (devices.trim().isEmpty()) {
throw GpuDeviceSpecificationException.createWithEmptyValueSpecified();
}
@@ -242,19 +250,21 @@ public class GpuDiscoverer {
public synchronized void initialize(Configuration config)
throws YarnException {
this.conf = config;
- numOfErrorExecutionSinceLastSucceed = 0;
- lookUpAutoDiscoveryBinary(config);
-
- // Try to discover GPU information once and print
- try {
- LOG.info("Trying to discover GPU information ...");
- GpuDeviceInformation info = getGpuDeviceInformation();
- LOG.info("Discovered GPU information: " + info.toString());
- } catch (YarnException e) {
- String msg =
- "Failed to discover GPU information from system, exception message:"
- + e.getMessage() + " continue...";
- LOG.warn(msg);
+ if (IsAutoDiscoveryEnabled()) {
+ numOfErrorExecutionSinceLastSucceed = 0;
+ lookUpAutoDiscoveryBinary(config);
+
+ // Try to discover GPU information once and print
+ try {
+ LOG.info("Trying to discover GPU information ...");
+ GpuDeviceInformation info = getGpuDeviceInformation();
+ LOG.info("Discovered GPU information: " + info.toString());
+ } catch (YarnException e) {
+ String msg =
+ "Failed to discover GPU information from system, exception message:"
+ + e.getMessage() + " continue...";
+ LOG.warn(msg);
+ }
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
index ff64e04..a70e668 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
@@ -40,6 +40,7 @@ import java.util.List;
import java.util.function.Consumer;
import static org.apache.hadoop.test.PlatformAssumptions.assumeNotWindows;
+import static org.apache.hadoop.yarn.conf.YarnConfiguration.NM_GPU_ALLOWED_DEVICES;
import static org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDiscoverer.DEFAULT_BINARY_NAME;
import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.CoreMatchers.not;
@@ -49,6 +50,9 @@ import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.verify;
public class TestGpuDiscoverer {
private static final Logger LOG = LoggerFactory.getLogger(
@@ -96,7 +100,7 @@ public class TestGpuDiscoverer {
private Configuration createConfigWithAllowedDevices(String s) {
Configuration conf = new Configuration(false);
- conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, s);
+ conf.set(NM_GPU_ALLOWED_DEVICES, s);
setupFakeBinary(conf);
return conf;
}
@@ -495,4 +499,17 @@ public class TestGpuDiscoverer {
"executable in the default directories:"));
}
}
+
+ @Test
+ public void testScriptNotCalled() throws YarnException {
+ Configuration conf = new Configuration();
+ conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:1,2:3");
+
+ GpuDiscoverer gpuSpy = spy(GpuDiscoverer.class);
+
+ gpuSpy.initialize(conf);
+ gpuSpy.getGpusUsableByYarn();
+
+ verify(gpuSpy, never()).getGpuDeviceInformation();
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]