This is an automated email from the ASF dual-hosted git repository.

snemeth pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 0ede873  YARN-9337. GPU auto-discovery script runs even when the resource is given by hand. Contributed by Adam Antal
0ede873 is described below

commit 0ede873090f7b7c0163288b4cec748afd9ae2f4a
Author: Szilard Nemeth <snem...@apache.org>
AuthorDate: Fri Jul 12 17:28:14 2019 +0200

    YARN-9337. GPU auto-discovery script runs even when the resource is given by hand. Contributed by Adam Antal
    
    (cherry picked from commit 61b0c2bb7c0f18c4a666b96ca1603cbd4d27eb6d)
---
 .../resourceplugin/gpu/GpuDiscoverer.java          | 60 +++++++++++++---------
 .../resourceplugin/gpu/TestGpuDiscoverer.java      | 19 ++++++-
 2 files changed, 53 insertions(+), 26 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
index 6cf6a8d..27a4ea1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuDiscoverer.java
@@ -69,6 +69,8 @@ public class GpuDiscoverer {
   private int numOfErrorExecutionSinceLastSucceed = 0;
   private GpuDeviceInformation lastDiscoveredGpuInformation = null;
 
+  private List<GpuDevice> gpuDevicesFromUser;
+
   private void validateConfOrThrowException() throws YarnException {
     if (conf == null) {
       throw new YarnException("Please initialize (call initialize) before use "
@@ -143,6 +145,14 @@ public class GpuDiscoverer {
     }
   }
 
+  private boolean IsAutoDiscoveryEnabled() {
+    String allowedDevicesStr = conf.get(
+        YarnConfiguration.NM_GPU_ALLOWED_DEVICES,
+        YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
+    return allowedDevicesStr.equals(
+        YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
+  }
+
   /**
    * Get list of GPU devices usable by YARN.
    *
@@ -153,15 +163,13 @@ public class GpuDiscoverer {
       throws YarnException {
     validateConfOrThrowException();
 
-    String allowedDevicesStr = conf.get(
-        YarnConfiguration.NM_GPU_ALLOWED_DEVICES,
-        YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
-
-    if (allowedDevicesStr.equals(
-        YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES)) {
+    if (IsAutoDiscoveryEnabled()) {
       return parseGpuDevicesFromAutoDiscoveredGpuInfo();
     } else {
-      return parseGpuDevicesFromUserDefinedValues(allowedDevicesStr);
+      if (gpuDevicesFromUser == null) {
+        gpuDevicesFromUser = parseGpuDevicesFromUserDefinedValues();
+      }
+      return gpuDevicesFromUser;
     }
   }
 
@@ -193,16 +201,16 @@ public class GpuDiscoverer {
   }
 
   /**
-   * @param devices allowed devices coming from the config.
-   *                          Individual devices should be separated by commas.
-   *                          <br>The format of individual devices should be:
-   *                           &lt;index:&gt;&lt;minorNumber&gt;
    * @return List of GpuDevices
    * @throws YarnException when a GPU device is defined as a duplicate.
    * The first duplicate GPU device will be added to the exception message.
    */
-  private List<GpuDevice> parseGpuDevicesFromUserDefinedValues(String devices)
+  private List<GpuDevice> parseGpuDevicesFromUserDefinedValues()
       throws YarnException {
+    String devices = conf.get(
+        YarnConfiguration.NM_GPU_ALLOWED_DEVICES,
+        YarnConfiguration.AUTOMATICALLY_DISCOVER_GPU_DEVICES);
+
     if (devices.trim().isEmpty()) {
       throw GpuDeviceSpecificationException.createWithEmptyValueSpecified();
     }
@@ -244,19 +252,21 @@ public class GpuDiscoverer {
   public synchronized void initialize(Configuration config)
       throws YarnException {
     this.conf = config;
-    numOfErrorExecutionSinceLastSucceed = 0;
-    lookUpAutoDiscoveryBinary(config);
-
-    // Try to discover GPU information once and print
-    try {
-      LOG.info("Trying to discover GPU information ...");
-      GpuDeviceInformation info = getGpuDeviceInformation();
-      LOG.info("Discovered GPU information: " + info.toString());
-    } catch (YarnException e) {
-      String msg =
-          "Failed to discover GPU information from system, exception message:"
-              + e.getMessage() + " continue...";
-      LOG.warn(msg);
+    if (IsAutoDiscoveryEnabled()) {
+      numOfErrorExecutionSinceLastSucceed = 0;
+      lookUpAutoDiscoveryBinary(config);
+
+      // Try to discover GPU information once and print
+      try {
+        LOG.info("Trying to discover GPU information ...");
+        GpuDeviceInformation info = getGpuDeviceInformation();
+        LOG.info("Discovered GPU information: " + info.toString());
+      } catch (YarnException e) {
+        String msg =
+                "Failed to discover GPU information from system, exception message:"
+                        + e.getMessage() + " continue...";
+        LOG.warn(msg);
+      }
     }
   }
 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
index ff64e04..a70e668 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuDiscoverer.java
@@ -40,6 +40,7 @@ import java.util.List;
 import java.util.function.Consumer;
 
 import static org.apache.hadoop.test.PlatformAssumptions.assumeNotWindows;
+import static org.apache.hadoop.yarn.conf.YarnConfiguration.NM_GPU_ALLOWED_DEVICES;
 import static org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDiscoverer.DEFAULT_BINARY_NAME;
 import static org.hamcrest.CoreMatchers.containsString;
 import static org.hamcrest.CoreMatchers.not;
@@ -49,6 +50,9 @@ import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertThat;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.verify;
 
 public class TestGpuDiscoverer {
   private static final Logger LOG = LoggerFactory.getLogger(
@@ -96,7 +100,7 @@ public class TestGpuDiscoverer {
 
   private Configuration createConfigWithAllowedDevices(String s) {
     Configuration conf = new Configuration(false);
-    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, s);
+    conf.set(NM_GPU_ALLOWED_DEVICES, s);
     setupFakeBinary(conf);
     return conf;
   }
@@ -495,4 +499,17 @@ public class TestGpuDiscoverer {
           "executable in the default directories:"));
     }
   }
+
+  @Test
+  public void testScriptNotCalled() throws YarnException {
+    Configuration conf = new Configuration();
+    conf.set(YarnConfiguration.NM_GPU_ALLOWED_DEVICES, "0:1,2:3");
+
+    GpuDiscoverer gpuSpy = spy(GpuDiscoverer.class);
+
+    gpuSpy.initialize(conf);
+    gpuSpy.getGpusUsableByYarn();
+
+    verify(gpuSpy, never()).getGpuDeviceInformation();
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to