This is an automated email from the ASF dual-hosted git repository.
snemeth pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new c61c969 YARN-9235. If linux container executor is not set for a GPU
cluster GpuResourceHandlerImpl is not initialized and NPE is thrown.
Contributed by Antal Balint Steinbach, Adam Antal
c61c969 is described below
commit c61c9696689399e339c0d4a45e588d9f39f8d819
Author: Szilard Nemeth <[email protected]>
AuthorDate: Fri Jul 12 16:51:58 2019 +0200
YARN-9235. If linux container executor is not set for a GPU cluster
GpuResourceHandlerImpl is not initialized and NPE is thrown. Contributed by
Antal Balint Steinbach, Adam Antal
(cherry picked from commit c416284bb7581747beef36d7899d8680fe33abbd)
---
.../resourceplugin/gpu/GpuResourcePlugin.java | 22 +++++++++
.../resourceplugin/gpu/TestGpuResourcePlugin.java | 54 ++++++++++++++++++++++
2 files changed, 76 insertions(+)
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
index 393d76e..1ac6f83 100644
---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.java
@@ -18,6 +18,7 @@
package
org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.nodemanager.Context;
import
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
@@ -33,8 +34,14 @@ import
org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInforma
import
org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.NMGpuResourceInfo;
import java.util.List;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class GpuResourcePlugin implements ResourcePlugin {
+
+ private static final Logger LOG =
+ LoggerFactory.getLogger(GpuResourcePlugin.class);
+
private final GpuNodeResourceUpdateHandler resourceDiscoverHandler;
private final GpuDiscoverer gpuDiscoverer;
private GpuResourceHandlerImpl gpuResourceHandler = null;
@@ -84,6 +91,10 @@ public class GpuResourcePlugin implements ResourcePlugin {
public synchronized NMResourceInfo getNMResourceInfo() throws YarnException {
GpuDeviceInformation gpuDeviceInformation =
gpuDiscoverer.getGpuDeviceInformation();
+
+ //At this point the gpu plugin is already enabled
+ checkGpuResourceHandler();
+
GpuResourceAllocator gpuResourceAllocator =
gpuResourceHandler.getGpuAllocator();
List<GpuDevice> totalGpus = gpuResourceAllocator.getAllowedGpusCopy();
@@ -94,6 +105,17 @@ public class GpuResourcePlugin implements ResourcePlugin {
assignedGpuDevices);
}
+ private void checkGpuResourceHandler() throws YarnException {
+ if(gpuResourceHandler == null) {
+ String errorMsg =
+ "Linux Container Executor is not configured for the NodeManager. "
+ + "To fully enable GPU feature on the node also set "
+ + YarnConfiguration.NM_CONTAINER_EXECUTOR + " properly.";
+ LOG.warn(errorMsg);
+ throw new YarnException(errorMsg);
+ }
+ }
+
@Override
public String toString() {
return GpuResourcePlugin.class.getName();
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java
new file mode 100644
index 0000000..888f899
--- /dev/null
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/TestGpuResourcePlugin.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package
org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;
+
+import static org.mockito.Mockito.mock;
+
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.junit.Test;
+
+public class TestGpuResourcePlugin {
+
+ @Test(expected = YarnException.class)
+ public void testResourceHandlerNotInitialized() throws YarnException {
+ GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class);
+ GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
+ mock(GpuNodeResourceUpdateHandler.class);
+
+ GpuResourcePlugin target =
+ new GpuResourcePlugin(gpuNodeResourceUpdateHandler, gpuDiscoverer);
+
+ target.getNMResourceInfo();
+ }
+
+ @Test
+ public void testResourceHandlerIsInitialized() throws YarnException {
+ GpuDiscoverer gpuDiscoverer = mock(GpuDiscoverer.class);
+ GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler =
+ mock(GpuNodeResourceUpdateHandler.class);
+
+ GpuResourcePlugin target =
+ new GpuResourcePlugin(gpuNodeResourceUpdateHandler, gpuDiscoverer);
+
+ target.createResourceHandler(null, null, null);
+
+ //Not throwing any exception
+ target.getNMResourceInfo();
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]