This is an automated email from the ASF dual-hosted git repository.
bteke pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new f00094203bf YARN-11709. NodeManager should be shut down or blacklisted
when it cannot run program /var/lib/yarn-ce/bin/container-executor (#6960)
f00094203bf is described below
commit f00094203bf40a8c3f2216cf22eaa5599e3b9b4d
Author: Ferenc Erdelyi <55103964+ferde...@users.noreply.github.com>
AuthorDate: Fri Aug 16 16:33:10 2024 +0200
YARN-11709. NodeManager should be shut down or blacklisted when it cannot
run program /var/lib/yarn-ce/bin/container-executor (#6960)
---
.../server/nodemanager/LinuxContainerExecutor.java | 6 ++--
.../TestLinuxContainerExecutorWithMocks.java | 35 ++++++++++++++++++++--
2 files changed, 36 insertions(+), 5 deletions(-)
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
index 19335045c86..19c06736035 100644
---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
@@ -451,8 +451,10 @@ public class LinuxContainerExecutor extends
ContainerExecutor {
} catch (PrivilegedOperationException e) {
int exitCode = e.getExitCode();
- LOG.warn("Exit code from container {} startLocalizer is : {}",
- locId, exitCode, e);
+ LOG.error("Unrecoverable issue occurred. Marking the node as unhealthy
to prevent "
+ + "further containers to get scheduled on the node and cause
application failures. " +
+ "Exit code from the container " + locId + "startLocalizer is : " +
exitCode, e);
+ nmContext.getNodeStatusUpdater().reportException(e);
throw new IOException("Application " + appId + " initialization failed" +
" (exitCode=" + exitCode + ") with output: " + e.getOutput(), e);
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java
index 3d9d33c5a10..7d49cab4a86 100644
---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java
+++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java
@@ -26,6 +26,7 @@ import static org.junit.Assert.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
@@ -37,6 +38,7 @@ import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
+import java.lang.reflect.Field;
import java.net.InetSocketAddress;
import java.net.URI;
import java.net.URISyntaxException;
@@ -345,7 +347,8 @@ public class TestLinuxContainerExecutorWithMocks {
@Test
public void testContainerLaunchError()
- throws IOException, ContainerExecutionException, URISyntaxException {
+ throws IOException, ContainerExecutionException, URISyntaxException,
IllegalAccessException,
+ NoSuchFieldException {
final String[] expecetedMessage = {"badcommand", "Exit code: 24"};
final String[] executor = {
@@ -387,6 +390,14 @@ public class TestLinuxContainerExecutorWithMocks {
dirsHandler.init(conf);
mockExec.setConf(conf);
+ //set the private nmContext field without initing the
LinuxContainerExecutor
+ NodeManager nodeManager = new NodeManager();
+ NodeManager.NMContext nmContext =
+ nodeManager.createNMContext(null, null, null, false, conf);
+ Field lceNmContext =
LinuxContainerExecutor.class.getDeclaredField("nmContext");
+ lceNmContext.setAccessible(true);
+ lceNmContext.set(mockExec, nmContext);
+
String appSubmitter = "nobody";
String cmd = String
.valueOf(PrivilegedOperation.RunAsUserCommand.LAUNCH_CONTAINER.
@@ -601,8 +612,6 @@ public class TestLinuxContainerExecutorWithMocks {
LinuxContainerRuntime runtime = new DefaultLinuxContainerRuntime(
spyPrivilegedExecutor);
runtime.initialize(conf, null);
- mockExec = new LinuxContainerExecutor(runtime);
- mockExec.setConf(conf);
LinuxContainerExecutor lce = new LinuxContainerExecutor(runtime) {
@Override
protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() {
@@ -610,6 +619,23 @@ public class TestLinuxContainerExecutorWithMocks {
}
};
lce.setConf(conf);
+
+ //set the private nmContext field without initing the
LinuxContainerExecutor
+ NodeManager nodeManager = new NodeManager();
+ NodeManager.NMContext nmContext =
+ nodeManager.createNMContext(null, null, null, false, conf);
+ NodeManager.NMContext spyNmContext = spy(nmContext);
+
+ //initialize a mock NodeStatusUpdater
+ NodeStatusUpdaterImpl nodeStatusUpdater =
mock(NodeStatusUpdaterImpl.class);
+ nmContext.setNodeStatusUpdater(nodeStatusUpdater);
+ //imitate a void method call on the NodeStatusUpdater when setting NM
unhealthy.
+ doNothing().when(nodeStatusUpdater).reportException(any());
+
+ Field lceNmContext =
LinuxContainerExecutor.class.getDeclaredField("nmContext");
+ lceNmContext.setAccessible(true);
+ lceNmContext.set(lce, nmContext);
+
InetSocketAddress address = InetSocketAddress.createUnresolved(
"localhost", 8040);
Path nmPrivateCTokensPath= new Path("file:///bin/nmPrivateCTokensPath");
@@ -672,6 +698,9 @@ public class TestLinuxContainerExecutorWithMocks {
assertTrue("Unexpected exception " + e,
e.getMessage().contains("exit code"));
}
+
+ //verify that the NM was set unhealthy on PrivilegedOperationException
+ verify(nodeStatusUpdater, times(1)).reportException(any());
}
@Test
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org