YARN-8259.  Improve privileged docker container liveliness checks.
            Contributed by Shane Kumpf


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/22994889
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/22994889
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/22994889

Branch: refs/heads/HDDS-48
Commit: 22994889dc449f966fb6462a3ac3d3bbaee3ac6a
Parents: 69b0596
Author: Eric Yang <ey...@apache.org>
Authored: Wed Jun 13 19:24:31 2018 -0400
Committer: Eric Yang <ey...@apache.org>
Committed: Wed Jun 13 19:24:31 2018 -0400

----------------------------------------------------------------------
 .../runtime/DockerLinuxContainerRuntime.java    | 26 ++++++---------
 .../runtime/LinuxContainerRuntimeConstants.java |  2 ++
 .../runtime/TestDockerContainerRuntime.java     | 34 +++++++++++++-------
 .../src/site/markdown/DockerContainers.md       | 15 +++++++++
 4 files changed, 49 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/22994889/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
index e19379f..f13ba59 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
@@ -191,6 +191,7 @@ public class DockerLinuxContainerRuntime implements 
LinuxContainerRuntime {
   private static final Pattern USER_MOUNT_PATTERN = Pattern.compile(
       "(?<=^|,)([^:\\x00]+):([^:\\x00]+):([a-z]+)");
   private static final int HOST_NAME_LENGTH = 64;
+  private static final String DEFAULT_PROCFS = "/proc";
 
   @InterfaceAudience.Private
   public static final String ENV_DOCKER_CONTAINER_IMAGE =
@@ -1192,24 +1193,15 @@ public class DockerLinuxContainerRuntime implements 
LinuxContainerRuntime {
 
   private void executeLivelinessCheck(ContainerRuntimeContext ctx)
       throws ContainerExecutionException {
-    PrivilegedOperation signalOp = new PrivilegedOperation(
-        PrivilegedOperation.OperationType.SIGNAL_CONTAINER);
-    signalOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER),
-        ctx.getExecutionAttribute(USER), Integer.toString(
-            PrivilegedOperation.RunAsUserCommand.SIGNAL_CONTAINER.getValue()),
-        ctx.getExecutionAttribute(PID),
-        Integer.toString(ctx.getExecutionAttribute(SIGNAL).getValue()));
-    signalOp.disableFailureLogging();
-    try {
-      privilegedOperationExecutor.executePrivilegedOperation(null, signalOp,
-          null, ctx.getContainer().getLaunchContext().getEnvironment(), false,
-          false);
-    } catch (PrivilegedOperationException e) {
-      String msg = "Liveliness check failed for PID: "
-          + ctx.getExecutionAttribute(PID)
+    String procFs = ctx.getExecutionAttribute(PROCFS);
+    if (procFs == null || procFs.isEmpty()) {
+      procFs = DEFAULT_PROCFS;
+    }
+    String pid = ctx.getExecutionAttribute(PID);
+    if (!new File(procFs + File.separator + pid).exists()) {
+      String msg = "Liveliness check failed for PID: " + pid
           + ". Container may have already completed.";
-      throw new ContainerExecutionException(msg, e.getExitCode(), 
e.getOutput(),
-          e.getErrorOutput());
+      throw new ContainerExecutionException(msg);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/22994889/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/LinuxContainerRuntimeConstants.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/LinuxContainerRuntimeConstants.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/LinuxContainerRuntimeConstants.java
index 579e03b..2f4aad4 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/LinuxContainerRuntimeConstants.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/LinuxContainerRuntimeConstants.java
@@ -89,4 +89,6 @@ public final class LinuxContainerRuntimeConstants {
       String.class, "pid");
   public static final Attribute<ContainerExecutor.Signal> SIGNAL = Attribute
       .attribute(ContainerExecutor.Signal.class, "signal");
+  public static final Attribute<String> PROCFS = Attribute.attribute(
+      String.class, "procfs");
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hadoop/blob/22994889/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java
index b6de366..d85c403 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java
@@ -54,7 +54,9 @@ import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.Contai
 import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeContext;
 import org.junit.Assert;
 import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
 import org.mockito.ArgumentCaptor;
 import org.mockito.Mockito;
 import org.slf4j.Logger;
@@ -98,6 +100,7 @@ import static 
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.r
 import static 
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.NM_PRIVATE_TOKENS_PATH;
 import static 
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.PID;
 import static 
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.PID_FILE_PATH;
+import static 
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.PROCFS;
 import static 
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.RESOURCES_OPTIONS;
 import static 
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.RUN_AS_USER;
 import static 
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.SIGNAL;
@@ -153,6 +156,9 @@ public class TestDockerContainerRuntime {
   private final String signalPid = "1234";
   private int dockerStopGracePeriod;
 
+  @Rule
+  public TemporaryFolder tempDir = new TemporaryFolder();
+
   @Before
   public void setup() {
     String tmpPath = new StringBuffer(System.getProperty("test.build.data"))
@@ -1470,9 +1476,24 @@ public class TestDockerContainerRuntime {
   }
 
   @Test
-  public void testContainerLivelinessCheck()
-      throws ContainerExecutionException, PrivilegedOperationException {
+  public void testContainerLivelinessFileExistsNoException() throws Exception {
+    File testTempDir = tempDir.newFolder();
+    File procPidPath = new File(testTempDir + File.separator + signalPid);
+    procPidPath.createNewFile();
+    procPidPath.deleteOnExit();
+    DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime(
+        mockExecutor, mockCGroupsHandler);
+    builder.setExecutionAttribute(RUN_AS_USER, runAsUser)
+        .setExecutionAttribute(USER, user)
+        .setExecutionAttribute(PID, signalPid)
+        .setExecutionAttribute(SIGNAL, ContainerExecutor.Signal.NULL)
+        .setExecutionAttribute(PROCFS, testTempDir.getAbsolutePath());
+    runtime.initialize(enableMockContainerExecutor(conf), null);
+    runtime.signalContainer(builder.build());
+  }
 
+  @Test(expected = ContainerExecutionException.class)
+  public void testContainerLivelinessNoFileException() throws Exception {
     DockerLinuxContainerRuntime runtime = new DockerLinuxContainerRuntime(
         mockExecutor, mockCGroupsHandler);
     builder.setExecutionAttribute(RUN_AS_USER, runAsUser)
@@ -1481,15 +1502,6 @@ public class TestDockerContainerRuntime {
         .setExecutionAttribute(SIGNAL, ContainerExecutor.Signal.NULL);
     runtime.initialize(enableMockContainerExecutor(conf), null);
     runtime.signalContainer(builder.build());
-
-    PrivilegedOperation op = capturePrivilegedOperation();
-    Assert.assertEquals(op.getOperationType(),
-        PrivilegedOperation.OperationType.SIGNAL_CONTAINER);
-    Assert.assertEquals(runAsUser, op.getArguments().get(0));
-    Assert.assertEquals(submittingUser, op.getArguments().get(1));
-    Assert.assertEquals("2", op.getArguments().get(2));
-    Assert.assertEquals("1234", op.getArguments().get(3));
-    Assert.assertEquals("0", op.getArguments().get(4));
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/hadoop/blob/22994889/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md
----------------------------------------------------------------------
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md
index c6f965a..a2ef6fe 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md
@@ -378,6 +378,21 @@ Trusted images are allowed to mount external devices such 
as HDFS via NFS gatewa
 
 For [YARN Service HTTPD example](./yarn-service/Examples.html), container-executor.cfg must define centos docker registry to be trusted for the example to run.
 
+Container Reacquisition Requirements
+------------------------------------
+On restart, the NodeManager, as part of its recovery process, will
+validate that a container is still running by checking for the existence of the
+container's PID directory in the /proc filesystem. For security purposes,
+operating system administrators may enable the _hidepid_ mount option for the
+/proc filesystem. If the _hidepid_ option is enabled, the _yarn_ user's primary
+group must be whitelisted by setting the gid mount flag, as in the example below.
+Without the _yarn_ user's primary group whitelisted, container reacquisition
+will fail and the container will be killed on NodeManager restart.
+
+```
+proc     /proc     proc     nosuid,nodev,noexec,hidepid=2,gid=yarn     0 0
+```
+
 Connecting to a Secure Docker Repository
 ----------------------------------------
 


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to