This is an automated email from the ASF dual-hosted git repository.

jpeach pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git


The following commit(s) were added to refs/heads/master by this push:
     new a535849  Assign cgroup processes after configuring the subsystem.
a535849 is described below

commit a535849448209356b14f18958b1738fa7d4459a9
Author: James Peach <[email protected]>
AuthorDate: Tue Jun 18 13:44:45 2019 -0700

    Assign cgroup processes after configuring the subsystem.
    
    Currently, the PID targeted by the cgroups isolator is moved into
    the cgroup before the subsystem runs to apply any type-specific
    cgroup configuration. We should reverse the order of this so that
    the PID is only moved once the cgroup is fully configured by the
    subsystem.
    
    A specific case that can happen is where a PID is assigned to a net_cls
    cgroup before that cgroup has its class ID set.  This intermediate
    process state can be observed by system monitoring process, causing
    confusion that is hard to debug.
    
    Review: https://reviews.apache.org/r/70863/
---
 .../mesos/isolators/cgroups/cgroups.cpp            | 114 +++++++++++----------
 .../mesos/isolators/cgroups/cgroups.hpp            |   4 +-
 2 files changed, 64 insertions(+), 54 deletions(-)

diff --git a/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp 
b/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp
index e7819d7..b12b73d 100644
--- a/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp
+++ b/src/slave/containerizer/mesos/isolators/cgroups/cgroups.cpp
@@ -663,48 +663,6 @@ Future<Nothing> CgroupsIsolatorProcess::isolate(
     const ContainerID& containerId,
     pid_t pid)
 {
-  // If we are a nested container, we inherit
-  // the cgroup from our root ancestor.
-  ContainerID rootContainerId = protobuf::getRootContainerId(containerId);
-
-  if (!infos.contains(rootContainerId)) {
-    return Failure("Failed to isolate the container: Unknown root container");
-  }
-
-  const string& cgroup = infos[rootContainerId]->cgroup;
-
-  // TODO(haosdent): Use foreachkey once MESOS-5037 is resolved.
-  foreach (const string& hierarchy, subsystems.keys()) {
-    // If new cgroup subsystems are added after the agent
-    // upgrade, the newly added cgroup subsystems do not
-    // exist on old container's cgroup hierarchy. So skip
-    // assigning the pid to this cgroup subsystem.
-    if (containerId.has_parent() && !cgroups::exists(hierarchy, cgroup)) {
-      LOG(INFO) << "Skipping assigning pid " << stringify(pid)
-                << " to cgroup at '" << path::join(hierarchy, cgroup)
-                << "' for container " << containerId
-                << " because its parent container " << containerId.parent()
-                << " does not have this cgroup hierarchy";
-      continue;
-    }
-
-    Try<Nothing> assign = cgroups::assign(
-        hierarchy,
-        cgroup,
-        pid);
-
-    if (assign.isError()) {
-      string message =
-        "Failed to assign container " + stringify(containerId) +
-        " pid " + stringify(pid) + " to cgroup at '" +
-        path::join(hierarchy, cgroup) + "': " + assign.error();
-
-      LOG(ERROR) << message;
-
-      return Failure(message);
-    }
-  }
-
   // We currently can't call `subsystem->isolate()` on nested
   // containers, because we don't call `prepare()`, `recover()`, or
   // `cleanup()` on them either. If we were to call `isolate()` on
@@ -717,30 +675,39 @@ Future<Nothing> CgroupsIsolatorProcess::isolate(
   // TODO(klueska): In the future we should revisit this to make
   // sure that doing things this way is sufficient (or otherwise
   // update our invariants to allow us to call this here).
-  if (containerId.has_parent()) {
-    return Nothing();
-  }
 
   vector<Future<Nothing>> isolates;
-  foreachvalue (const Owned<Subsystem>& subsystem, subsystems) {
-    isolates.push_back(subsystem->isolate(
-        containerId,
-        infos[containerId]->cgroup,
-        pid));
+
+  if (!containerId.has_parent()) {
+    foreachvalue (const Owned<Subsystem>& subsystem, subsystems) {
+      isolates.push_back(subsystem->isolate(
+          containerId,
+          infos[containerId]->cgroup,
+          pid));
+    }
   }
 
+  // We isolate all the subsystems first so that the subsystem is
+  // fully configured before assigning the process. This improves
+  // the atomicity of the assignment for any system processes that
+  // observe it.
   return await(isolates)
     .then(defer(
         PID<CgroupsIsolatorProcess>(this),
         &CgroupsIsolatorProcess::_isolate,
-        lambda::_1));
+        lambda::_1,
+        containerId,
+        pid));
 }
 
 
 Future<Nothing> CgroupsIsolatorProcess::_isolate(
-    const vector<Future<Nothing>>& futures)
+    const vector<Future<Nothing>>& futures,
+    const ContainerID& containerId,
+    pid_t pid)
 {
   vector<string> errors;
+
   foreach (const Future<Nothing>& future, futures) {
     if (!future.isReady()) {
       errors.push_back((future.isFailed()
@@ -749,12 +716,53 @@ Future<Nothing> CgroupsIsolatorProcess::_isolate(
     }
   }
 
-  if (errors.size() > 0) {
+  if (!errors.empty()) {
     return Failure(
         "Failed to isolate subsystems: " +
         strings::join(";", errors));
   }
 
+  // If we are a nested container, we inherit the cgroup from our parent.
+  const ContainerID rootContainerId = 
protobuf::getRootContainerId(containerId);
+
+  if (!infos.contains(rootContainerId)) {
+    return Failure("Failed to isolate the container: Unknown root container");
+  }
+
+  const string& cgroup = infos[rootContainerId]->cgroup;
+
+  // TODO(haosdent): Use foreachkey once MESOS-5037 is resolved.
+  foreach (const string& hierarchy, subsystems.keys()) {
+    // If new cgroup subsystems are added after the agent
+    // upgrade, the newly added cgroup subsystems do not
+    // exist on old container's cgroup hierarchy. So skip
+    // assigning the pid to this cgroup subsystem.
+    if (containerId.has_parent() && !cgroups::exists(hierarchy, cgroup)) {
+      LOG(INFO) << "Skipping assigning pid " << stringify(pid)
+                << " to cgroup at '" << path::join(hierarchy, cgroup)
+                << "' for container " << containerId
+                << " because its parent container " << containerId.parent()
+                << " does not have this cgroup hierarchy";
+      continue;
+    }
+
+    Try<Nothing> assign = cgroups::assign(
+        hierarchy,
+        cgroup,
+        pid);
+
+    if (assign.isError()) {
+      string message =
+        "Failed to assign container " + stringify(containerId) +
+        " pid " + stringify(pid) + " to cgroup at '" +
+        path::join(hierarchy, cgroup) + "': " + assign.error();
+
+      LOG(ERROR) << message;
+
+      return Failure(message);
+    }
+  }
+
   return Nothing();
 }
 
diff --git a/src/slave/containerizer/mesos/isolators/cgroups/cgroups.hpp 
b/src/slave/containerizer/mesos/isolators/cgroups/cgroups.hpp
index 4a1871b..4bd3d6d 100644
--- a/src/slave/containerizer/mesos/isolators/cgroups/cgroups.hpp
+++ b/src/slave/containerizer/mesos/isolators/cgroups/cgroups.hpp
@@ -130,7 +130,9 @@ private:
       const mesos::slave::ContainerConfig& containerConfig);
 
   process::Future<Nothing> _isolate(
-      const std::vector<process::Future<Nothing>>& futures);
+      const std::vector<process::Future<Nothing>>& futures,
+      const ContainerID& containerId,
+      pid_t pid);
 
   void _watch(
       const ContainerID& containerId,

Reply via email to