This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git


The following commit(s) were added to refs/heads/master by this push:
     new fd0817805 [cgroups2] Conditionally initialize cgroups v2 in the agent's main.
fd0817805 is described below

commit fd0817805de9ddda80353d8968d942e4f17b9f5f
Author: Devin Leamy <[email protected]>
AuthorDate: Fri Mar 29 13:59:03 2024 -0400

    [cgroups2] Conditionally initialize cgroups v2 in the agent's main.
    
    We auto-detect whether cgroups v1 or v2 is enabled on the host, and
    initialize v1 or v2 accordingly.
    
    This closes #539
---
 src/slave/containerizer/mesos/containerizer.cpp |  54 +++++--
 src/slave/main.cpp                              | 184 +++++++++++++++++++++++-
 2 files changed, 216 insertions(+), 22 deletions(-)

diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index 31d45a0a5..2fc1ee56e 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -116,6 +116,13 @@
 #include "slave/containerizer/mesos/isolators/volume/image.hpp"
 #include "slave/containerizer/mesos/isolators/volume/secret.hpp"
 #include "slave/containerizer/mesos/isolators/volume/csi/isolator.hpp"
+
+#ifdef ENABLE_CGROUPS_V2
+#include "linux/cgroups2.hpp"
+
+#include "slave/containerizer/mesos/isolators/cgroups2/cgroups2.hpp"
+#endif // ENABLE_CGROUPS_V2
+
 #endif // __linux__
 
 #if ENABLE_SECCOMP_ISOLATOR
@@ -363,6 +370,23 @@ Try<MesosContainerizer*> MesosContainerizer::create(
 
   Shared<Provisioner> provisioner = _provisioner->share();
 
+#ifdef __linux__
+  // Initialize either the cgroups v2 or cgroups v1 isolator, based on what
+  // is available on the host machine.
+  auto cgroupsIsolatorSelector = [] (const Flags& flags) -> Try<Isolator*> {
+#ifdef ENABLE_CGROUPS_V2
+    Try<bool> mounted = cgroups2::mounted();
+    if (mounted.isError()) {
+      return Error("Failed to determine if the cgroup2 filesystem is mounted: "
+                   + mounted.error());
+    }
+    if (*mounted) {
+      return Cgroups2IsolatorProcess::create(flags);
+    }
+#endif // ENABLE_CGROUPS_V2
+    return CgroupsIsolatorProcess::create(flags);
+  };
+#endif // __linux__
   // Built-in isolator definitions.
   //
   // The order of the entries in this table specifies the ordering of the
@@ -424,17 +448,17 @@ Try<MesosContainerizer*> MesosContainerizer::create(
 #endif // __WINDOWS__
 
 #ifdef __linux__
-    {"cgroups/all", &CgroupsIsolatorProcess::create},
-    {"cgroups/blkio", &CgroupsIsolatorProcess::create},
-    {"cgroups/cpu", &CgroupsIsolatorProcess::create},
-    {"cgroups/cpuset", &CgroupsIsolatorProcess::create},
-    {"cgroups/devices", &CgroupsIsolatorProcess::create},
-    {"cgroups/hugetlb", &CgroupsIsolatorProcess::create},
-    {"cgroups/mem", &CgroupsIsolatorProcess::create},
-    {"cgroups/net_cls", &CgroupsIsolatorProcess::create},
-    {"cgroups/net_prio", &CgroupsIsolatorProcess::create},
-    {"cgroups/perf_event", &CgroupsIsolatorProcess::create},
-    {"cgroups/pids", &CgroupsIsolatorProcess::create},
+    {"cgroups/all", cgroupsIsolatorSelector},
+    {"cgroups/blkio", cgroupsIsolatorSelector},
+    {"cgroups/cpu", cgroupsIsolatorSelector},
+    {"cgroups/cpuset", cgroupsIsolatorSelector},
+    {"cgroups/devices", cgroupsIsolatorSelector},
+    {"cgroups/hugetlb", cgroupsIsolatorSelector},
+    {"cgroups/mem", cgroupsIsolatorSelector},
+    {"cgroups/net_cls", cgroupsIsolatorSelector},
+    {"cgroups/net_prio", cgroupsIsolatorSelector},
+    {"cgroups/perf_event", cgroupsIsolatorSelector},
+    {"cgroups/pids", cgroupsIsolatorSelector},
 
     {"appc/runtime", &AppcRuntimeIsolatorProcess::create},
     {"docker/runtime", &DockerRuntimeIsolatorProcess::create},
@@ -535,9 +559,9 @@ Try<MesosContainerizer*> MesosContainerizer::create(
 
   vector<Owned<Isolator>> isolators;
 
-  // Note: For cgroups, we only create `CgroupsIsolatorProcess` once.
-  // We use this flag to identify whether `CgroupsIsolatorProcess` has
-  // been created or not.
+  // Note: For cgroups, we only create `CgroupsIsolatorProcess` or
+  // `Cgroups2IsolatorProcess` once. We use this flag to identify whether
+  // either has been created.
   bool cgroupsIsolatorCreated = false;
 
   // First, apply the built-in isolators, in dependency order.
@@ -548,7 +572,7 @@ Try<MesosContainerizer*> MesosContainerizer::create(
 
     if (strings::startsWith(creator.first, "cgroups/")) {
       if (cgroupsIsolatorCreated) {
-        // Skip when `CgroupsIsolatorProcess` have already been created.
+        // Skip when `Cgroups(2)IsolatorProcess` has already been created.
         continue;
       }
 
diff --git a/src/slave/main.cpp b/src/slave/main.cpp
index dbee337c3..f4e7f3b64 100644
--- a/src/slave/main.cpp
+++ b/src/slave/main.cpp
@@ -76,6 +76,12 @@
 #ifdef __linux__
 #include "linux/cgroups.hpp"
 #include "linux/systemd.hpp"
+
+#ifdef ENABLE_CGROUPS_V2
+#include "linux/cgroups2.hpp"
+#include "slave/containerizer/mesos/isolators/cgroups2/cgroups2.hpp"
+#endif // ENABLE_CGROUPS_V2
+
 #endif // __linux__
 
 #include "logging/logging.hpp"
@@ -85,6 +91,7 @@
 
 #include "module/manager.hpp"
 
+#include "slave/containerizer/mesos/paths.hpp"
 #include "slave/constants.hpp"
 #include "slave/csi_server.hpp"
 #include "slave/gc.hpp"
@@ -147,6 +154,147 @@ const char* malloc_conf = "narenas:4";
 
 
 #ifdef __linux__
+
+#ifdef ENABLE_CGROUPS_V2
+// Log the processes found in `cgroup`; Error if they cannot be listed.
+static Try<Nothing> logProcesses(const string& cgroup)
+{
+  Try<set<pid_t>> processes = cgroups2::processes(cgroup);
+  if (processes.isError()) {
+    return Error("Failed to check for existing processes"
+                 " in cgroup '" + cgroup + "': " + processes.error());
+  }
+
+  if (!processes->empty()) {
+    vector<string> infos;
+    foreach (pid_t pid, *processes) {
+      Result<os::Process> proc = os::process(pid);
+
+      // Record the pid alone unless `os::process` yields the command.
+      if (proc.isSome()) {
+        infos.push_back(stringify(pid) + " '" + proc->command + "'");
+      } else {
+        infos.push_back(stringify(pid));
+      }
+    }
+
+    LOG(INFO) << "Found process(es) in the cgroup '" << cgroup << "'. "
+              << "Consider checking the following process(es) listed in "
+              << path::join("/sys/fs/cgroup", cgroup, "cgroup.procs")
+              << ":\n" << strings::join("\n", infos);
+  }
+
+  return Nothing();
+}
+
+
+// Initialize Mesos cgroups for cgroups v2. Expects `flags.agent_subsystems`
+// to be set (CHECK_SOME) and returns an Error for the first step that fails.
+//
+// Ensures that cgroups v2 is enabled and mounted, creates the cgroups listed
+// below if they do not already exist, enables the requested controllers in
+// `cgroups2::ROOT_CGROUP`, and moves the Mesos Agent into its own cgroup.
+//
+// Any processes found in these cgroups are logged, which helps debugging when
+// they are left over from a previous Mesos run that wasn't cleaned up.
+//
+// Creates cgroups:
+// /<root>              Top-level cgroup for Mesos (`flags.cgroups_root`). The
+//                      requested controllers are made available to it.
+// <agent>              `paths::cgroups2::agent(<root>)`; presumably
+//                      `/<root>/agent`, the parent of the leaf below.
+// /<root>/agent/leaf   Cgroup for the Mesos agent. The `/leaf` suffix is not
+//                      strictly necessary but is consistent with keeping all
+//                      processes inside of `/leaf` folders.
+static Try<Nothing> initializeCgroups2(const slave::Flags& flags)
+{
+  namespace containerizer = mesos::internal::slave::containerizer;
+  CHECK_SOME(flags.agent_subsystems);
+
+  if (!cgroups2::enabled()) {
+    return Error("cgroups v2 is not available on this system");
+  }
+
+  Try<bool> mounted = cgroups2::mounted();
+  if (mounted.isError()) {
+    return Error("Failed to check if cgroups v2 is mounted: "
+                 + mounted.error());
+  }
+  if (!*mounted) {
+    return Error("The cgroup2 file system is not mounted at '/sys/fs/cgroup'");
+  }
+
+  if (!cgroups2::exists(flags.cgroups_root)) {
+    Try<Nothing> create = cgroups2::create(flags.cgroups_root);
+    if (create.isError()) {
+      return Error("Failed to create cgroup '" + flags.cgroups_root + "': "
+                   + create.error());
+    }
+  }
+
+  const string& agent =
+    containerizer::paths::cgroups2::agent(flags.cgroups_root);
+  if (!cgroups2::exists(agent)) {
+    Try<Nothing> create = cgroups2::create(agent);
+    if (create.isError()) {
+      return Error("Failed to create cgroup '" + agent + "': "
+                   + create.error());
+    }
+  }
+
+  const string agentLeaf =
+    containerizer::paths::cgroups2::agent(flags.cgroups_root, true);
+  if (!cgroups2::exists(agentLeaf)) {
+    Try<Nothing> create = cgroups2::create(agentLeaf);
+    if (create.isError()) {
+      return Error("Failed to create cgroup '" + agentLeaf + "': "
+                   + create.error());
+    }
+  }
+
+  Try<Nothing> processes = logProcesses(flags.cgroups_root);
+  if (processes.isError()) { return Error(processes.error()); }
+
+  processes = logProcesses(agent);
+  if (processes.isError()) { return Error(processes.error()); }
+
+  processes = logProcesses(agentLeaf);
+  if (processes.isError()) { return Error(processes.error()); }
+
+  // `cgroups2::ROOT_CGROUP` is the default cgroup all processes belong to when
+  // the cgroup2 hierarchy is mounted; `flags.cgroups_root` is the root cgroup
+  // for Mesos. The requested controllers are made available to the latter by
+  // enabling them in `cgroups2::ROOT_CGROUP`, its parent. NOTE(review): the
+  // available set is only error-checked, never compared with the request.
+  Try<set<string>> availableControllers = cgroups2::controllers::available(
+      cgroups2::ROOT_CGROUP);
+  if (availableControllers.isError()) {
+    return Error("Failed to determine all available controllers: "
+                 + availableControllers.error());
+  }
+
+  const vector<string> requestedControllers = strings::tokenize(
+      *flags.agent_subsystems, ",");
+
+  Try<Nothing> enable = cgroups2::controllers::enable(
+      cgroups2::ROOT_CGROUP, requestedControllers);
+  if (enable.isError()) {
+    return Error("Failed to enable the requested cgroup v2 controllers: "
+                 + enable.error());
+  }
+
+  // Move the agent process into its own cgroup.
+  Try<Nothing> assign = cgroups2::assign(agentLeaf, getpid());
+  if (assign.isError()) {
+    return Error("Failed to move the Mesos Agent into"
+                 " cgroup '" + agentLeaf + "': " + assign.error());
+  }
+
+  return Nothing();
+}
+#endif // ENABLE_CGROUPS_V2
+
+
 // Move the slave into its own cgroup for each of the specified
 // subsystems.
 //
@@ -157,7 +305,7 @@ const char* malloc_conf = "narenas:4";
 // TODO(jieyu): Make sure the corresponding cgroup isolator is
 // enabled so that the container processes are moved to different
 // cgroups than the agent cgroup.
-static Try<Nothing> assignCgroups(const slave::Flags& flags)
+static Try<Nothing> initializeCgroups(const slave::Flags& flags)
 {
   CHECK_SOME(flags.agent_subsystems);
 
@@ -406,13 +554,35 @@ int main(int argc, char** argv)
   }
 
 #ifdef __linux__
-  // Move the agent process into its own cgroup for each of the specified
-  // subsystems if necessary before the process is initialized.
   if (flags.agent_subsystems.isSome()) {
-    Try<Nothing> assign = assignCgroups(flags);
-    if (assign.isError()) {
-      EXIT(EXIT_FAILURE) << assign.error();
-    }
+    // Initialize cgroups v2 for the agent if the cgroup2 filesystem is
+    // mounted. Otherwise, initialize cgroups v1.
+    [&flags] () {
+#ifdef ENABLE_CGROUPS_V2
+      Try<bool> mounted = cgroups2::mounted();
+      if (mounted.isError()) {
+        EXIT(EXIT_FAILURE) << mounted.error();
+      }
+
+      // To use cgroups v2, the host must have a cgroup2 filesystem mounted
+      // at `/sys/fs/cgroup`.
+      if (*mounted) {
+        Try<Nothing> initialize = initializeCgroups2(flags);
+        if (initialize.isError()) {
+          EXIT(EXIT_FAILURE) << initialize.error();
+        }
+        return;
+      }
+#endif // ENABLE_CGROUPS_V2
+
+      // Initialize a cgroups hierarchy for each of the controllers that
+      // are requested, create the root Mesos Agent's cgroup, and move the
+      // agent processes into the new cgroup.
+      Try<Nothing> initialize = initializeCgroups(flags);
+      if (initialize.isError()) {
+        EXIT(EXIT_FAILURE) << initialize.error();
+      }
+    }();
   }
 #endif // __linux__
 

Reply via email to