This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git


The following commit(s) were added to refs/heads/master by this push:
     new 5a56015fb [cgroups2] Add OOM listening to the MemoryControllerProcess.
5a56015fb is described below

commit 5a56015fb24993ac92d32e2e56f5db7935fcbebf
Author: None <None>
AuthorDate: Tue May 14 16:41:16 2024 -0400

    [cgroups2] Add OOM listening to the MemoryControllerProcess.
    
    Introduces OOM listening to the MemoryControllerProcess so that we
    can detect, report, and respond to OOM events.
    
    Review: https://reviews.apache.org/r/74979/
---
 .../isolators/cgroups2/controllers/memory.cpp      | 120 +++++++++++++++++++++
 .../isolators/cgroups2/controllers/memory.hpp      |  19 +++-
 2 files changed, 138 insertions(+), 1 deletion(-)
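
For context: under cgroups v2 the kernel reports OOM kills by incrementing
counters (notably 'oom' and 'oom_kill') in the cgroup's memory.events file,
and modifications to that file generate inotify events. A minimal standalone
sketch of that underlying mechanism (not Mesos code; the cgroup path is
hypothetical and error handling is minimal):

    // Watch a cgroup v2 memory.events file and report when the kernel
    // records an OOM kill. Illustrative only.
    #include <sys/inotify.h>
    #include <unistd.h>

    #include <fstream>
    #include <iostream>
    #include <string>

    // Parse the 'oom_kill' counter out of memory.events ("key value" lines).
    static long readOomKills(const std::string& path)
    {
      std::ifstream file(path);
      std::string key;
      long value = 0;
      while (file >> key >> value) {
        if (key == "oom_kill") {
          return value;
        }
      }
      return 0;
    }

    int main()
    {
      const std::string events = "/sys/fs/cgroup/mycgroup/memory.events";

      long baseline = readOomKills(events);

      int fd = inotify_init1(IN_CLOEXEC);
      if (fd < 0 || inotify_add_watch(fd, events.c_str(), IN_MODIFY) < 0) {
        std::cerr << "Failed to watch " << events << std::endl;
        return 1;
      }

      // Block until the kernel modifies memory.events, then re-read the
      // counter to see whether an OOM kill occurred since the baseline.
      char buffer[4096];
      while (read(fd, buffer, sizeof(buffer)) > 0) {
        if (readOomKills(events) > baseline) {
          std::cout << "OOM kill detected" << std::endl;
          break;
        }
      }

      close(fd);
      return 0;
    }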

diff --git a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp
index 732b1c65f..872a585e2 100644
--- a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp
+++ b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp
@@ -74,6 +74,8 @@ Future<Nothing> MemoryControllerProcess::prepare(
 
   infos.put(containerId, Info());
 
+  oomListen(containerId, cgroup);
+
   return Nothing();
 }
 
@@ -105,10 +107,24 @@ Future<Nothing> MemoryControllerProcess::recover(
   infos.put(containerId, Info());
   infos[containerId].hardLimitUpdated = true;
 
+  oomListen(containerId, cgroup);
+
   return Nothing();
 }
 
 
+Future<ContainerLimitation> MemoryControllerProcess::watch(
+    const ContainerID& containerId,
+    const string& cgroup)
+{
+  if (!infos.contains(containerId)) {
+    return Failure("Unknown container");
+  }
+
+  return infos[containerId].limitation.future();
+}
+
+
 Future<Nothing> MemoryControllerProcess::update(
   const ContainerID& containerId,
   const string& cgroup,
@@ -192,12 +208,116 @@ Future<Nothing> MemoryControllerProcess::cleanup(
     return Nothing();
   }
 
+  if (infos[containerId].oom.isPending()) {
+    infos[containerId].oom.discard();
+  }
+
   infos.erase(containerId);
 
   return Nothing();
 }
 
 
+void MemoryControllerProcess::oomListen(
+    const ContainerID& containerId,
+    const string& cgroup)
+{
+  if (!infos.contains(containerId)) {
+    LOG(ERROR) << "Cannot listen for OOM events for unknown container "
+               << containerId;
+    return;
+  }
+
+  infos[containerId].oom = cgroups2::memory::oom(cgroup);
+
+  LOG(INFO) << "Listening for OOM events for container "
+            << containerId;
+
+  infos[containerId].oom.onAny(
+      defer(PID<MemoryControllerProcess>(this),
+            &MemoryControllerProcess::oomed,
+            containerId,
+            cgroup,
+            lambda::_1));
+}
+
+
+void MemoryControllerProcess::oomed(
+    const ContainerID& containerId,
+    const string& cgroup,
+    const Future<Nothing>& oom)
+{
+  if (oom.isDiscarded()) {
+    LOG(INFO) << "OOM event listener discarded";
+    return;
+  }
+
+  if (oom.isFailed()) {
+    LOG(ERROR) << "OOM event listener failed: " << oom.failure();
+    return;
+  }
+
+  if (!infos.contains(containerId)) {
+    // It is likely that the container's exit was processed before this
+    // function ran (e.g. the kill and OOM events happened at the same
+    // time, and the process exit event arrived first). Therefore, we
+    // should not report a fatal error here.
+    LOG(INFO) << "OOM event received for terminated container";
+    return;
+  }
+
+  LOG(INFO) << "OOM detected for container" << containerId;
+
+  // Construct a message for the limitation to help with debugging the OOM.
+  ostringstream limitMessage;
+  limitMessage << "Memory limit exceeded: ";
+
+  // TODO(dleamy): Report the peak memory usage of the container. The
+  // 'memory.peak' control is only available on newer Linux kernels.
+
+  // Report memory statistics if successfully retrieved.
+  Try<Stats> stats = cgroups2::memory::stats(cgroup);
+  if (stats.isError()) {
+    LOG(ERROR) << "Failed to get cgroup memory stats for container "
+               << containerId << ": " << stats.error();
+  } else {
+    limitMessage << "\nMEMORY STATISTICS post-OOM: \n";
+    limitMessage << "anon: " << stats->anon << "\n";
+    limitMessage << "file: " << stats->file << "\n";
+    limitMessage << "kernel: " << stats->kernel << "\n";
+  }
+
+  LOG(INFO) << limitMessage.str();
+
+  Result<Bytes> hardLimit = cgroups2::memory::max(cgroup);
+  if (hardLimit.isError()) {
+    LOG(ERROR) << "Failed to get hard memory limit for container "
+               << containerId << ": " << hardLimit.error();
+  } else if (hardLimit.isNone()) {
+    LOG(ERROR) << "Unexpected OOM for container " << containerId
+               << ": no memory hard limit set";
+  }
+
+  // Complete the container limitation promise with a memory resource
+  // limitation.
+  //
+  // TODO(jieyu): This is not accurate if the memory resource is from
+  // a non-star role or spans roles (e.g., "*" and "role"). Ideally,
+  // we should save the resources passed in and report it here.
+  //
+  // TODO(dleamy): We report the hard limit because not all machines have
+  // access to 'memory.peak', the peak memory usage of the cgroup.
+  double megabytes = hardLimit.isSome()
+                      ? (double)hardLimit->bytes() / Bytes::MEGABYTES : 0;
+  Resources memory = *Resources::parse("mem", stringify(megabytes), "*");
+
+  infos[containerId].limitation.set(
+    protobuf::slave::createContainerLimitation(
+      memory,
+      limitMessage.str(),
+      TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY));
+}
+
 } // namespace slave {
 } // namespace internal {
 } // namespace mesos {
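
(Aside: the helpers referenced above, cgroups2::memory::stats and
cgroups2::memory::max, are not part of this diff; conceptually they parse
the cgroup's memory.stat and memory.max control files. A hedged sketch of
the memory.max read, assuming the standard cgroup v2 format where the file
contains either a byte count or the literal "max" when no limit is set:)

    // Illustrative only: read a cgroup v2 hard memory limit in bytes.
    // Returns -1 when the file holds "max", i.e. no limit is configured
    // (the case the code above logs as an unexpected OOM).
    #include <fstream>
    #include <string>

    long long readMemoryMax(const std::string& cgroup)
    {
      std::ifstream file(cgroup + "/memory.max");
      std::string contents;
      file >> contents;

      if (contents == "max") {
        return -1; // No hard limit set.
      }

      return std::stoll(contents); // Hard limit in bytes.
    }

(Note the conversion in oomed() reports the limit in megabytes: with stout's
Bytes::MEGABYTES equal to 1024 * 1024, a 512 MiB hard limit is surfaced as a
"mem:512" resource in the ContainerLimitation.)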
diff --git a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp
index 2e60b2c19..3dbfca917 100644
--- a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp
+++ b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp
@@ -54,6 +54,10 @@ public:
       const ContainerID& containerId,
       const std::string& cgroup) override;
 
+  process::Future<mesos::slave::ContainerLimitation> watch(
+      const ContainerID& containerId,
+      const std::string& cgroup) override;
+
   process::Future<Nothing> update(
       const ContainerID& containerId,
       const std::string& cgroup,
@@ -68,6 +72,10 @@ public:
 private:
   struct Info
   {
+    process::Future<Nothing> oom;
+
+    process::Promise<mesos::slave::ContainerLimitation> limitation;
+
     // Check if the hard memory limit has been updated for the container.
     // Also true if the container was recovered.
     bool hardLimitUpdated = false;
@@ -75,6 +83,15 @@ private:
 
   MemoryControllerProcess(const Flags& flags);
 
+  void oomListen(
+      const ContainerID& containerId,
+      const std::string& cgroup);
+
+  void oomed(
+      const ContainerID& containerId,
+      const std::string& cgroup,
+      const process::Future<Nothing>& oomFuture);
+
   hashmap<ContainerID, Info> infos;
 };
 
@@ -82,4 +99,4 @@ private:
 } // namespace internal {
 } // namespace mesos {
 
-#endif // __MEMORY_HPP__
+#endif // __MEMORY_HPP__
\ No newline at end of file
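
For context on how the new watch() override is consumed: the containerizer
calls watch() once per container and reacts when the returned future
completes, which the code above triggers by setting the Info::limitation
promise from oomed(). An illustrative caller (the handler below is
hypothetical, not part of this commit):

    // Illustrative only: a caller observing the memory limitation.
    controller->watch(containerId, cgroup)
      .onReady([=](const mesos::slave::ContainerLimitation& limitation) {
        LOG(INFO) << "Container " << containerId
                  << " hit a limit: " << limitation.message();
        // A real containerizer would destroy the container and attach
        // REASON_CONTAINER_LIMITATION_MEMORY to the task status update.
      });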
