-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/74979/#review226445
-----------------------------------------------------------
Ship it!
Local edits:
```
diff --git
a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp
b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp
index de1d7d4f5..872a585e2 100644
--- a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp
+++ b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp
@@ -268,39 +268,36 @@ void MemoryControllerProcess::oomed(
LOG(INFO) << "OOM detected for container" << containerId;
- // Construct a message for the limitation to help with debugging
- // the OOM.
-
+ // Construct a message for the limitation to help with debugging the OOM.
ostringstream limitMessage;
limitMessage << "Memory limit exceeded: ";
// TODO(dleamy): Report the peak memory usage of the container. The
- // 'memory.peak' control is only available on newer Linux
- // kernels.
-
- // Report memory statistics if successfully retrieved
- Try<Stats> memoryStats = cgroups2::memory::stats(cgroup);
- if (memoryStats.isError()) {
- LOG(ERROR) << "Failed to get cgroup memory stats: " << memoryStats.error();
+ // 'memory.peak' control is only available on newer Linux kernels.
+
+ // Report memory statistics if successfully retrieved.
+ Try<Stats> stats = cgroups2::memory::stats(cgroup);
+ if (stats.isError()) {
+ LOG(ERROR) << "Failed to get cgroup memory stats for container "
+ << containerId << ": " << stats.error();
} else {
- limitMessage << "\nMEMORY STATISTICS: \n";
- limitMessage << "(oom-killer may have already run, so stats may not ";
- limitMessage << "reflect memory state at time of OOM) \n";
- limitMessage << "Anon: " << memoryStats->anon << "\n";
- limitMessage << "File: " << memoryStats->file << "\n";
- limitMessage << "Kernel: " << memoryStats->kernel << "\n";
+ limitMessage << "\nMEMORY STATISTICS post-OOM: \n";
+ limitMessage << "anon: " << stats->anon << "\n";
+ limitMessage << "file: " << stats->file << "\n";
+ limitMessage << "kernel: " << stats->kernel << "\n";
}
-
-
LOG(INFO) << limitMessage.str();
Result<Bytes> hardLimit = cgroups2::memory::max(cgroup);
if (hardLimit.isError()) {
- LOG(ERROR) << "Failed to get hard memory limit: " << hardLimit.error();
+ LOG(ERROR) << "Failed to get hard memory limit for container "
+ << containerId << ": " << hardLimit.error();
} else if (hardLimit.isNone()) {
- LOG(ERROR) << "No hard limit was set for the container - unexpected OOM";
+ LOG(ERROR) << "Unexpected OOM for container " << containerId
+ << ": no memory hard limit set";
}
+
// Complete the container limitation promise with a memory resource
// limitation.
//
@@ -309,12 +306,10 @@ void MemoryControllerProcess::oomed(
// we should save the resources passed in and report it here.
//
// TODO(dleamy): We report the hard limit because not all machines have
- // access to 'memory.peak', the peak memory usage of the
- // cgroup.
- double megabytes = hardLimit.isSome()
+ // access to 'memory.peak', the peak memory usage of the cgroup.
+ double megabytes = hardLimit.isSome()
? (double)hardLimit->bytes() / Bytes::MEGABYTES : 0;
- Resources memory = *Resources::parse(
- "mem", stringify(megabytes), "*");
+ Resources memory = *Resources::parse( "mem", stringify(megabytes), "*");
infos[containerId].limitation.set(
protobuf::slave::createContainerLimitation(
@@ -325,4 +320,4 @@ void MemoryControllerProcess::oomed(
} // namespace slave {
} // namespace internal {
-} // namespace mesos {
\ No newline at end of file
+} // namespace mesos {
```
src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp
Lines 280 (patched)
<https://reviews.apache.org/r/74979/#comment314659>
nit: extra whitespace here and below (you probably want to update your code
editor to remove trailing whitespace on edited lines? hopefully it has that
feature vs removing *all* trailing whitespace in the file and accidentally
touching unchanged lines)
src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp
Lines 293-295 (patched)
<https://reviews.apache.org/r/74979/#comment314658>
nit: extra newlines here
- Benjamin Mahler
On May 13, 2024, 9:59 p.m., Jason Zhou wrote:
>
> -----------------------------------------------------------
> This is an automatically generated e-mail. To reply, visit:
> https://reviews.apache.org/r/74979/
> -----------------------------------------------------------
>
> (Updated May 13, 2024, 9:59 p.m.)
>
>
> Review request for mesos and Benjamin Mahler.
>
>
> Repository: mesos
>
>
> Description
> -------
>
> Introduces OOM listening to the MemoryControllerProcess so that we
> detect, report, and respond to OOM events.
>
>
> Diffs
> -----
>
> src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp
> 2e60b2c19a781c2d8ab24e89e440383ca517868c
> src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp
> 732b1c65febdc78d8854e571bb02a9d367528434
>
>
> Diff: https://reviews.apache.org/r/74979/diff/3/
>
>
> Testing
> -------
>
>
> Thanks,
>
> Jason Zhou
>
>