This is an automated email from the ASF dual-hosted git repository.
bmahler pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git
The following commit(s) were added to refs/heads/master by this push:
new 08beb64fb [cgroups2] Introduced API to set memory.min for a cgroup.
08beb64fb is described below
commit 08beb64fbcb78953fa166698131c43f89acb88ad
Author: Devin Leamy <[email protected]>
AuthorDate: Mon Apr 15 14:45:02 2024 -0400
[cgroups2] Introduced API to set memory.min for a cgroup.
Introduces
```
cgroups2::memory::min(cgroup) // get the minimum
cgroups2::memory::set_min(cgroup, bytes) // set the minimum
```
to get and set the minimum amount of memory, in bytes, that is guaranteed
not to be reclaimed by the kernel under any conditions.
This closes #554
---
src/linux/cgroups2.cpp | 18 ++++++++++++
src/linux/cgroups2.hpp | 41 ++++++++++++++++++++++++++
src/tests/containerizer/cgroups2_tests.cpp | 47 ++++++++++++++++++++++++++++++
3 files changed, 106 insertions(+)
diff --git a/src/linux/cgroups2.cpp b/src/linux/cgroups2.cpp
index 50e4285f2..e70e80780 100644
--- a/src/linux/cgroups2.cpp
+++ b/src/linux/cgroups2.cpp
@@ -772,6 +772,7 @@ namespace memory {
namespace control {
const string CURRENT = "memory.current";
+const string MIN = "memory.min";
} // namespace control {
@@ -786,6 +787,23 @@ Try<Bytes> usage(const string& cgroup)
return Bytes(*contents);
}
+
+Try<Nothing> set_min(const string& cgroup, const Bytes& bytes)
+{
+ return cgroups2::write(cgroup, control::MIN, bytes.bytes());
+}
+
+
+Try<Bytes> min(const string& cgroup)
+{
+ Try<uint64_t> contents = cgroups2::read<uint64_t>(cgroup, control::MIN);
+ if (contents.isError()) {
+ return Error("Failed to read 'memory.min': " + contents.error());
+ }
+
+ return Bytes(*contents);
+}
+
} // namespace memory {
namespace devices {
diff --git a/src/linux/cgroups2.hpp b/src/linux/cgroups2.hpp
index 02fbc1cf7..efd37dc11 100644
--- a/src/linux/cgroups2.hpp
+++ b/src/linux/cgroups2.hpp
@@ -215,11 +215,52 @@ Try<BandwidthLimit> max(const std::string& cgroup);
} // namespace cpu {
+
+// [HIERARCHICAL RESTRICTIONS]
+//
+// If the cgroup2 filesystem is mounted with the 'memory_recursiveprot' option,
+// then the memory protections 'memory.min' and 'memory.low' are recursively
+// applied to children. For example, if a parent has a 'memory.min' of 1GB then
+// the child cgroup cannot reserve more than 1GB of memory. If a child cgroup
+// requests more resources than are available to its parent, then the child's
+// request is capped by its parent's constraints. This aligns with the
+// top-down constraint whereby children cannot request more resources than
+// their parents. This mount option is enabled by default on most systems,
+// including those using systemd.
+//
+//
+// [BYTE ALIGNMENT]
+//
+// Byte amounts written to the memory controller that are not aligned with the
+// system page size, `os::pagesize()`, will be rounded down to the nearest
+// multiple of the page size.
+//
+// Note: This contradicts the official documentation which says that the byte
+// amounts will be rounded up.
+//
+// See: https://docs.kernel.org/admin-guide/cgroup-v2.html
namespace memory {
// Current memory usage of a cgroup and its descendants in bytes.
Try<Bytes> usage(const std::string& cgroup);
+
+// Set the minimum memory that is guaranteed to not be reclaimed under any
+// conditions.
+//
+// Note: See the top-level `cgroups2::memory` comment about byte alignment and
+// hierarchical restrictions.
+//
+// Cannot be used for the root cgroup.
+Try<Nothing> set_min(const std::string& cgroup, const Bytes& bytes);
+
+
+// Get the minimum memory that is guaranteed to not be reclaimed under any
+// conditions.
+//
+// Cannot be used for the root cgroup.
+Try<Bytes> min(const std::string& cgroup);
+
} // namespace memory {
namespace devices {
diff --git a/src/tests/containerizer/cgroups2_tests.cpp
b/src/tests/containerizer/cgroups2_tests.cpp
index bf6e86c56..6da6f7d1c 100644
--- a/src/tests/containerizer/cgroups2_tests.cpp
+++ b/src/tests/containerizer/cgroups2_tests.cpp
@@ -26,6 +26,7 @@
#include <process/gmock.hpp>
#include <process/gtest.hpp>
+#include <stout/bytes.hpp>
#include <stout/exit.hpp>
#include <stout/foreach.hpp>
#include <stout/gtest.hpp>
@@ -325,6 +326,52 @@ TEST_F(Cgroups2Test, ROOT_CGROUPS2_MemoryUsage)
}
+TEST_F(Cgroups2Test, ROOT_CGROUPS2_MemoryMinimum)
+{
+ ASSERT_SOME(enable_controllers({"memory"}));
+
+ ASSERT_SOME(cgroups2::create(TEST_CGROUP));
+ ASSERT_SOME(cgroups2::controllers::enable(TEST_CGROUP, {"memory"}));
+
+ const Bytes bytes = Bytes(os::pagesize()) * 5;
+
+ // Does not exist for the root cgroup.
+ EXPECT_ERROR(cgroups2::memory::min(cgroups2::ROOT_CGROUP));
+ EXPECT_ERROR(cgroups2::memory::set_min(cgroups2::ROOT_CGROUP, bytes));
+
+ EXPECT_SOME(cgroups2::memory::set_min(TEST_CGROUP, bytes));
+ EXPECT_SOME_EQ(bytes, cgroups2::memory::min(TEST_CGROUP));
+}
+
+
+// Check that byte amounts written to the memory controller are rounded
+// down to the nearest multiple of the page size.
+TEST_F(Cgroups2Test, ROOT_CGROUPS2_MemoryBytesRounding)
+{
+ ASSERT_SOME(enable_controllers({"memory"}));
+
+ ASSERT_SOME(cgroups2::create(TEST_CGROUP));
+ ASSERT_SOME(cgroups2::controllers::enable(TEST_CGROUP, {"memory"}));
+
+ const Bytes bytes = Bytes(os::pagesize());
+
+ EXPECT_SOME(cgroups2::memory::set_min(TEST_CGROUP, bytes - 1));
+ EXPECT_SOME_EQ(Bytes(0), cgroups2::memory::min(TEST_CGROUP));
+
+ EXPECT_SOME(cgroups2::memory::set_min(TEST_CGROUP, bytes + 1));
+ EXPECT_SOME_EQ(bytes, cgroups2::memory::min(TEST_CGROUP));
+
+ EXPECT_SOME(cgroups2::memory::set_min(TEST_CGROUP, bytes * 5 - 1));
+ EXPECT_SOME_EQ(bytes * 4, cgroups2::memory::min(TEST_CGROUP));
+
+ EXPECT_SOME(cgroups2::memory::set_min(TEST_CGROUP, bytes * 5));
+ EXPECT_SOME_EQ(bytes * 5, cgroups2::memory::min(TEST_CGROUP));
+
+ EXPECT_SOME(cgroups2::memory::set_min(TEST_CGROUP, bytes * 5 + 1));
+ EXPECT_SOME_EQ(bytes * 5, cgroups2::memory::min(TEST_CGROUP));
+}
+
+
TEST_F(Cgroups2Test, ROOT_CGROUPS2_GetCgroups)
{
vector<string> cgroups = {