This is an automated email from the ASF dual-hosted git repository. bmahler pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git
commit 9d3541c1b3668d15ddee18a03fa8be15c9812550 Author: Jason Zhou <[email protected]> AuthorDate: Tue Jul 23 23:57:00 2024 -0400 [cgroups2] Add ebpf program attachment to the DeviceManager. Currently, the device manager only keeps track of the state in memory, and does not commit the changes by attaching an ebpf file to the corresponding cgroup. We will now generate and attach the ebpf file when configure and reconfigure are called. Review: https://reviews.apache.org/r/75102/ --- .../device_manager/device_manager.cpp | 32 +++++++++++++ .../device_manager/device_manager.hpp | 4 +- src/tests/device_manager_tests.cpp | 52 +++++++++++++++++++++- 3 files changed, 83 insertions(+), 5 deletions(-) diff --git a/src/slave/containerizer/device_manager/device_manager.cpp b/src/slave/containerizer/device_manager/device_manager.cpp index a392000ea..d28b2c35b 100644 --- a/src/slave/containerizer/device_manager/device_manager.cpp +++ b/src/slave/containerizer/device_manager/device_manager.cpp @@ -27,6 +27,7 @@ #include "slave/containerizer/device_manager/device_manager.hpp" #include "slave/paths.hpp" +#include "linux/cgroups2.hpp" using std::string; using std::vector; @@ -94,6 +95,14 @@ public: device_access_per_cgroup[cgroup].allow_list = allow_list; device_access_per_cgroup[cgroup].deny_list = deny_list; + Try<Nothing> commit = commit_device_access_changes(cgroup); + if (commit.isError()) { + // We do not rollback the state when something goes wrong in the + // update because the container will be destroyed when this fails. + return Failure("Failed to commit cgroup device access changes: " + + commit.error()); + } + return Nothing(); } @@ -120,6 +129,14 @@ public: non_wildcard_additions, non_wildcard_removals); + Try<Nothing> commit = commit_device_access_changes(cgroup); + if (commit.isError()) { + // We do not rollback the state when something goes wrong in the + // update because the container will be destroyed when this fails. + return Failure("Failed to commit cgroup device access changes: " + + commit.error()); + } + return Nothing(); } @@ -139,6 +156,21 @@ private: const string meta_dir; hashmap<string, DeviceManager::CgroupDeviceAccess> device_access_per_cgroup; + + // TODO(jasonzhou): persist device_access_per_cgroup on disk. + Try<Nothing> commit_device_access_changes(const string& cgroup) const + { + Try<Nothing> status = cgroups2::devices::configure( + cgroup, + device_access_per_cgroup.at(cgroup).allow_list, + device_access_per_cgroup.at(cgroup).deny_list); + + if (status.isError()) { + return Error("Failed to configure device access: " + status.error()); + } + + return Nothing(); + } }; diff --git a/src/slave/containerizer/device_manager/device_manager.hpp b/src/slave/containerizer/device_manager/device_manager.hpp index a6e87dd54..eafa4e6f6 100644 --- a/src/slave/containerizer/device_manager/device_manager.hpp +++ b/src/slave/containerizer/device_manager/device_manager.hpp @@ -45,9 +45,7 @@ class DeviceManagerProcess; // to incrementally adjust the device access state for a cgroup and generate // the appropriate ebpf programs. // -// TODO(jasonzhou): This initial implementation only adds in-memory tracking -// of the access, add bpf program attachment and checkpointing in follow-on -// patches. +// TODO(jasonzhou): Add checkpointing / recovery support in follow-on patches. class DeviceManager { public: diff --git a/src/tests/device_manager_tests.cpp b/src/tests/device_manager_tests.cpp index 398846bdd..19ca0af4c 100644 --- a/src/tests/device_manager_tests.cpp +++ b/src/tests/device_manager_tests.cpp @@ -21,6 +21,7 @@ #include <tuple> #include <process/gtest.hpp> +#include <process/reap.hpp> #include <stout/unreachable.hpp> #include <stout/tests/utils.hpp> @@ -110,13 +111,14 @@ TEST(NonWildcardEntry, NonWildcardFromWildcard) TEST_F(DeviceManagerTest, ROOT_DeviceManagerConfigure_Normal) { + typedef std::pair<string, int> OpenArgs; ASSERT_SOME(cgroups2::create(TEST_CGROUP)); slave::Flags flags; flags.work_dir = *sandbox; Owned<DeviceManager> dm = Owned<DeviceManager>(CHECK_NOTERROR(DeviceManager::create(flags))); - vector<devices::Entry> allow_list = {*devices::Entry::parse("c 1:3 w")}; + vector<devices::Entry> allow_list = {*devices::Entry::parse("c 1:3 r")}; vector<devices::Entry> deny_list = {*devices::Entry::parse("c 3:1 w")}; AWAIT_ASSERT_READY(dm->configure( @@ -128,6 +130,29 @@ TEST_F(DeviceManagerTest, ROOT_DeviceManagerConfigure_Normal) EXPECT_EQ(cgroup_state.allow_list, allow_list); EXPECT_EQ(cgroup_state.deny_list, deny_list); + + pid_t pid = ::fork(); + ASSERT_NE(-1, pid); + + if (pid == 0) { + // Move the child process into the newly created cgroup. + Try<Nothing> assign = cgroups2::assign(TEST_CGROUP, ::getpid()); + if (assign.isError()) { + SAFE_EXIT(EXIT_FAILURE, "Failed to assign child process to cgroup"); + } + + // Check that we can only do the "allowed_accesses". + if (os::open(os::DEV_NULL, O_RDONLY).isError()) { + SAFE_EXIT(EXIT_FAILURE, "Expected allowed read to succeed"); + } + if (os::open(os::DEV_NULL, O_RDWR).isSome()) { + SAFE_EXIT(EXIT_FAILURE, "Expected blocked read to fail"); + } + + ::_exit(EXIT_SUCCESS); + } + + AWAIT_EXPECT_WEXITSTATUS_EQ(EXIT_SUCCESS, process::reap(pid)); } @@ -151,7 +176,7 @@ TEST_F(DeviceManagerTest, ROOT_DeviceManagerReconfigure_Normal) EXPECT_EQ(cgroup_state.allow_list, allow_list); EXPECT_EQ(cgroup_state.deny_list, deny_list); - vector<devices::Entry> additions = {*devices::Entry::parse("b 3:4 w")}; + vector<devices::Entry> additions = {*devices::Entry::parse("c 1:3 r")}; vector<devices::Entry> removals = allow_list; AWAIT_ASSERT_READY(dm->reconfigure( @@ -162,6 +187,29 @@ TEST_F(DeviceManagerTest, ROOT_DeviceManagerReconfigure_Normal) EXPECT_EQ(cgroup_state.allow_list, additions); EXPECT_EQ(cgroup_state.deny_list, deny_list); + + pid_t pid = ::fork(); + ASSERT_NE(-1, pid); + + if (pid == 0) { + // Move the child process into the newly created cgroup. + Try<Nothing> assign = cgroups2::assign(TEST_CGROUP, ::getpid()); + if (assign.isError()) { + SAFE_EXIT(EXIT_FAILURE, "Failed to assign child process to cgroup"); + } + + // Check that we can only do the "allowed_accesses". + if (os::open(os::DEV_NULL, O_RDONLY).isError()) { + SAFE_EXIT(EXIT_FAILURE, "Expected allowed read to succeed"); + } + if (os::open(os::DEV_NULL, O_RDWR).isSome()) { + SAFE_EXIT(EXIT_FAILURE, "Expected blocked read to fail"); + } + + ::_exit(EXIT_SUCCESS); + } + + AWAIT_EXPECT_WEXITSTATUS_EQ(EXIT_SUCCESS, process::reap(pid)); }
