This is an automated email from the ASF dual-hosted git repository.

bmahler pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git


The following commit(s) were added to refs/heads/master by this push:
     new b6e2e0bff [device manager] Checkpoint state on device manager state change.
b6e2e0bff is described below

commit b6e2e0bff1098088cdc5daf79a6c9baeb6312dfc
Author: Jason Zhou <[email protected]>
AuthorDate: Wed Aug 7 14:44:12 2024 -0400

    [device manager] Checkpoint state on device manager state change.
    
    Currently we do not checkpoint the device access state of each cgroup
    when configure or reconfigure is called, meaning that we have no way
    of recovering a cgroup's device access state.
    
    We will checkpoint state of the device manager whenever its state
    is being changed to ensure that we can recover the most recent state
    when necessary.
    
    Review: https://reviews.apache.org/r/75143/
---
 .../device_manager/device_manager.cpp              | 39 ++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/src/slave/containerizer/device_manager/device_manager.cpp b/src/slave/containerizer/device_manager/device_manager.cpp
index e613323dc..a2b6b9cd6 100644
--- a/src/slave/containerizer/device_manager/device_manager.cpp
+++ b/src/slave/containerizer/device_manager/device_manager.cpp
@@ -27,7 +27,9 @@
 #include <stout/unreachable.hpp>
 
 #include "slave/containerizer/device_manager/device_manager.hpp"
+#include "slave/containerizer/device_manager/state.hpp"
 #include "slave/paths.hpp"
+#include "slave/state.hpp"
 #include "linux/cgroups2.hpp"
 
 using std::string;
@@ -227,10 +229,43 @@ private:
 
   hashmap<string, DeviceManager::CgroupDeviceAccess> device_access_per_cgroup;
 
-  // TODO(jasonzhou): persist device_access_per_cgroup on disk.
+  Try<Nothing> checkpoint() const
+  {
+    CgroupDeviceAccessStates states;
+
+    foreachpair (const string& cgroup,
+                 const DeviceManager::CgroupDeviceAccess& access,
+                 device_access_per_cgroup) {
+      CgroupDeviceAccessState* state = &(*(states.mutable_device_access_per_cgroup()))[cgroup];
+
+      foreach (const Entry& entry, access.allow_list) {
+        state->add_allow_list(stringify(entry));
+      }
+      foreach (const Entry& entry, access.deny_list) {
+        state->add_deny_list(stringify(entry));
+      }
+    }
+
+    Try<Nothing> status =
+      state::checkpoint(paths::getDevicesStatePath(meta_dir), states);
+
+    if (status.isError()) {
+      return Error("Failed to perform checkpoint: " + status.error());
+    }
+
+    return Nothing();
+  }
+
   Try<Nothing> commit_device_access_changes(const string& cgroup) const
   {
-    Try<Nothing> status = cgroups2::devices::configure(
+    Try<Nothing> status = checkpoint();
+
+    if (status.isError()) {
+      return Error("Failed to checkpoint device access state: "
+                   + status.error());
+    }
+
+    status = cgroups2::devices::configure(
         cgroup,
         device_access_per_cgroup.at(cgroup).allow_list,
         device_access_per_cgroup.at(cgroup).deny_list);

Reply via email to