This is an automated email from the ASF dual-hosted git repository.

gilbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 16fd7e74b2dc6176d418ebcc1608b94a1159cb15
Author: Qian Zhang <[email protected]>
AuthorDate: Wed Feb 27 22:22:29 2019 -0800

    Implemented recovery for volume gid manager.
    
    Review: https://reviews.apache.org/r/69676/
---
 src/slave/paths.cpp                                |   7 ++
 src/slave/paths.hpp                                |  71 ++++++-----
 src/slave/slave.cpp                                |  28 ++++-
 src/slave/slave.hpp                                |   7 ++
 .../volume_gid_manager/volume_gid_manager.cpp      | 133 +++++++++++++++++++--
 .../volume_gid_manager/volume_gid_manager.hpp      |   2 +
 6 files changed, 202 insertions(+), 46 deletions(-)

diff --git a/src/slave/paths.cpp b/src/slave/paths.cpp
index c041a7c..1163c88 100644
--- a/src/slave/paths.cpp
+++ b/src/slave/paths.cpp
@@ -69,6 +69,7 @@ const char RESOURCES_INFO_FILE[] = "resources.info";
 const char RESOURCES_TARGET_FILE[] = "resources.target";
 const char RESOURCE_PROVIDER_STATE_FILE[] = "resource_provider.state";
 const char OPERATION_UPDATES_FILE[] = "operation.updates";
+const char VOLUME_GIDS_FILE[] = "volume_gids";
 
 
 const char CONTAINERS_DIR[] = "containers";
@@ -771,6 +772,12 @@ string getPersistentVolumePath(
 }
 
 
+string getVolumeGidsPath(const string& rootDir)
+{
+  return path::join(rootDir, "volume_gid_manager", VOLUME_GIDS_FILE);
+}
+
+
 Try<string> createExecutorDirectory(
     const string& rootDir,
     const SlaveID& slaveId,
diff --git a/src/slave/paths.hpp b/src/slave/paths.hpp
index 847665c..ad76826 100644
--- a/src/slave/paths.hpp
+++ b/src/slave/paths.hpp
@@ -73,39 +73,41 @@ namespace paths {
 //   |   |   |-- resources.target
 //   |   |   |-- resources_and_operations.state
 //   |   |-- slaves
-//   |       |-- latest (symlink)
-//   |       |-- <slave_id>
-//   |           |-- slave.info
-//   |           |-- operations
-//   |           |   |-- <operation_uuid>
-//   |           |       |-- operation.updates
-//   |           |-- resource_providers
-//   |           |   |-- <type>
-//   |           |       |-- <name>
-//   |           |           |-- latest (symlink)
-//   |           |           |-- <resource_provider_id>
-//   |           |               |-- resource_provider.state
-//   |           |               |-- operations
-//   |           |                   |-- <operation_uuid>
-//   |           |                       |-- operation.updates
-//   |           |-- frameworks
-//   |               |-- <framework_id>
-//   |                   |-- framework.info
-//   |                   |-- framework.pid
-//   |                   |-- executors
-//   |                       |-- <executor_id>
-//   |                           |-- executor.info
-//   |                           |-- runs
-//   |                               |-- latest (symlink)
-//   |                               |-- <container_id> (sandbox)
-//   |                                   |-- executor.sentinel (if completed)
-//   |                                   |-- pids
-//   |                                   |   |-- forked.pid
-//   |                                   |   |-- libprocess.pid
-//   |                                   |-- tasks
-//   |                                       |-- <task_id>
-//   |                                           |-- task.info
-//   |                                           |-- task.updates
+//   |   |   |-- latest (symlink)
+//   |   |   |-- <slave_id>
+//   |   |       |-- slave.info
+//   |   |       |-- operations
+//   |   |       |   |-- <operation_uuid>
+//   |   |       |       |-- operation.updates
+//   |   |       |-- resource_providers
+//   |   |       |   |-- <type>
+//   |   |       |       |-- <name>
+//   |   |       |           |-- latest (symlink)
+//   |   |       |           |-- <resource_provider_id>
+//   |   |       |               |-- resource_provider.state
+//   |   |       |               |-- operations
+//   |   |       |                   |-- <operation_uuid>
+//   |   |       |                       |-- operation.updates
+//   |   |       |-- frameworks
+//   |   |           |-- <framework_id>
+//   |   |               |-- framework.info
+//   |   |               |-- framework.pid
+//   |   |               |-- executors
+//   |   |                   |-- <executor_id>
+//   |   |                       |-- executor.info
+//   |   |                       |-- runs
+//   |   |                           |-- latest (symlink)
+//   |   |                           |-- <container_id> (sandbox)
+//   |   |                               |-- executor.sentinel (if completed)
+//   |   |                               |-- pids
+//   |   |                               |   |-- forked.pid
+//   |   |                               |   |-- libprocess.pid
+//   |   |                               |-- tasks
+//   |   |                                   |-- <task_id>
+//   |   |                                       |-- task.info
+//   |   |                                       |-- task.updates
+//   |   |-- volume_gid_manager
+//   |       |-- volume_gids
 //   |-- volumes
 //   |   |-- roles
 //   |       |-- <role>
@@ -435,6 +437,9 @@ std::string getPersistentVolumePath(
     const Resource& resource);
 
 
+std::string getVolumeGidsPath(const std::string& rootDir);
+
+
 Try<std::string> createExecutorDirectory(
     const std::string& rootDir,
     const SlaveID& slaveId,
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index e58684a..f9b5817 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -7362,14 +7362,38 @@ Future<Nothing> Slave::recover(const Try<state::State>& 
state)
     }
   }
 
-  return taskStatusUpdateManager->recover(metaDir, slaveState)
+  return _recoverVolumeGidManager(state->rebooted)
+    .then(defer(self(), &Slave::_recoverTaskStatusUpdates, slaveState))
     .then(defer(self(), &Slave::_recoverContainerizer, slaveState))
     .then(defer(self(), &Slave::_recoverOperations, slaveState));
 }
 
 
+Future<Nothing> Slave::_recoverVolumeGidManager(bool rebooted)
+{
+#ifndef __WINDOWS__
+  if (volumeGidManager) {
+    return volumeGidManager->recover(rebooted);
+  }
+  return Nothing();
+#else
+  return Nothing();
+#endif // __WINDOWS__
+}
+
+
+Future<Option<SlaveState>> Slave::_recoverTaskStatusUpdates(
+    const Option<SlaveState>& state)
+{
+  return taskStatusUpdateManager->recover(metaDir, state)
+    .then([state]() -> Future<Option<SlaveState>> {
+      return state;
+    });
+}
+
+
 Future<Nothing> Slave::_recoverContainerizer(
-    const Option<state::SlaveState>& state)
+    const Option<SlaveState>& state)
 {
   return containerizer->recover(state);
 }
diff --git a/src/slave/slave.hpp b/src/slave/slave.hpp
index 808531c..d9dbecd 100644
--- a/src/slave/slave.hpp
+++ b/src/slave/slave.hpp
@@ -547,6 +547,13 @@ public:
   // executors. Otherwise, the slave attempts to shutdown/kill them.
   process::Future<Nothing> _recover();
 
+  // This is a helper to call `recover()` on the volume gid manager.
+  process::Future<Nothing> _recoverVolumeGidManager(bool rebooted);
+
+  // This is a helper to call `recover()` on the task status update manager.
+  process::Future<Option<state::SlaveState>> _recoverTaskStatusUpdates(
+      const Option<state::SlaveState>& slaveState);
+
   // This is a helper to call `recover()` on the containerizer at the end of
   // `recover()` and before `__recover()`.
   // TODO(idownes): Remove this when we support defers to objects.
diff --git a/src/slave/volume_gid_manager/volume_gid_manager.cpp 
b/src/slave/volume_gid_manager/volume_gid_manager.cpp
index 49f70a2..d887387 100644
--- a/src/slave/volume_gid_manager/volume_gid_manager.cpp
+++ b/src/slave/volume_gid_manager/volume_gid_manager.cpp
@@ -24,6 +24,7 @@
 #include <process/dispatch.hpp>
 #include <process/id.hpp>
 
+#include <stout/os/exists.hpp>
 #include <stout/os/su.hpp>
 
 #include "common/values.hpp"
@@ -32,6 +33,9 @@
 #include "linux/fs.hpp"
 #endif // __linux__
 
+#include "slave/paths.hpp"
+#include "slave/state.hpp"
+
 #include "slave/volume_gid_manager/volume_gid_manager.hpp"
 
 using std::string;
@@ -42,6 +46,7 @@ using process::Failure;
 using process::Future;
 using process::Owned;
 
+using mesos::internal::values::intervalSetToRanges;
 using mesos::internal::values::rangesToIntervalSet;
 
 namespace mesos {
@@ -152,10 +157,75 @@ static Try<Nothing> setVolumeOwnership(
 class VolumeGidManagerProcess : public 
process::Process<VolumeGidManagerProcess>
 {
 public:
-  VolumeGidManagerProcess(const IntervalSet<gid_t>& gids)
+  VolumeGidManagerProcess(
+      const IntervalSet<gid_t>& gids,
+      const string& workDir)
     : ProcessBase(process::ID::generate("volume-gid-manager")),
       totalGids(gids),
-      freeGids(gids) {}
+      freeGids(gids),
+      metaDir(paths::getMetaRootDir(workDir)) {}
+
+  Future<Nothing> recover(bool rebooted)
+  {
+    LOG(INFO) << "Recovering volume gid manager";
+
+    const string volumeGidsPath = paths::getVolumeGidsPath(metaDir);
+    if (os::exists(volumeGidsPath)) {
+      Result<VolumeGidInfos> volumeGidInfos =
+        state::read<VolumeGidInfos>(volumeGidsPath);
+
+      if (volumeGidInfos.isError()) {
+        return Failure(
+            "Failed to read volume gid infos from '" + volumeGidsPath +
+            "' " + volumeGidInfos.error());
+      } else if (volumeGidInfos.isNone()) {
+        // This could happen if the agent is hard rebooted after the file is
+        // created but before the data is synced on disk.
+        LOG(WARNING) << "The volume gids file '"
+                     << volumeGidsPath << "' is empty";
+      } else {
+        CHECK_SOME(volumeGidInfos);
+
+        hashset<string> orphans;
+        foreach (const VolumeGidInfo& info, volumeGidInfos->infos()) {
+          freeGids -= info.gid();
+          infos.put(info.path(), info);
+
+          // Normally the gid allocated to the PARENT type SANDBOX_PATH
+          // volume is deallocated when the parent container is destroyed,
+          // However after agent reboot, containerizer will not destroy any
+          // containers since all containers are already gone, so to avoid
+          // gid leak in this case, we need to deallocate gid for the PARENT
+          // type SANDBOX_PATH volume here.
+          if (rebooted && info.type() == VolumeGidInfo::SANDBOX_PATH) {
+            LOG(INFO) << "Deallocating gid " << info.gid() << " for the PARENT"
+                      << "type SANDBOX_PATH volume '" << info.path()
+                      << "' after agent reboot";
+
+            orphans.insert(Path(info.path()).dirname());
+            continue;
+          }
+
+          // This could happen in the case that agent crashes after the
+          // shared persistent volume is deleted but before volume gid
+          // manager deallocates its gid.
+          if (!os::exists(info.path())) {
+            LOG(WARNING) << "Deallocating gid " << info.gid() << " for the "
+                         << "non-existent volume path '" << info.path() << "'";
+
+            orphans.insert(info.path());
+          }
+        }
+
+        // Deallocate all the orphaned paths.
+        foreach (const string& path, orphans) {
+          deallocate(path);
+        }
+      }
+    }
+
+    return Nothing();
+  }
 
   // This method will be called when a container running as non-root user tries
   // to use a shared persistent volume or a PARENT type SANDBOX_PATH volume, 
the
@@ -217,13 +287,6 @@ public:
         LOG(INFO) << "Allocating gid " << gid << " to the volume path '"
                   << path << "'";
 
-        Try<Nothing> result = setVolumeOwnership(path, gid, true);
-        if (result.isError()) {
-          return Failure(
-              "Failed to set the owner group of the volume path '" + path +
-              "' to " + stringify(gid) + ": " + result.error());
-        }
-
         freeGids -= gid;
 
         VolumeGidInfo info;
@@ -232,6 +295,19 @@ public:
         info.set_gid(gid);
 
         infos.put(path, info);
+
+        Try<Nothing> status = persist();
+        if (status.isError()) {
+          return Failure(
+              "Failed to save state of volume gid infos: " + status.error());
+        }
+
+        Try<Nothing> result = setVolumeOwnership(path, gid, true);
+        if (result.isError()) {
+          return Failure(
+              "Failed to set the owner group of the volume path '" + path +
+              "' to " + stringify(gid) + ": " + result.error());
+        }
       }
     }
 
@@ -251,6 +327,7 @@ public:
   {
     vector<string> sandboxPathVolumes;
 
+    bool changed = false;
     for (auto it = infos.begin(); it != infos.end(); ) {
       const VolumeGidInfo& info = it->second;
       const string& volumePath = info.path();
@@ -275,6 +352,7 @@ public:
         }
 
         it = infos.erase(it);
+        changed = true;
       } else {
         ++it;
       }
@@ -320,13 +398,40 @@ public:
       }
     }
 
+    if (changed) {
+      Try<Nothing> status = persist();
+      if (status.isError()) {
+        return Failure(
+            "Failed to save state of volume gid infos: " + status.error());
+      }
+    }
+
     return Nothing();
   }
 
 private:
+  Try<Nothing> persist()
+  {
+    VolumeGidInfos volumeGidInfos;
+    foreachvalue (const VolumeGidInfo& info, infos) {
+      volumeGidInfos.add_infos()->CopyFrom(info);
+    }
+
+    Try<Nothing> status = state::checkpoint(
+        paths::getVolumeGidsPath(metaDir), volumeGidInfos);
+
+    if (status.isError()) {
+      return Error("Failed to perform checkpoint: " + status.error());
+    }
+
+    return Nothing();
+  }
+
   const IntervalSet<gid_t> totalGids;
   IntervalSet<gid_t> freeGids;
 
+  const string metaDir;
+
   // Allocated gid infos keyed by the volume path.
   hashmap<string, VolumeGidInfo> infos;
 };
@@ -367,8 +472,8 @@ Try<VolumeGidManager*> VolumeGidManager::create(const 
Flags& flags)
     return Error("Empty volume gid range");
   }
 
-  return new VolumeGidManager(
-      Owned<VolumeGidManagerProcess>(new VolumeGidManagerProcess(gids.get())));
+  return new VolumeGidManager(Owned<VolumeGidManagerProcess>(
+      new VolumeGidManagerProcess(gids.get(), flags.work_dir)));
 }
 
 
@@ -387,6 +492,12 @@ VolumeGidManager::~VolumeGidManager()
 }
 
 
+Future<Nothing> VolumeGidManager::recover(bool rebooted) const
+{
+  return dispatch(process.get(), &VolumeGidManagerProcess::recover, rebooted);
+}
+
+
 Future<gid_t> VolumeGidManager::allocate(
     const string& path,
     VolumeGidInfo::Type type) const
diff --git a/src/slave/volume_gid_manager/volume_gid_manager.hpp 
b/src/slave/volume_gid_manager/volume_gid_manager.hpp
index 51732af..838728b 100644
--- a/src/slave/volume_gid_manager/volume_gid_manager.hpp
+++ b/src/slave/volume_gid_manager/volume_gid_manager.hpp
@@ -44,6 +44,8 @@ public:
 
   ~VolumeGidManager();
 
+  process::Future<Nothing> recover(bool rebooted) const;
+
   process::Future<gid_t> allocate(
       const std::string& path,
       VolumeGidInfo::Type type) const;

Reply via email to