This is an automated email from the ASF dual-hosted git repository. gilbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git
commit 16fd7e74b2dc6176d418ebcc1608b94a1159cb15 Author: Qian Zhang <[email protected]> AuthorDate: Wed Feb 27 22:22:29 2019 -0800 Implemented recovery for volume gid manager. Review: https://reviews.apache.org/r/69676/ --- src/slave/paths.cpp | 7 ++ src/slave/paths.hpp | 71 ++++++----- src/slave/slave.cpp | 28 ++++- src/slave/slave.hpp | 7 ++ .../volume_gid_manager/volume_gid_manager.cpp | 133 +++++++++++++++++++-- .../volume_gid_manager/volume_gid_manager.hpp | 2 + 6 files changed, 202 insertions(+), 46 deletions(-) diff --git a/src/slave/paths.cpp b/src/slave/paths.cpp index c041a7c..1163c88 100644 --- a/src/slave/paths.cpp +++ b/src/slave/paths.cpp @@ -69,6 +69,7 @@ const char RESOURCES_INFO_FILE[] = "resources.info"; const char RESOURCES_TARGET_FILE[] = "resources.target"; const char RESOURCE_PROVIDER_STATE_FILE[] = "resource_provider.state"; const char OPERATION_UPDATES_FILE[] = "operation.updates"; +const char VOLUME_GIDS_FILE[] = "volume_gids"; const char CONTAINERS_DIR[] = "containers"; @@ -771,6 +772,12 @@ string getPersistentVolumePath( } +string getVolumeGidsPath(const string& rootDir) +{ + return path::join(rootDir, "volume_gid_manager", VOLUME_GIDS_FILE); +} + + Try<string> createExecutorDirectory( const string& rootDir, const SlaveID& slaveId, diff --git a/src/slave/paths.hpp b/src/slave/paths.hpp index 847665c..ad76826 100644 --- a/src/slave/paths.hpp +++ b/src/slave/paths.hpp @@ -73,39 +73,41 @@ namespace paths { // | | |-- resources.target // | | |-- resources_and_operations.state // | |-- slaves -// | |-- latest (symlink) -// | |-- <slave_id> -// | |-- slave.info -// | |-- operations -// | | |-- <operation_uuid> -// | | |-- operation.updates -// | |-- resource_providers -// | | |-- <type> -// | | |-- <name> -// | | |-- latest (symlink) -// | | |-- <resource_provider_id> -// | | |-- resource_provider.state -// | | |-- operations -// | | |-- <operation_uuid> -// | | |-- operation.updates -// | |-- frameworks -// | |-- <framework_id> -// | |-- framework.info -// | |-- framework.pid -// | |-- executors -// | |-- <executor_id> -// | |-- executor.info -// | |-- runs -// | |-- latest (symlink) -// | |-- <container_id> (sandbox) -// | |-- executor.sentinel (if completed) -// | |-- pids -// | | |-- forked.pid -// | | |-- libprocess.pid -// | |-- tasks -// | |-- <task_id> -// | |-- task.info -// | |-- task.updates +// | | |-- latest (symlink) +// | | |-- <slave_id> +// | | |-- slave.info +// | | |-- operations +// | | | |-- <operation_uuid> +// | | | |-- operation.updates +// | | |-- resource_providers +// | | | |-- <type> +// | | | |-- <name> +// | | | |-- latest (symlink) +// | | | |-- <resource_provider_id> +// | | | |-- resource_provider.state +// | | | |-- operations +// | | | |-- <operation_uuid> +// | | | |-- operation.updates +// | | |-- frameworks +// | | |-- <framework_id> +// | | |-- framework.info +// | | |-- framework.pid +// | | |-- executors +// | | |-- <executor_id> +// | | |-- executor.info +// | | |-- runs +// | | |-- latest (symlink) +// | | |-- <container_id> (sandbox) +// | | |-- executor.sentinel (if completed) +// | | |-- pids +// | | | |-- forked.pid +// | | | |-- libprocess.pid +// | | |-- tasks +// | | |-- <task_id> +// | | |-- task.info +// | | |-- task.updates +// | |-- volume_gid_manager +// | |-- volume_gids // |-- volumes // | |-- roles // | |-- <role> @@ -435,6 +437,9 @@ std::string getPersistentVolumePath( const Resource& resource); +std::string getVolumeGidsPath(const std::string& rootDir); + + Try<std::string> createExecutorDirectory( const std::string& rootDir, const SlaveID& slaveId, diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp index e58684a..f9b5817 100644 --- a/src/slave/slave.cpp +++ b/src/slave/slave.cpp @@ -7362,14 +7362,38 @@ Future<Nothing> Slave::recover(const Try<state::State>& state) } } - return taskStatusUpdateManager->recover(metaDir, slaveState) + return _recoverVolumeGidManager(state->rebooted) + .then(defer(self(), &Slave::_recoverTaskStatusUpdates, slaveState)) .then(defer(self(), &Slave::_recoverContainerizer, slaveState)) .then(defer(self(), &Slave::_recoverOperations, slaveState)); } +Future<Nothing> Slave::_recoverVolumeGidManager(bool rebooted) +{ +#ifndef __WINDOWS__ + if (volumeGidManager) { + return volumeGidManager->recover(rebooted); + } + return Nothing(); +#else + return Nothing(); +#endif // __WINDOWS__ +} + + +Future<Option<SlaveState>> Slave::_recoverTaskStatusUpdates( + const Option<SlaveState>& state) +{ + return taskStatusUpdateManager->recover(metaDir, state) + .then([state]() -> Future<Option<SlaveState>> { + return state; + }); +} + + Future<Nothing> Slave::_recoverContainerizer( - const Option<state::SlaveState>& state) + const Option<SlaveState>& state) { return containerizer->recover(state); } diff --git a/src/slave/slave.hpp b/src/slave/slave.hpp index 808531c..d9dbecd 100644 --- a/src/slave/slave.hpp +++ b/src/slave/slave.hpp @@ -547,6 +547,13 @@ public: // executors. Otherwise, the slave attempts to shutdown/kill them. process::Future<Nothing> _recover(); + // This is a helper to call `recover()` on the volume gid manager. + process::Future<Nothing> _recoverVolumeGidManager(bool rebooted); + + // This is a helper to call `recover()` on the task status update manager. + process::Future<Option<state::SlaveState>> _recoverTaskStatusUpdates( + const Option<state::SlaveState>& slaveState); + // This is a helper to call `recover()` on the containerizer at the end of // `recover()` and before `__recover()`. // TODO(idownes): Remove this when we support defers to objects. diff --git a/src/slave/volume_gid_manager/volume_gid_manager.cpp b/src/slave/volume_gid_manager/volume_gid_manager.cpp index 49f70a2..d887387 100644 --- a/src/slave/volume_gid_manager/volume_gid_manager.cpp +++ b/src/slave/volume_gid_manager/volume_gid_manager.cpp @@ -24,6 +24,7 @@ #include <process/dispatch.hpp> #include <process/id.hpp> +#include <stout/os/exists.hpp> #include <stout/os/su.hpp> #include "common/values.hpp" @@ -32,6 +33,9 @@ #include "linux/fs.hpp" #endif // __linux__ +#include "slave/paths.hpp" +#include "slave/state.hpp" + #include "slave/volume_gid_manager/volume_gid_manager.hpp" using std::string; @@ -42,6 +46,7 @@ using process::Failure; using process::Future; using process::Owned; +using mesos::internal::values::intervalSetToRanges; using mesos::internal::values::rangesToIntervalSet; namespace mesos { @@ -152,10 +157,75 @@ static Try<Nothing> setVolumeOwnership( class VolumeGidManagerProcess : public process::Process<VolumeGidManagerProcess> { public: - VolumeGidManagerProcess(const IntervalSet<gid_t>& gids) + VolumeGidManagerProcess( + const IntervalSet<gid_t>& gids, + const string& workDir) : ProcessBase(process::ID::generate("volume-gid-manager")), totalGids(gids), - freeGids(gids) {} + freeGids(gids), + metaDir(paths::getMetaRootDir(workDir)) {} + + Future<Nothing> recover(bool rebooted) + { + LOG(INFO) << "Recovering volume gid manager"; + + const string volumeGidsPath = paths::getVolumeGidsPath(metaDir); + if (os::exists(volumeGidsPath)) { + Result<VolumeGidInfos> volumeGidInfos = + state::read<VolumeGidInfos>(volumeGidsPath); + + if (volumeGidInfos.isError()) { + return Failure( + "Failed to read volume gid infos from '" + volumeGidsPath + + "' " + volumeGidInfos.error()); + } else if (volumeGidInfos.isNone()) { + // This could happen if the agent is hard rebooted after the file is + // created but before the data is synced on disk. + LOG(WARNING) << "The volume gids file '" + << volumeGidsPath << "' is empty"; + } else { + CHECK_SOME(volumeGidInfos); + + hashset<string> orphans; + foreach (const VolumeGidInfo& info, volumeGidInfos->infos()) { + freeGids -= info.gid(); + infos.put(info.path(), info); + + // Normally the gid allocated to the PARENT type SANDBOX_PATH + // volume is deallocated when the parent container is destroyed, + // However after agent reboot, containerizer will not destroy any + // containers since all containers are already gone, so to avoid + // gid leak in this case, we need to deallocate gid for the PARENT + // type SANDBOX_PATH volume here. + if (rebooted && info.type() == VolumeGidInfo::SANDBOX_PATH) { + LOG(INFO) << "Deallocating gid " << info.gid() << " for the PARENT" + << "type SANDBOX_PATH volume '" << info.path() + << "' after agent reboot"; + + orphans.insert(Path(info.path()).dirname()); + continue; + } + + // This could happen in the case that agent crashes after the + // shared persistent volume is deleted but before volume gid + // manager deallocates its gid. + if (!os::exists(info.path())) { + LOG(WARNING) << "Deallocating gid " << info.gid() << " for the " + << "non-existent volume path '" << info.path() << "'"; + + orphans.insert(info.path()); + } + } + + // Deallocate all the orphaned paths. + foreach (const string& path, orphans) { + deallocate(path); + } + } + } + + return Nothing(); + } // This method will be called when a container running as non-root user tries // to use a shared persistent volume or a PARENT type SANDBOX_PATH volume, the @@ -217,13 +287,6 @@ public: LOG(INFO) << "Allocating gid " << gid << " to the volume path '" << path << "'"; - Try<Nothing> result = setVolumeOwnership(path, gid, true); - if (result.isError()) { - return Failure( - "Failed to set the owner group of the volume path '" + path + - "' to " + stringify(gid) + ": " + result.error()); - } - freeGids -= gid; VolumeGidInfo info; @@ -232,6 +295,19 @@ public: info.set_gid(gid); infos.put(path, info); + + Try<Nothing> status = persist(); + if (status.isError()) { + return Failure( + "Failed to save state of volume gid infos: " + status.error()); + } + + Try<Nothing> result = setVolumeOwnership(path, gid, true); + if (result.isError()) { + return Failure( + "Failed to set the owner group of the volume path '" + path + + "' to " + stringify(gid) + ": " + result.error()); + } } } @@ -251,6 +327,7 @@ public: { vector<string> sandboxPathVolumes; + bool changed = false; for (auto it = infos.begin(); it != infos.end(); ) { const VolumeGidInfo& info = it->second; const string& volumePath = info.path(); @@ -275,6 +352,7 @@ public: } it = infos.erase(it); + changed = true; } else { ++it; } @@ -320,13 +398,40 @@ public: } } + if (changed) { + Try<Nothing> status = persist(); + if (status.isError()) { + return Failure( + "Failed to save state of volume gid infos: " + status.error()); + } + } + return Nothing(); } private: + Try<Nothing> persist() + { + VolumeGidInfos volumeGidInfos; + foreachvalue (const VolumeGidInfo& info, infos) { + volumeGidInfos.add_infos()->CopyFrom(info); + } + + Try<Nothing> status = state::checkpoint( + paths::getVolumeGidsPath(metaDir), volumeGidInfos); + + if (status.isError()) { + return Error("Failed to perform checkpoint: " + status.error()); + } + + return Nothing(); + } + const IntervalSet<gid_t> totalGids; IntervalSet<gid_t> freeGids; + const string metaDir; + // Allocated gid infos keyed by the volume path. hashmap<string, VolumeGidInfo> infos; }; @@ -367,8 +472,8 @@ Try<VolumeGidManager*> VolumeGidManager::create(const Flags& flags) return Error("Empty volume gid range"); } - return new VolumeGidManager( - Owned<VolumeGidManagerProcess>(new VolumeGidManagerProcess(gids.get()))); + return new VolumeGidManager(Owned<VolumeGidManagerProcess>( + new VolumeGidManagerProcess(gids.get(), flags.work_dir))); } @@ -387,6 +492,12 @@ VolumeGidManager::~VolumeGidManager() } +Future<Nothing> VolumeGidManager::recover(bool rebooted) const +{ + return dispatch(process.get(), &VolumeGidManagerProcess::recover, rebooted); +} + + Future<gid_t> VolumeGidManager::allocate( const string& path, VolumeGidInfo::Type type) const diff --git a/src/slave/volume_gid_manager/volume_gid_manager.hpp b/src/slave/volume_gid_manager/volume_gid_manager.hpp index 51732af..838728b 100644 --- a/src/slave/volume_gid_manager/volume_gid_manager.hpp +++ b/src/slave/volume_gid_manager/volume_gid_manager.hpp @@ -44,6 +44,8 @@ public: ~VolumeGidManager(); + process::Future<Nothing> recover(bool rebooted) const; + process::Future<gid_t> allocate( const std::string& path, VolumeGidInfo::Type type) const;
