Implemented pruneImages with a mark and sweep in docker store. This includes the following changes: - add a `pruneImages()` function on the chain of relevant classes; - implement prune in docker store; - fix mock interface to keep existing tests pass.
Review: https://reviews.apache.org/r/56721/ Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/bdb604a9 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/bdb604a9 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/bdb604a9 Branch: refs/heads/master Commit: bdb604a9dc29ab7bc4b9398cf4c1a2bd8b6061c4 Parents: e273efe Author: Zhitao Li <zhitaoli...@gmail.com> Authored: Fri Nov 17 16:36:31 2017 -0800 Committer: Gilbert Song <songzihao1...@gmail.com> Committed: Mon Nov 20 12:29:53 2017 -0800 ---------------------------------------------------------------------- src/slave/containerizer/composing.cpp | 21 +++ src/slave/containerizer/composing.hpp | 2 + src/slave/containerizer/containerizer.hpp | 3 + src/slave/containerizer/docker.cpp | 7 + src/slave/containerizer/docker.hpp | 2 + src/slave/containerizer/mesos/containerizer.cpp | 39 ++++ src/slave/containerizer/mesos/containerizer.hpp | 4 + .../provisioner/docker/metadata_manager.cpp | 50 ++++- .../provisioner/docker/metadata_manager.hpp | 14 ++ .../mesos/provisioner/docker/paths.cpp | 25 +++ .../mesos/provisioner/docker/paths.hpp | 15 ++ .../mesos/provisioner/docker/store.cpp | 151 ++++++++++++++- .../mesos/provisioner/docker/store.hpp | 5 + .../mesos/provisioner/provisioner.cpp | 184 ++++++++++++++----- .../mesos/provisioner/provisioner.hpp | 24 +++ .../containerizer/mesos/provisioner/store.cpp | 10 + .../containerizer/mesos/provisioner/store.hpp | 18 ++ src/tests/containerizer.cpp | 17 ++ src/tests/containerizer.hpp | 7 + src/tests/containerizer/mock_containerizer.hpp | 2 + 20 files changed, 547 insertions(+), 53 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/composing.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/composing.cpp b/src/slave/containerizer/composing.cpp index 64919ef..9a9e11b 100644 --- a/src/slave/containerizer/composing.cpp +++ b/src/slave/containerizer/composing.cpp @@ -95,6 +95,8 @@ public: Future<Nothing> remove(const ContainerID& containerId); + Future<Nothing> pruneImages(); + private: // Continuations. Future<Nothing> _recover(); @@ -257,6 +259,12 @@ Future<Nothing> ComposingContainerizer::remove(const ContainerID& containerId) } +Future<Nothing> ComposingContainerizer::pruneImages() +{ + return dispatch(process, &ComposingContainerizerProcess::pruneImages); +} + + ComposingContainerizerProcess::~ComposingContainerizerProcess() { foreach (Containerizer* containerizer, containerizers_) { @@ -687,6 +695,19 @@ Future<Nothing> ComposingContainerizerProcess::remove( return containers_[rootContainerId]->containerizer->remove(containerId); } + +Future<Nothing> ComposingContainerizerProcess::pruneImages() +{ + list<Future<Nothing>> futures; + + foreach (Containerizer* containerizer, containerizers_) { + futures.push_back(containerizer->pruneImages()); + } + + return collect(futures) + .then([]() { return Nothing(); }); +} + } // namespace slave { } // namespace internal { } // namespace mesos { http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/composing.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/composing.hpp b/src/slave/containerizer/composing.hpp index c2689cf..3d00609 100644 --- a/src/slave/containerizer/composing.hpp +++ b/src/slave/containerizer/composing.hpp @@ -86,6 +86,8 @@ public: virtual process::Future<Nothing> remove(const ContainerID& containerId); + virtual process::Future<Nothing> pruneImages(); + private: ComposingContainerizerProcess* process; }; http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/containerizer.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/containerizer.hpp b/src/slave/containerizer/containerizer.hpp index 2027bd9..7a0c6fc 100644 --- a/src/slave/containerizer/containerizer.hpp +++ b/src/slave/containerizer/containerizer.hpp @@ -165,6 +165,9 @@ public: { return process::Failure("Unsupported"); } + + // Prune unused images from supported image stores. + virtual process::Future<Nothing> pruneImages() = 0; }; } // namespace slave { http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/docker.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/docker.cpp b/src/slave/containerizer/docker.cpp index 63432a9..9918d83 100644 --- a/src/slave/containerizer/docker.cpp +++ b/src/slave/containerizer/docker.cpp @@ -876,6 +876,13 @@ Future<hashset<ContainerID>> DockerContainerizer::containers() } +Future<Nothing> DockerContainerizer::pruneImages() +{ + VLOG(1) << "DockerContainerizer does not support pruneImages"; + return Nothing(); +} + + Future<Nothing> DockerContainerizerProcess::recover( const Option<SlaveState>& state) { http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/docker.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/docker.hpp b/src/slave/containerizer/docker.hpp index 105c068..9df9849 100644 --- a/src/slave/containerizer/docker.hpp +++ b/src/slave/containerizer/docker.hpp @@ -109,6 +109,8 @@ public: virtual process::Future<hashset<ContainerID>> containers(); + virtual process::Future<Nothing> pruneImages(); + private: process::Owned<DockerContainerizerProcess> process; }; http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/containerizer.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp index bf71db1..7f3b86d 100644 --- a/src/slave/containerizer/mesos/containerizer.cpp +++ b/src/slave/containerizer/mesos/containerizer.cpp @@ -656,6 +656,12 @@ Future<Nothing> MesosContainerizer::remove(const ContainerID& containerId) } +Future<Nothing> MesosContainerizer::pruneImages() +{ + return dispatch(process.get(), &MesosContainerizerProcess::pruneImages); +} + + Future<Nothing> MesosContainerizerProcess::recover( const Option<state::SlaveState>& state) { @@ -2818,6 +2824,39 @@ Future<hashset<ContainerID>> MesosContainerizerProcess::containers() } +Future<Nothing> MesosContainerizerProcess::pruneImages() +{ + vector<Image> excludedImages; + excludedImages.reserve(containers_.size()); + + foreachpair ( + const ContainerID& containerId, + const Owned<Container>& container, + containers_) { + // Checkpointing ContainerConfig is introduced recently. Legacy containers + // do not have the information of which image is used. Image pruning is + // disabled. + if (container->config.isNone()) { + return Failure( + "Container " + stringify(containerId) + + " does not have ContainerConfig " + "checkpointed. Image pruning is disabled"); + } + + const ContainerConfig& containerConfig = container->config.get(); + if (containerConfig.has_container_info() && + containerConfig.container_info().mesos().has_image()) { + excludedImages.push_back( + containerConfig.container_info().mesos().image()); + } + } + + // TODO(zhitao): use std::unique to deduplicate `excludedImages`. + + return provisioner->pruneImages(excludedImages); +} + + MesosContainerizerProcess::Metrics::Metrics() : container_destroy_errors( "containerizer/mesos/container_destroy_errors") http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/containerizer.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/containerizer.hpp b/src/slave/containerizer/mesos/containerizer.hpp index e859b5b..e2739e0 100644 --- a/src/slave/containerizer/mesos/containerizer.hpp +++ b/src/slave/containerizer/mesos/containerizer.hpp @@ -111,6 +111,8 @@ public: virtual process::Future<Nothing> remove(const ContainerID& containerId); + virtual process::Future<Nothing> pruneImages(); + private: explicit MesosContainerizer( const process::Owned<MesosContainerizerProcess>& process); @@ -181,6 +183,8 @@ public: virtual process::Future<hashset<ContainerID>> containers(); + virtual process::Future<Nothing> pruneImages(); + private: enum State { http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp b/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp index d86afd2..1ab66c1 100644 --- a/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp +++ b/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp @@ -67,7 +67,8 @@ public: const spec::ImageReference& reference, bool cached); - // TODO(chenlily): Implement removal of unreferenced images. + Future<hashset<string>> prune( + const vector<spec::ImageReference>& excludedImages); private: // Write out metadata manager state to persistent store. @@ -134,6 +135,16 @@ Future<Option<Image>> MetadataManager::get( } +Future<hashset<string>> MetadataManager::prune( + const vector<spec::ImageReference>& excludedImages) +{ + return dispatch( + process.get(), + &MetadataManagerProcess::prune, + excludedImages); +} + + Future<Image> MetadataManagerProcess::put( const spec::ImageReference& reference, const vector<string>& layerIds) @@ -180,6 +191,43 @@ Future<Option<Image>> MetadataManagerProcess::get( } +Future<hashset<string>> MetadataManagerProcess::prune( + const vector<spec::ImageReference>& excludedImages) +{ + hashmap<string, Image> retainedImages; + hashset<string> retainedLayers; + + foreach (const spec::ImageReference& reference, excludedImages) { + const string imageName = stringify(reference); + Option<Image> image = storedImages.get(imageName); + + if (image.isNone()) { + // This is possible if docker store was cleaned + // in a recovery after the container using this image was + // launched. + VLOG(1) << "Excluded docker image '" << imageName + << "' is not cached in metadata manager."; + continue; + } + + retainedImages[imageName] = image.get(); + + foreach (const string& layerId, image->layer_ids()) { + retainedLayers.insert(layerId); + } + } + + storedImages = std::move(retainedImages); + + Try<Nothing> status = persist(); + if (status.isError()) { + return Failure("Failed to save state of Docker images: " + status.error()); + } + + return retainedLayers; +} + + Try<Nothing> MetadataManagerProcess::persist() { Images images; http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.hpp b/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.hpp index 954da16..cfafd44 100644 --- a/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.hpp +++ b/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.hpp @@ -21,6 +21,7 @@ #include <string> #include <stout/hashmap.hpp> +#include <stout/hashset.hpp> #include <stout/json.hpp> #include <stout/option.hpp> #include <stout/protobuf.hpp> @@ -92,6 +93,19 @@ public: const ::docker::spec::ImageReference& reference, bool cached); + /** + * Prune images from the metadata manager by comparing + * existing images with active images in use. This function will + * remove all images not used anymore, and return the list of + * layers which are still referenced. The caller should + * ensure such layers are kept in best effort. + * + * @param excludedImages all images to exclude from pruning. + * @return a list of all layers still refered. + */ + process::Future<hashset<std::string>> prune( + const std::vector<::docker::spec::ImageReference>& excludedImages); + private: explicit MetadataManager(process::Owned<MetadataManagerProcess> process); http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/paths.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/provisioner/docker/paths.cpp b/src/slave/containerizer/mesos/provisioner/docker/paths.cpp index cd684b3..f692552 100644 --- a/src/slave/containerizer/mesos/provisioner/docker/paths.cpp +++ b/src/slave/containerizer/mesos/provisioner/docker/paths.cpp @@ -18,8 +18,12 @@ #include "slave/containerizer/mesos/provisioner/docker/paths.hpp" +#include <process/clock.hpp> + +#include <stout/os.hpp> #include <stout/path.hpp> +using std::list; using std::string; namespace mesos { @@ -46,6 +50,13 @@ string getImageLayerPath(const string& storeDir, const string& layerId) } +Try<list<string>> listLayers(const string& storeDir) +{ + const string layersDir = path::join(storeDir, "layers"); + return os::ls(layersDir); +} + + string getImageLayerManifestPath(const string& layerPath) { return path::join(layerPath, "json"); @@ -100,6 +111,20 @@ string getStoredImagesPath(const string& storeDir) return path::join(storeDir, "storedImages"); } + +string getGcDir(const string& storeDir) +{ + return path::join(storeDir, "gc"); +} + + +string getGcLayerPath(const string& storeDir, const string& layerId) +{ + return path::join( + getGcDir(storeDir), + layerId + "." + stringify(process::Clock::now().duration().ns())); +} + } // namespace paths { } // namespace docker { } // namespace slave { http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/paths.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/provisioner/docker/paths.hpp b/src/slave/containerizer/mesos/provisioner/docker/paths.hpp index 232c027..0cd7f31 100644 --- a/src/slave/containerizer/mesos/provisioner/docker/paths.hpp +++ b/src/slave/containerizer/mesos/provisioner/docker/paths.hpp @@ -17,8 +17,11 @@ #ifndef __PROVISIONER_DOCKER_PATHS_HPP__ #define __PROVISIONER_DOCKER_PATHS_HPP__ +#include <list> #include <string> +#include <stout/try.hpp> + namespace mesos { namespace internal { namespace slave { @@ -40,6 +43,7 @@ namespace paths { * |-- json(manifest) * |-- VERSION * |--storedImages (file holding on cached images) + * |--gc (dir holding marked layers to be sweeped) */ // TODO(gilbert): Clean up any unused method after refactoring. @@ -90,6 +94,17 @@ std::string getImageArchiveTarPath( std::string getStoredImagesPath(const std::string& storeDir); + +std::string getGcDir(const std::string& storeDir); + + +std::string getGcLayerPath( + const std::string& storeDir, + const std::string& layerId); + + +Try<std::list<std::string>> listLayers(const std::string& storeDir); + } // namespace paths { } // namespace docker { } // namespace slave { http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/store.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/provisioner/docker/store.cpp b/src/slave/containerizer/mesos/provisioner/docker/store.cpp index f357710..d64e6eb 100644 --- a/src/slave/containerizer/mesos/provisioner/docker/store.cpp +++ b/src/slave/containerizer/mesos/provisioner/docker/store.cpp @@ -24,13 +24,16 @@ #include <mesos/secret/resolver.hpp> #include <stout/hashmap.hpp> +#include <stout/hashset.hpp> #include <stout/json.hpp> #include <stout/os.hpp> #include <process/collect.hpp> #include <process/defer.hpp> #include <process/dispatch.hpp> +#include <process/executor.hpp> #include <process/id.hpp> +#include <process/metrics/counter.hpp> #include "slave/containerizer/mesos/provisioner/constants.hpp" #include "slave/containerizer/mesos/provisioner/utils.hpp" @@ -75,7 +78,9 @@ public: : ProcessBase(process::ID::generate("docker-provisioner-store")), flags(_flags), metadataManager(_metadataManager), - puller(_puller) {} + puller(_puller) + { + } ~StoreProcess() {} @@ -85,6 +90,10 @@ public: const mesos::Image& image, const string& backend); + Future<Nothing> prune( + const std::vector<mesos::Image>& excludeImages, + const hashset<string>& activeLayerPaths); + private: Future<Image> _get( const spec::ImageReference& reference, @@ -106,11 +115,18 @@ private: const string& layerId, const string& backend); + Future<Nothing> _prune( + const hashset<string>& activeLayerPaths, + const hashset<string>& retainedImageLayers); + const Flags flags; Owned<MetadataManager> metadataManager; Owned<Puller> puller; hashmap<string, Owned<Promise<Image>>> pulling; + + // For executing path removals in a separated actor. + process::Executor executor; }; @@ -164,6 +180,12 @@ Try<Owned<slave::Store>> Store::create( mkdir.error()); } + mkdir = os::mkdir(paths::getGcDir(flags.docker_store_dir)); + if (mkdir.isError()) { + return Error("Failed to create Docker store gc directory: " + + mkdir.error()); + } + Try<Owned<MetadataManager>> metadataManager = MetadataManager::create(flags); if (metadataManager.isError()) { return Error(metadataManager.error()); @@ -203,6 +225,15 @@ Future<ImageInfo> Store::get( } +Future<Nothing> Store::prune( + const vector<mesos::Image>& excludedImages, + const hashset<string>& activeLayerPaths) +{ + return dispatch( + process.get(), &StoreProcess::prune, excludedImages, activeLayerPaths); +} + + Future<Nothing> StoreProcess::recover() { return metadataManager->recover(); @@ -447,6 +478,124 @@ Future<Nothing> StoreProcess::moveLayer( return Nothing(); } + +Future<Nothing> StoreProcess::prune( + const vector<mesos::Image>& excludedImages, + const hashset<string>& activeLayerPaths) +{ + // All existing pulling should have finished. + if (!pulling.empty()) { + return Failure("Cannot prune and pull at the same time"); + } + + vector<spec::ImageReference> imageReferences; + imageReferences.reserve(excludedImages.size()); + + foreach (const mesos::Image& image, excludedImages) { + Try<spec::ImageReference> reference = + spec::parseImageReference(image.docker().name()); + + if (reference.isError()) { + return Failure( + "Failed to parse docker image '" + image.docker().name() + + "': " + reference.error()); + } + + imageReferences.push_back(reference.get()); + } + + return metadataManager->prune(imageReferences) + .then(defer(self(), &Self::_prune, activeLayerPaths, lambda::_1)); +} + + +Future<Nothing> StoreProcess::_prune( + const hashset<string>& activeLayerRootfses, + const hashset<string>& retainedLayerIds) +{ + Try<list<string>> allLayers = paths::listLayers(flags.docker_store_dir); + if (allLayers.isError()) { + return Failure("Failed to find all layer paths: " + allLayers.error()); + } + + // Paths in provisioner are layer rootfs. Normalize them to layer + // path. + hashset<string> activeLayerPaths; + + foreach (const string& rootfsPath, activeLayerRootfses) { + activeLayerPaths.insert(Path(rootfsPath).dirname()); + } + + foreach (const string& layerId, allLayers.get()) { + if (retainedLayerIds.contains(layerId)) { + VLOG(1) << "Layer '" << layerId << "' is retained by image store cache"; + continue; + } + + const string layerPath = + paths::getImageLayerPath(flags.docker_store_dir, layerId); + + if (activeLayerPaths.contains(layerPath)) { + VLOG(1) << "Layer '" << layerId << "' is retained by active container"; + continue; + } + + const string target = + paths::getGcLayerPath(flags.docker_store_dir, layerId); + + if (os::exists(target)) { + return Failure("Marking phase target '" + target + "' already exists"); + } + + VLOG(1) << "Marking layer '" << layerId << "' to gc by renaming '" + << layerPath << "' to '" << target << "'"; + + Try<Nothing> rename = os::rename(layerPath, target); + if (rename.isError()) { + return Failure( + "Failed to move layer from '" + layerPath + + "' to '" + target + "': " + rename.error()); + } + } + + const string gcDir = paths::getGcDir(flags.docker_store_dir); + auto rmdirs = [gcDir]() { + Try<list<string>> targets = os::ls(gcDir); + if (targets.isError()) { + LOG(WARNING) << "Error when listing gcDir '" << gcDir + << "': " << targets.error(); + return Nothing(); + } + + foreach (const string& target, targets.get()) { + const string path = path::join(gcDir, target); + // Run the removal operation with 'continueOnError = false'. + // A possible situation is that we incorrectly marked a layer + // which is still used by certain layer based backends (aufs, overlay). + // In such a case, we proceed with a warning and try to free up as much + // disk spaces as possible. + LOG(INFO) << "Deleting path '" << path << "'"; + Try<Nothing> rmdir = os::rmdir(path, true, true, false); + + if (rmdir.isError()) { + LOG(WARNING) << "Failed to delete '" << path << "': " + << rmdir.error(); + } else { + LOG(INFO) << "Deleted '" << path << "'"; + } + } + + return Nothing(); + }; + + // NOTE: All `rmdirs` calls are dispatched to one executor so that: + // 1. They do not block other dispatches; + // 2. They do not occupy all worker threads. + executor.execute(rmdirs); + + return Nothing(); +} + } // namespace docker { } // namespace slave { } // namespace internal { http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/store.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/provisioner/docker/store.hpp b/src/slave/containerizer/mesos/provisioner/docker/store.hpp index 1cf6866..a420fa0 100644 --- a/src/slave/containerizer/mesos/provisioner/docker/store.hpp +++ b/src/slave/containerizer/mesos/provisioner/docker/store.hpp @@ -21,6 +21,7 @@ #include <process/owned.hpp> +#include <stout/hashset.hpp> #include <stout/try.hpp> #include "slave/flags.hpp" @@ -58,6 +59,10 @@ public: const mesos::Image& image, const std::string& backend); + virtual process::Future<Nothing> prune( + const std::vector<mesos::Image>& excludeImages, + const hashset<std::string>& activeLayerPaths); + private: explicit Store(process::Owned<StoreProcess> process); http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/provisioner.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/provisioner/provisioner.cpp b/src/slave/containerizer/mesos/provisioner/provisioner.cpp index 1723588..d19dc1a 100644 --- a/src/slave/containerizer/mesos/provisioner/provisioner.cpp +++ b/src/slave/containerizer/mesos/provisioner/provisioner.cpp @@ -32,6 +32,7 @@ #include <stout/foreach.hpp> #include <stout/hashmap.hpp> #include <stout/hashset.hpp> +#include <stout/lambda.hpp> #include <stout/os.hpp> #include <stout/stringify.hpp> #include <stout/uuid.hpp> @@ -56,6 +57,7 @@ using std::vector; using process::Failure; using process::Future; using process::Owned; +using process::ReadWriteLock; using mesos::internal::slave::AUFS_BACKEND; using mesos::internal::slave::BIND_BACKEND; @@ -312,6 +314,16 @@ Future<bool> Provisioner::destroy(const ContainerID& containerId) const } +Future<Nothing> Provisioner::pruneImages( + const vector<Image>& excludedImages) const +{ + return dispatch( + CHECK_NOTNULL(process.get()), + &ProvisionerProcess::pruneImages, + excludedImages); +} + + ProvisionerProcess::ProvisionerProcess( const string& _rootDir, const string& _defaultBackend, @@ -450,20 +462,28 @@ Future<ProvisionInfo> ProvisionerProcess::provision( const ContainerID& containerId, const Image& image) { - if (!stores.contains(image.type())) { - return Failure( - "Unsupported container image type: " + - stringify(image.type())); - } + // `destroy` and `provision` can happen concurrently, but `pruneImages` + // is exclusive. + return rwLock.read_lock() + .then(defer(self(), [this, containerId, image]() -> Future<ProvisionInfo> { + if (!stores.contains(image.type())) { + return Failure( + "Unsupported container image type: " + stringify(image.type())); + } - // Get and then provision image layers from the store. - return stores.get(image.type()).get()->get(image, defaultBackend) - .then(defer(self(), - &Self::_provision, - containerId, - image, - defaultBackend, - lambda::_1)); + // Get and then provision image layers from the store. + return stores.get(image.type()).get()->get(image, defaultBackend) + .then(defer( + self(), + &Self::_provision, + containerId, + image, + defaultBackend, + lambda::_1)); + })) + .onAny(defer(self(), [this](const Future<ProvisionInfo>&) { + rwLock.read_unlock(); + })); } @@ -510,7 +530,7 @@ Future<ProvisionInfo> ProvisionerProcess::_provision( ContainerLayers containerLayers; - foreach(const string& layer, imageInfo.layers) { + foreach (const string& layer, imageInfo.layers) { containerLayers.add_paths(layer); } @@ -529,46 +549,55 @@ Future<ProvisionInfo> ProvisionerProcess::_provision( Future<bool> ProvisionerProcess::destroy(const ContainerID& containerId) { - if (!infos.contains(containerId)) { - VLOG(1) << "Ignoring destroy request for unknown container " << containerId; - - return false; - } - - if (infos[containerId]->destroying) { - return infos[containerId]->termination.future(); - } + // `destroy` and `provision` can happen concurrently, but `pruneImages` + // is exclusive. + return rwLock.read_lock() + .then(defer(self(), [this, containerId]() -> Future<bool> { + if (!infos.contains(containerId)) { + VLOG(1) << "Ignoring destroy request for unknown container " + << containerId; + + return false; + } - infos[containerId]->destroying = true; + if (infos[containerId]->destroying) { + return infos[containerId]->termination.future(); + } - // Provisioner destroy can be invoked from: - // 1. Provisioner `recover` to destroy all unknown orphans. - // 2. Containerizer `recover` to destroy known orphans. - // 3. Containerizer `destroy` on one specific container. - // - // NOTE: For (2) and (3), we expect the container being destroyed - // has no any child contain remain running. However, for case (1), - // if the container runtime directory does not survive after the - // machine reboots and the provisioner directory under the agent - // work dir still exists, all containers will be regarded as - // unknown containers and will be destroyed. In this case, a parent - // container may be destroyed before its child containers are - // cleaned up. So we have to make `destroy()` recursively for - // this particular case. - // - // TODO(gilbert): Move provisioner directory to the container - // runtime directory after a deprecation cycle to avoid - // making `provisioner::destroy()` being recursive. - list<Future<bool>> destroys; - - foreachkey (const ContainerID& entry, infos) { - if (entry.has_parent() && entry.parent() == containerId) { - destroys.push_back(destroy(entry)); - } - } + infos[containerId]->destroying = true; + + // Provisioner destroy can be invoked from: + // 1. Provisioner `recover` to destroy all unknown orphans. + // 2. Containerizer `recover` to destroy known orphans. + // 3. Containerizer `destroy` on one specific container. + // + // NOTE: For (2) and (3), we expect the container being destroyed + // has no any child contain remain running. However, for case (1), + // if the container runtime directory does not survive after the + // machine reboots and the provisioner directory under the agent + // work dir still exists, all containers will be regarded as + // unknown containers and will be destroyed. In this case, a parent + // container may be destroyed before its child containers are + // cleaned up. So we have to make `destroy()` recursively for + // this particular case. + // + // TODO(gilbert): Move provisioner directory to the container + // runtime directory after a deprecation cycle to avoid + // making `provisioner::destroy()` being recursive. + list<Future<bool>> destroys; + + foreachkey (const ContainerID& entry, infos) { + if (entry.has_parent() && entry.parent() == containerId) { + destroys.push_back(destroy(entry)); + } + } - return await(destroys) - .then(defer(self(), &Self::_destroy, containerId, lambda::_1)); + return await(destroys) + .then(defer(self(), &Self::_destroy, containerId, lambda::_1)); + })) + .onAny(defer(self(), [this](const Future<bool>&) { + rwLock.read_unlock(); + })); } @@ -661,6 +690,59 @@ Future<bool> ProvisionerProcess::__destroy(const ContainerID& containerId) } +Future<Nothing> ProvisionerProcess::pruneImages( + const vector<Image>& excludedImages) +{ + // `destroy` and `provision` can happen concurrently, but `pruneImages` + // is exclusive. + return rwLock.write_lock() + .then(defer(self(), [this, excludedImages]() -> Future<Nothing> { + hashset<string> activeLayerPaths; + + foreachpair ( + const ContainerID& containerId, const Owned<Info>& info, infos) { + if (info->layers.isNone()) { + // There are several possibilities if layer information missing: + // - legacy containers provisioned before layer checkpointing: + // they should already be excluded by the containerizer; + // - the agent crashed after `backend::provision()` finished but + // before checkpointing the `layers`. In such a case, the rootfs + // should not be used by any running containers yet so it is safe + // to skip those layers; + // - checkpointed layer files were manually deleted: we do not expect + // this to be allowd, but log it for information purpose. + VLOG(1) << "Container " << containerId + << " has no checkpointed layers"; + + continue; + } + + activeLayerPaths.insert(info->layers->begin(), info->layers->end()); + } + + list<Future<Nothing>> futures; + + foreachpair ( + const Image::Type& type, const Owned<Store>& store, stores) { + vector<Image> images; + foreach (const Image& image, excludedImages) { + if (image.type() == type) { + images.push_back(image); + } + } + + futures.push_back(store.get()->prune(images, activeLayerPaths)); + } + + return collect(futures) + .then([]() { return Nothing(); }); + })) + .onAny(defer(self(), [this](const Future<Nothing>&) { + rwLock.write_unlock(); + })); +} + + ProvisionerProcess::Metrics::Metrics() : remove_container_errors( "containerizer/mesos/provisioner/remove_container_errors") http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/provisioner.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/provisioner/provisioner.hpp b/src/slave/containerizer/mesos/provisioner/provisioner.hpp index b87144c..88d7019 100644 --- a/src/slave/containerizer/mesos/provisioner/provisioner.hpp +++ b/src/slave/containerizer/mesos/provisioner/provisioner.hpp @@ -34,6 +34,7 @@ #include <process/future.hpp> #include <process/owned.hpp> +#include <process/rwlock.hpp> #include <process/metrics/counter.hpp> #include <process/metrics/metrics.hpp> @@ -102,6 +103,12 @@ public: // provisioned root filesystem for the given container. virtual process::Future<bool> destroy(const ContainerID& containerId) const; + // Prune images in different stores. Image references in excludedImages + // will be passed to stores and retained in a best effort fashion. + // All layer paths used by active containers will not be pruned. + virtual process::Future<Nothing> pruneImages( + const std::vector<Image>& excludedImages) const; + protected: Provisioner() {} // For creating mock object. @@ -132,6 +139,9 @@ public: process::Future<bool> destroy(const ContainerID& containerId); + process::Future<Nothing> pruneImages( + const std::vector<Image>& excludedImages); + private: process::Future<ProvisionInfo> _provision( const ContainerID& containerId, @@ -186,6 +196,20 @@ private: process::metrics::Counter remove_container_errors; } metrics; + + // This `ReadWriteLock` instance is used to protect the critical + // section, which includes store directory and provision directory. + // Because `provision` and `destroy` are scoped by `containerId`, + // they are not expected to touch the same critical section + // simultaneously, so any `provision` and `destroy` can happen concurrently. + // This is guaranteed by Mesos containerizer, e.g., a `destroy` will always + // wait for a container's `provision` to finish, then do the cleanup. + // + // On the other hand, `pruneImages` needs to know all active layers from all + // containers, therefore it must be exclusive to other `provision`, `destroy` + // and `pruneImages` so that we do not prune image layers which is used by an + // active `provision` or `destroy`. + process::ReadWriteLock rwLock; }; } // namespace slave { http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/store.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/provisioner/store.cpp b/src/slave/containerizer/mesos/provisioner/store.cpp index cc5cc81..11fce0e 100644 --- a/src/slave/containerizer/mesos/provisioner/store.cpp +++ b/src/slave/containerizer/mesos/provisioner/store.cpp @@ -15,6 +15,7 @@ // limitations under the License. #include <string> +#include <vector> #include <mesos/type_utils.hpp> @@ -31,6 +32,7 @@ #include "slave/containerizer/mesos/provisioner/docker/store.hpp" using std::string; +using std::vector; using process::Owned; @@ -86,6 +88,14 @@ Try<hashmap<Image::Type, Owned<Store>>> Store::create( return stores; } + +process::Future<Nothing> Store::prune( + const vector<Image>& excludeImages, + const hashset<string>& activeLayerPaths) +{ + return Nothing(); +} + } // namespace slave { } // namespace internal { } // namespace mesos { http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/store.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/mesos/provisioner/store.hpp b/src/slave/containerizer/mesos/provisioner/store.hpp index 01ab83d..a4ae00e 100644 --- a/src/slave/containerizer/mesos/provisioner/store.hpp +++ b/src/slave/containerizer/mesos/provisioner/store.hpp @@ -31,6 +31,7 @@ #include <process/future.hpp> #include <process/owned.hpp> +#include <stout/hashset.hpp> #include <stout/try.hpp> #include "slave/flags.hpp" @@ -87,6 +88,23 @@ public: virtual process::Future<ImageInfo> get( const Image& image, const std::string& backend) = 0; + + // Prune unused images from the given store. This is called within + // an exclusive lock from `provisioner`, which means any other + // image provision or prune are blocked until the future is satsified, + // so an implementation should minimize the blocking time. + // + // Any image specified in `excludedImages` should not be pruned if + // it is already cached previously. + // + // On top of this, all layer paths used to provisioner all active + // containers are also passed in `activeLayerPaths`, and these layers + // should also be retained. Because in certain store (e.g, docker store) + // the cache is not source of truth, and we need to not only keep the + // excluded images, but also maintain the cache. + virtual process::Future<Nothing> prune( + const std::vector<Image>& excludedImages, + const hashset<std::string>& activeLayerPaths); }; } // namespace slave { http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/tests/containerizer.cpp ---------------------------------------------------------------------- diff --git a/src/tests/containerizer.cpp b/src/tests/containerizer.cpp index c6f1ec0..665bd5d 100644 --- a/src/tests/containerizer.cpp +++ b/src/tests/containerizer.cpp @@ -31,6 +31,7 @@ using process::http::Connection; using std::map; using std::shared_ptr; using std::string; +using std::vector; using testing::_; using testing::Invoke; @@ -313,6 +314,11 @@ public: return containers_.keys(); } + Future<Nothing> pruneImages() + { + return Nothing(); + } + private: struct ContainerData { @@ -437,6 +443,9 @@ void TestContainerizer::setup() EXPECT_CALL(*this, kill(_, _)) .WillRepeatedly(Invoke(this, &TestContainerizer::_kill)); + + EXPECT_CALL(*this, pruneImages()) + .WillRepeatedly(Invoke(this, &TestContainerizer::_pruneImages)); } @@ -568,6 +577,14 @@ Future<hashset<ContainerID>> TestContainerizer::containers() &TestContainerizerProcess::containers); } + +Future<Nothing> TestContainerizer::_pruneImages() +{ + return process::dispatch( + process.get(), + &TestContainerizerProcess::pruneImages); +} + } // namespace tests { } // namespace internal { } // namespace mesos { http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/tests/containerizer.hpp ---------------------------------------------------------------------- diff --git a/src/tests/containerizer.hpp b/src/tests/containerizer.hpp index c98913f..3d162fe 100644 --- a/src/tests/containerizer.hpp +++ b/src/tests/containerizer.hpp @@ -122,6 +122,10 @@ public: kill, process::Future<bool>(const ContainerID&, int)); + MOCK_METHOD0( + pruneImages, + process::Future<Nothing>()); + // Additional destroy method for testing because we won't know the // ContainerID created for each container. process::Future<bool> destroy( @@ -163,10 +167,13 @@ private: process::Future<bool> _destroy( const ContainerID& containerId); + process::Future<bool> _kill( const ContainerID& containerId, int status); + process::Future<Nothing> _pruneImages(); + process::Owned<TestContainerizerProcess> process; }; http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/tests/containerizer/mock_containerizer.hpp ---------------------------------------------------------------------- diff --git a/src/tests/containerizer/mock_containerizer.hpp b/src/tests/containerizer/mock_containerizer.hpp index 5befccc..bbfa768 100644 --- a/src/tests/containerizer/mock_containerizer.hpp +++ b/src/tests/containerizer/mock_containerizer.hpp @@ -81,6 +81,8 @@ public: MOCK_METHOD0( containers, process::Future<hashset<ContainerID>>()); + + MOCK_METHOD0(pruneImages, process::Future<Nothing>()); }; } // namespace tests {