Implemented pruneImages with a mark and sweep in docker store.

This includes the following changes:
- add a `pruneImages()` function on the chain of relevant classes;
- implement prune in docker store;
- fix mock interface to keep existing tests passing.

Review: https://reviews.apache.org/r/56721/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/bdb604a9
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/bdb604a9
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/bdb604a9

Branch: refs/heads/master
Commit: bdb604a9dc29ab7bc4b9398cf4c1a2bd8b6061c4
Parents: e273efe
Author: Zhitao Li <zhitaoli...@gmail.com>
Authored: Fri Nov 17 16:36:31 2017 -0800
Committer: Gilbert Song <songzihao1...@gmail.com>
Committed: Mon Nov 20 12:29:53 2017 -0800

----------------------------------------------------------------------
 src/slave/containerizer/composing.cpp           |  21 +++
 src/slave/containerizer/composing.hpp           |   2 +
 src/slave/containerizer/containerizer.hpp       |   3 +
 src/slave/containerizer/docker.cpp              |   7 +
 src/slave/containerizer/docker.hpp              |   2 +
 src/slave/containerizer/mesos/containerizer.cpp |  39 ++++
 src/slave/containerizer/mesos/containerizer.hpp |   4 +
 .../provisioner/docker/metadata_manager.cpp     |  50 ++++-
 .../provisioner/docker/metadata_manager.hpp     |  14 ++
 .../mesos/provisioner/docker/paths.cpp          |  25 +++
 .../mesos/provisioner/docker/paths.hpp          |  15 ++
 .../mesos/provisioner/docker/store.cpp          | 151 ++++++++++++++-
 .../mesos/provisioner/docker/store.hpp          |   5 +
 .../mesos/provisioner/provisioner.cpp           | 184 ++++++++++++++-----
 .../mesos/provisioner/provisioner.hpp           |  24 +++
 .../containerizer/mesos/provisioner/store.cpp   |  10 +
 .../containerizer/mesos/provisioner/store.hpp   |  18 ++
 src/tests/containerizer.cpp                     |  17 ++
 src/tests/containerizer.hpp                     |   7 +
 src/tests/containerizer/mock_containerizer.hpp  |   2 +
 20 files changed, 547 insertions(+), 53 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/composing.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/composing.cpp 
b/src/slave/containerizer/composing.cpp
index 64919ef..9a9e11b 100644
--- a/src/slave/containerizer/composing.cpp
+++ b/src/slave/containerizer/composing.cpp
@@ -95,6 +95,8 @@ public:
 
   Future<Nothing> remove(const ContainerID& containerId);
 
+  Future<Nothing> pruneImages();
+
 private:
   // Continuations.
   Future<Nothing> _recover();
@@ -257,6 +259,12 @@ Future<Nothing> ComposingContainerizer::remove(const 
ContainerID& containerId)
 }
 
 
+Future<Nothing> ComposingContainerizer::pruneImages()
+{
+  return dispatch(process, &ComposingContainerizerProcess::pruneImages);
+}
+
+
 ComposingContainerizerProcess::~ComposingContainerizerProcess()
 {
   foreach (Containerizer* containerizer, containerizers_) {
@@ -687,6 +695,19 @@ Future<Nothing> ComposingContainerizerProcess::remove(
   return containers_[rootContainerId]->containerizer->remove(containerId);
 }
 
+
+Future<Nothing> ComposingContainerizerProcess::pruneImages()
+{
+  list<Future<Nothing>> futures;
+
+  foreach (Containerizer* containerizer, containerizers_) {
+    futures.push_back(containerizer->pruneImages());
+  }
+
+  return collect(futures)
+    .then([]() { return Nothing(); });
+}
+
 } // namespace slave {
 } // namespace internal {
 } // namespace mesos {

http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/composing.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/composing.hpp 
b/src/slave/containerizer/composing.hpp
index c2689cf..3d00609 100644
--- a/src/slave/containerizer/composing.hpp
+++ b/src/slave/containerizer/composing.hpp
@@ -86,6 +86,8 @@ public:
 
   virtual process::Future<Nothing> remove(const ContainerID& containerId);
 
+  virtual process::Future<Nothing> pruneImages();
+
 private:
   ComposingContainerizerProcess* process;
 };

http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/containerizer.hpp 
b/src/slave/containerizer/containerizer.hpp
index 2027bd9..7a0c6fc 100644
--- a/src/slave/containerizer/containerizer.hpp
+++ b/src/slave/containerizer/containerizer.hpp
@@ -165,6 +165,9 @@ public:
   {
     return process::Failure("Unsupported");
   }
+
+  // Prune unused images from supported image stores.
+  virtual process::Future<Nothing> pruneImages() = 0;
 };
 
 } // namespace slave {

http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/docker.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/docker.cpp 
b/src/slave/containerizer/docker.cpp
index 63432a9..9918d83 100644
--- a/src/slave/containerizer/docker.cpp
+++ b/src/slave/containerizer/docker.cpp
@@ -876,6 +876,13 @@ Future<hashset<ContainerID>> 
DockerContainerizer::containers()
 }
 
 
+Future<Nothing> DockerContainerizer::pruneImages()
+{
+  VLOG(1) << "DockerContainerizer does not support pruneImages";
+  return Nothing();
+}
+
+
 Future<Nothing> DockerContainerizerProcess::recover(
     const Option<SlaveState>& state)
 {

http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/docker.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/docker.hpp 
b/src/slave/containerizer/docker.hpp
index 105c068..9df9849 100644
--- a/src/slave/containerizer/docker.hpp
+++ b/src/slave/containerizer/docker.hpp
@@ -109,6 +109,8 @@ public:
 
   virtual process::Future<hashset<ContainerID>> containers();
 
+  virtual process::Future<Nothing> pruneImages();
+
 private:
   process::Owned<DockerContainerizerProcess> process;
 };

http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp 
b/src/slave/containerizer/mesos/containerizer.cpp
index bf71db1..7f3b86d 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -656,6 +656,12 @@ Future<Nothing> MesosContainerizer::remove(const 
ContainerID& containerId)
 }
 
 
+Future<Nothing> MesosContainerizer::pruneImages()
+{
+  return dispatch(process.get(), &MesosContainerizerProcess::pruneImages);
+}
+
+
 Future<Nothing> MesosContainerizerProcess::recover(
     const Option<state::SlaveState>& state)
 {
@@ -2818,6 +2824,39 @@ Future<hashset<ContainerID>> 
MesosContainerizerProcess::containers()
 }
 
 
+Future<Nothing> MesosContainerizerProcess::pruneImages()
+{
+  vector<Image> excludedImages;
+  excludedImages.reserve(containers_.size());
+
+  foreachpair (
+      const ContainerID& containerId,
+      const Owned<Container>& container,
+      containers_) {
+    // Checkpointing ContainerConfig is introduced recently. Legacy containers
+    // do not have the information of which image is used. Image pruning is
+    // disabled.
+    if (container->config.isNone()) {
+      return Failure(
+          "Container " + stringify(containerId) +
+          " does not have ContainerConfig "
+          "checkpointed. Image pruning is disabled");
+    }
+
+    const ContainerConfig& containerConfig = container->config.get();
+    if (containerConfig.has_container_info() &&
+        containerConfig.container_info().mesos().has_image()) {
+      excludedImages.push_back(
+          containerConfig.container_info().mesos().image());
+    }
+  }
+
+  // TODO(zhitao): use std::unique to deduplicate `excludedImages`.
+
+  return provisioner->pruneImages(excludedImages);
+}
+
+
 MesosContainerizerProcess::Metrics::Metrics()
   : container_destroy_errors(
         "containerizer/mesos/container_destroy_errors")

http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.hpp 
b/src/slave/containerizer/mesos/containerizer.hpp
index e859b5b..e2739e0 100644
--- a/src/slave/containerizer/mesos/containerizer.hpp
+++ b/src/slave/containerizer/mesos/containerizer.hpp
@@ -111,6 +111,8 @@ public:
 
   virtual process::Future<Nothing> remove(const ContainerID& containerId);
 
+  virtual process::Future<Nothing> pruneImages();
+
 private:
   explicit MesosContainerizer(
       const process::Owned<MesosContainerizerProcess>& process);
@@ -181,6 +183,8 @@ public:
 
   virtual process::Future<hashset<ContainerID>> containers();
 
+  virtual process::Future<Nothing> pruneImages();
+
 private:
   enum State
   {

http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp
----------------------------------------------------------------------
diff --git 
a/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp 
b/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp
index d86afd2..1ab66c1 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.cpp
@@ -67,7 +67,8 @@ public:
       const spec::ImageReference& reference,
       bool cached);
 
-  // TODO(chenlily): Implement removal of unreferenced images.
+  Future<hashset<string>> prune(
+      const vector<spec::ImageReference>& excludedImages);
 
 private:
   // Write out metadata manager state to persistent store.
@@ -134,6 +135,16 @@ Future<Option<Image>> MetadataManager::get(
 }
 
 
+Future<hashset<string>> MetadataManager::prune(
+    const vector<spec::ImageReference>& excludedImages)
+{
+  return dispatch(
+      process.get(),
+      &MetadataManagerProcess::prune,
+      excludedImages);
+}
+
+
 Future<Image> MetadataManagerProcess::put(
     const spec::ImageReference& reference,
     const vector<string>& layerIds)
@@ -180,6 +191,43 @@ Future<Option<Image>> MetadataManagerProcess::get(
 }
 
 
+Future<hashset<string>> MetadataManagerProcess::prune(
+    const vector<spec::ImageReference>& excludedImages)
+{
+  hashmap<string, Image> retainedImages;
+  hashset<string> retainedLayers;
+
+  foreach (const spec::ImageReference& reference, excludedImages) {
+    const string imageName = stringify(reference);
+    Option<Image> image = storedImages.get(imageName);
+
+    if (image.isNone()) {
+      // This is possible if docker store was cleaned
+      // in a recovery after the container using this image was
+      // launched.
+      VLOG(1) << "Excluded docker image '" << imageName
+              << "' is not cached in metadata manager.";
+      continue;
+    }
+
+    retainedImages[imageName] = image.get();
+
+    foreach (const string& layerId, image->layer_ids()) {
+      retainedLayers.insert(layerId);
+    }
+  }
+
+  storedImages = std::move(retainedImages);
+
+  Try<Nothing> status = persist();
+  if (status.isError()) {
+    return Failure("Failed to save state of Docker images: " + status.error());
+  }
+
+  return retainedLayers;
+}
+
+
 Try<Nothing> MetadataManagerProcess::persist()
 {
   Images images;

http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.hpp
----------------------------------------------------------------------
diff --git 
a/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.hpp 
b/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.hpp
index 954da16..cfafd44 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.hpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/metadata_manager.hpp
@@ -21,6 +21,7 @@
 #include <string>
 
 #include <stout/hashmap.hpp>
+#include <stout/hashset.hpp>
 #include <stout/json.hpp>
 #include <stout/option.hpp>
 #include <stout/protobuf.hpp>
@@ -92,6 +93,19 @@ public:
       const ::docker::spec::ImageReference& reference,
       bool cached);
 
+  /**
+   * Prune images from the metadata manager by comparing
+   * existing images with active images in use. This function will
+   * remove all images not used anymore, and return the list of
+   * layers which are still referenced. The caller should
+   * ensure such layers are kept in best effort.
+   *
+   * @param excludedImages all images to exclude from pruning.
+   * @return a list of all layers still referred to.
+   */
+  process::Future<hashset<std::string>> prune(
+      const std::vector<::docker::spec::ImageReference>& excludedImages);
+
 private:
   explicit MetadataManager(process::Owned<MetadataManagerProcess> process);
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/paths.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/paths.cpp 
b/src/slave/containerizer/mesos/provisioner/docker/paths.cpp
index cd684b3..f692552 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/paths.cpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/paths.cpp
@@ -18,8 +18,12 @@
 
 #include "slave/containerizer/mesos/provisioner/docker/paths.hpp"
 
+#include <process/clock.hpp>
+
+#include <stout/os.hpp>
 #include <stout/path.hpp>
 
+using std::list;
 using std::string;
 
 namespace mesos {
@@ -46,6 +50,13 @@ string getImageLayerPath(const string& storeDir, const 
string& layerId)
 }
 
 
+Try<list<string>> listLayers(const string& storeDir)
+{
+  const string layersDir = path::join(storeDir, "layers");
+  return os::ls(layersDir);
+}
+
+
 string getImageLayerManifestPath(const string& layerPath)
 {
   return path::join(layerPath, "json");
@@ -100,6 +111,20 @@ string getStoredImagesPath(const string& storeDir)
   return path::join(storeDir, "storedImages");
 }
 
+
+string getGcDir(const string& storeDir)
+{
+  return path::join(storeDir, "gc");
+}
+
+
+string getGcLayerPath(const string& storeDir, const string& layerId)
+{
+  return path::join(
+      getGcDir(storeDir),
+      layerId + "." + stringify(process::Clock::now().duration().ns()));
+}
+
 } // namespace paths {
 } // namespace docker {
 } // namespace slave {

http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/paths.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/paths.hpp 
b/src/slave/containerizer/mesos/provisioner/docker/paths.hpp
index 232c027..0cd7f31 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/paths.hpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/paths.hpp
@@ -17,8 +17,11 @@
 #ifndef __PROVISIONER_DOCKER_PATHS_HPP__
 #define __PROVISIONER_DOCKER_PATHS_HPP__
 
+#include <list>
 #include <string>
 
+#include <stout/try.hpp>
+
 namespace mesos {
 namespace internal {
 namespace slave {
@@ -40,6 +43,7 @@ namespace paths {
  *           |-- json(manifest)
  *           |-- VERSION
  *    |--storedImages (file holding on cached images)
+ *    |--gc (dir holding marked layers to be swept)
  */
 
 // TODO(gilbert): Clean up any unused method after refactoring.
@@ -90,6 +94,17 @@ std::string getImageArchiveTarPath(
 
 std::string getStoredImagesPath(const std::string& storeDir);
 
+
+std::string getGcDir(const std::string& storeDir);
+
+
+std::string getGcLayerPath(
+    const std::string& storeDir,
+    const std::string& layerId);
+
+
+Try<std::list<std::string>> listLayers(const std::string& storeDir);
+
 } // namespace paths {
 } // namespace docker {
 } // namespace slave {

http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/store.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/store.cpp 
b/src/slave/containerizer/mesos/provisioner/docker/store.cpp
index f357710..d64e6eb 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/store.cpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/store.cpp
@@ -24,13 +24,16 @@
 #include <mesos/secret/resolver.hpp>
 
 #include <stout/hashmap.hpp>
+#include <stout/hashset.hpp>
 #include <stout/json.hpp>
 #include <stout/os.hpp>
 
 #include <process/collect.hpp>
 #include <process/defer.hpp>
 #include <process/dispatch.hpp>
+#include <process/executor.hpp>
 #include <process/id.hpp>
+#include <process/metrics/counter.hpp>
 
 #include "slave/containerizer/mesos/provisioner/constants.hpp"
 #include "slave/containerizer/mesos/provisioner/utils.hpp"
@@ -75,7 +78,9 @@ public:
     : ProcessBase(process::ID::generate("docker-provisioner-store")),
       flags(_flags),
       metadataManager(_metadataManager),
-      puller(_puller) {}
+      puller(_puller)
+  {
+  }
 
   ~StoreProcess() {}
 
@@ -85,6 +90,10 @@ public:
       const mesos::Image& image,
       const string& backend);
 
+  Future<Nothing> prune(
+      const std::vector<mesos::Image>& excludeImages,
+      const hashset<string>& activeLayerPaths);
+
 private:
   Future<Image> _get(
       const spec::ImageReference& reference,
@@ -106,11 +115,18 @@ private:
       const string& layerId,
       const string& backend);
 
+  Future<Nothing> _prune(
+      const hashset<string>& activeLayerPaths,
+      const hashset<string>& retainedImageLayers);
+
   const Flags flags;
 
   Owned<MetadataManager> metadataManager;
   Owned<Puller> puller;
   hashmap<string, Owned<Promise<Image>>> pulling;
+
+  // For executing path removals in a separated actor.
+  process::Executor executor;
 };
 
 
@@ -164,6 +180,12 @@ Try<Owned<slave::Store>> Store::create(
                  mkdir.error());
   }
 
+  mkdir = os::mkdir(paths::getGcDir(flags.docker_store_dir));
+  if (mkdir.isError()) {
+    return Error("Failed to create Docker store gc directory: " +
+                 mkdir.error());
+  }
+
   Try<Owned<MetadataManager>> metadataManager = MetadataManager::create(flags);
   if (metadataManager.isError()) {
     return Error(metadataManager.error());
@@ -203,6 +225,15 @@ Future<ImageInfo> Store::get(
 }
 
 
+Future<Nothing> Store::prune(
+    const vector<mesos::Image>& excludedImages,
+    const hashset<string>& activeLayerPaths)
+{
+  return dispatch(
+      process.get(), &StoreProcess::prune, excludedImages, activeLayerPaths);
+}
+
+
 Future<Nothing> StoreProcess::recover()
 {
   return metadataManager->recover();
@@ -447,6 +478,124 @@ Future<Nothing> StoreProcess::moveLayer(
   return Nothing();
 }
 
+
+Future<Nothing> StoreProcess::prune(
+    const vector<mesos::Image>& excludedImages,
+    const hashset<string>& activeLayerPaths)
+{
+  // All existing pulling should have finished.
+  if (!pulling.empty()) {
+    return Failure("Cannot prune and pull at the same time");
+  }
+
+  vector<spec::ImageReference> imageReferences;
+  imageReferences.reserve(excludedImages.size());
+
+  foreach (const mesos::Image& image, excludedImages) {
+    Try<spec::ImageReference> reference =
+      spec::parseImageReference(image.docker().name());
+
+    if (reference.isError()) {
+      return Failure(
+          "Failed to parse docker image '" + image.docker().name() +
+          "': " + reference.error());
+    }
+
+    imageReferences.push_back(reference.get());
+  }
+
+  return metadataManager->prune(imageReferences)
+      .then(defer(self(), &Self::_prune, activeLayerPaths, lambda::_1));
+}
+
+
+Future<Nothing> StoreProcess::_prune(
+    const hashset<string>& activeLayerRootfses,
+    const hashset<string>& retainedLayerIds)
+{
+  Try<list<string>> allLayers = paths::listLayers(flags.docker_store_dir);
+  if (allLayers.isError()) {
+    return Failure("Failed to find all layer paths: " + allLayers.error());
+  }
+
+  // Paths in provisioner are layer rootfs. Normalize them to layer
+  // path.
+  hashset<string> activeLayerPaths;
+
+  foreach (const string& rootfsPath, activeLayerRootfses) {
+    activeLayerPaths.insert(Path(rootfsPath).dirname());
+  }
+
+  foreach (const string& layerId, allLayers.get()) {
+    if (retainedLayerIds.contains(layerId)) {
+      VLOG(1) << "Layer '" << layerId << "' is retained by image store cache";
+      continue;
+    }
+
+    const string layerPath =
+      paths::getImageLayerPath(flags.docker_store_dir, layerId);
+
+    if (activeLayerPaths.contains(layerPath)) {
+      VLOG(1) << "Layer '" << layerId << "' is retained by active container";
+      continue;
+    }
+
+    const string target =
+      paths::getGcLayerPath(flags.docker_store_dir, layerId);
+
+    if (os::exists(target)) {
+      return Failure("Marking phase target '" + target + "' already exists");
+    }
+
+    VLOG(1) << "Marking layer '" << layerId << "' to gc by renaming '"
+            << layerPath << "' to '" << target << "'";
+
+    Try<Nothing> rename = os::rename(layerPath, target);
+    if (rename.isError()) {
+      return Failure(
+          "Failed to move layer from '" + layerPath +
+          "' to '" + target + "': " + rename.error());
+    }
+  }
+
+  const string gcDir = paths::getGcDir(flags.docker_store_dir);
+  auto rmdirs = [gcDir]() {
+    Try<list<string>> targets = os::ls(gcDir);
+    if (targets.isError()) {
+      LOG(WARNING) << "Error when listing gcDir '" << gcDir
+                   << "': " << targets.error();
+      return Nothing();
+    }
+
+    foreach (const string& target, targets.get()) {
+      const string path = path::join(gcDir, target);
+      // Run the removal operation with 'continueOnError = false'.
+      // A possible situation is that we incorrectly marked a layer
+      // which is still used by certain layer based backends (aufs, overlay).
+      // In such a case, we proceed with a warning and try to free up as much
+      // disk space as possible.
+      LOG(INFO) << "Deleting path '" << path << "'";
+      Try<Nothing> rmdir = os::rmdir(path, true, true, false);
+
+      if (rmdir.isError()) {
+        LOG(WARNING) << "Failed to delete '" << path << "': "
+                     << rmdir.error();
+      } else {
+        LOG(INFO) << "Deleted '" << path << "'";
+      }
+    }
+
+    return Nothing();
+  };
+
+  // NOTE: All `rmdirs` calls are dispatched to one executor so that:
+  //   1. They do not block other dispatches;
+  //   2. They do not occupy all worker threads.
+  executor.execute(rmdirs);
+
+  return Nothing();
+}
+
 } // namespace docker {
 } // namespace slave {
 } // namespace internal {

http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/docker/store.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/store.hpp 
b/src/slave/containerizer/mesos/provisioner/docker/store.hpp
index 1cf6866..a420fa0 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/store.hpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/store.hpp
@@ -21,6 +21,7 @@
 
 #include <process/owned.hpp>
 
+#include <stout/hashset.hpp>
 #include <stout/try.hpp>
 
 #include "slave/flags.hpp"
@@ -58,6 +59,10 @@ public:
       const mesos::Image& image,
       const std::string& backend);
 
+  virtual process::Future<Nothing> prune(
+      const std::vector<mesos::Image>& excludeImages,
+      const hashset<std::string>& activeLayerPaths);
+
 private:
   explicit Store(process::Owned<StoreProcess> process);
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/provisioner.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/provisioner.cpp 
b/src/slave/containerizer/mesos/provisioner/provisioner.cpp
index 1723588..d19dc1a 100644
--- a/src/slave/containerizer/mesos/provisioner/provisioner.cpp
+++ b/src/slave/containerizer/mesos/provisioner/provisioner.cpp
@@ -32,6 +32,7 @@
 #include <stout/foreach.hpp>
 #include <stout/hashmap.hpp>
 #include <stout/hashset.hpp>
+#include <stout/lambda.hpp>
 #include <stout/os.hpp>
 #include <stout/stringify.hpp>
 #include <stout/uuid.hpp>
@@ -56,6 +57,7 @@ using std::vector;
 using process::Failure;
 using process::Future;
 using process::Owned;
+using process::ReadWriteLock;
 
 using mesos::internal::slave::AUFS_BACKEND;
 using mesos::internal::slave::BIND_BACKEND;
@@ -312,6 +314,16 @@ Future<bool> Provisioner::destroy(const ContainerID& 
containerId) const
 }
 
 
+Future<Nothing> Provisioner::pruneImages(
+    const vector<Image>& excludedImages) const
+{
+  return dispatch(
+      CHECK_NOTNULL(process.get()),
+      &ProvisionerProcess::pruneImages,
+      excludedImages);
+}
+
+
 ProvisionerProcess::ProvisionerProcess(
     const string& _rootDir,
     const string& _defaultBackend,
@@ -450,20 +462,28 @@ Future<ProvisionInfo> ProvisionerProcess::provision(
     const ContainerID& containerId,
     const Image& image)
 {
-  if (!stores.contains(image.type())) {
-    return Failure(
-        "Unsupported container image type: " +
-        stringify(image.type()));
-  }
+  // `destroy` and `provision` can happen concurrently, but `pruneImages`
+  // is exclusive.
+  return rwLock.read_lock()
+    .then(defer(self(), [this, containerId, image]() -> Future<ProvisionInfo> {
+      if (!stores.contains(image.type())) {
+        return Failure(
+            "Unsupported container image type: " + stringify(image.type()));
+      }
 
-  // Get and then provision image layers from the store.
-  return stores.get(image.type()).get()->get(image, defaultBackend)
-    .then(defer(self(),
-                &Self::_provision,
-                containerId,
-                image,
-                defaultBackend,
-                lambda::_1));
+      // Get and then provision image layers from the store.
+      return stores.get(image.type()).get()->get(image, defaultBackend)
+        .then(defer(
+            self(),
+            &Self::_provision,
+            containerId,
+            image,
+            defaultBackend,
+            lambda::_1));
+    }))
+    .onAny(defer(self(), [this](const Future<ProvisionInfo>&) {
+      rwLock.read_unlock();
+    }));
 }
 
 
@@ -510,7 +530,7 @@ Future<ProvisionInfo> ProvisionerProcess::_provision(
 
       ContainerLayers containerLayers;
 
-      foreach(const string& layer, imageInfo.layers) {
+      foreach (const string& layer, imageInfo.layers) {
         containerLayers.add_paths(layer);
       }
 
@@ -529,46 +549,55 @@ Future<ProvisionInfo> ProvisionerProcess::_provision(
 
 Future<bool> ProvisionerProcess::destroy(const ContainerID& containerId)
 {
-  if (!infos.contains(containerId)) {
-    VLOG(1) << "Ignoring destroy request for unknown container " << 
containerId;
-
-    return false;
-  }
-
-  if (infos[containerId]->destroying) {
-    return infos[containerId]->termination.future();
-  }
+  // `destroy` and `provision` can happen concurrently, but `pruneImages`
+  // is exclusive.
+  return rwLock.read_lock()
+    .then(defer(self(), [this, containerId]() -> Future<bool> {
+      if (!infos.contains(containerId)) {
+        VLOG(1) << "Ignoring destroy request for unknown container "
+                << containerId;
+
+        return false;
+      }
 
-  infos[containerId]->destroying = true;
+      if (infos[containerId]->destroying) {
+        return infos[containerId]->termination.future();
+      }
 
-  // Provisioner destroy can be invoked from:
-  // 1. Provisioner `recover` to destroy all unknown orphans.
-  // 2. Containerizer `recover` to destroy known orphans.
-  // 3. Containerizer `destroy` on one specific container.
-  //
-  // NOTE: For (2) and (3), we expect the container being destroyed
-  // has no any child contain remain running. However, for case (1),
-  // if the container runtime directory does not survive after the
-  // machine reboots and the provisioner directory under the agent
-  // work dir still exists, all containers will be regarded as
-  // unknown containers and will be destroyed. In this case, a parent
-  // container may be destroyed before its child containers are
-  // cleaned up. So we have to make `destroy()` recursively for
-  // this particular case.
-  //
-  // TODO(gilbert): Move provisioner directory to the container
-  // runtime directory after a deprecation cycle to avoid
-  // making `provisioner::destroy()` being recursive.
-  list<Future<bool>> destroys;
-
-  foreachkey (const ContainerID& entry, infos) {
-    if (entry.has_parent() && entry.parent() == containerId) {
-      destroys.push_back(destroy(entry));
-    }
-  }
+      infos[containerId]->destroying = true;
+
+      // Provisioner destroy can be invoked from:
+      // 1. Provisioner `recover` to destroy all unknown orphans.
+      // 2. Containerizer `recover` to destroy known orphans.
+      // 3. Containerizer `destroy` on one specific container.
+      //
+      // NOTE: For (2) and (3), we expect the container being destroyed
+      // has no child containers remaining running. However, for case (1),
+      // if the container runtime directory does not survive after the
+      // machine reboots and the provisioner directory under the agent
+      // work dir still exists, all containers will be regarded as
+      // unknown containers and will be destroyed. In this case, a parent
+      // container may be destroyed before its child containers are
+      // cleaned up. So we have to make `destroy()` recursively for
+      // this particular case.
+      //
+      // TODO(gilbert): Move provisioner directory to the container
+      // runtime directory after a deprecation cycle to avoid
+      // making `provisioner::destroy()` being recursive.
+      list<Future<bool>> destroys;
+
+      foreachkey (const ContainerID& entry, infos) {
+        if (entry.has_parent() && entry.parent() == containerId) {
+          destroys.push_back(destroy(entry));
+        }
+      }
 
-  return await(destroys)
-    .then(defer(self(), &Self::_destroy, containerId, lambda::_1));
+      return await(destroys)
+        .then(defer(self(), &Self::_destroy, containerId, lambda::_1));
+    }))
+    .onAny(defer(self(), [this](const Future<bool>&) {
+      rwLock.read_unlock();
+    }));
 }
 
 
@@ -661,6 +690,59 @@ Future<bool> ProvisionerProcess::__destroy(const 
ContainerID& containerId)
 }
 
 
+Future<Nothing> ProvisionerProcess::pruneImages(
+    const vector<Image>& excludedImages)
+{
+  // `destroy` and `provision` can happen concurrently, but `pruneImages`
+  // is exclusive.
+  return rwLock.write_lock()
+    .then(defer(self(), [this, excludedImages]() -> Future<Nothing> {
+      hashset<string> activeLayerPaths;
+
+      foreachpair (
+          const ContainerID& containerId, const Owned<Info>& info, infos) {
+        if (info->layers.isNone()) {
+          // There are several possibilities if layer information is missing:
+          // - legacy containers provisioned before layer checkpointing:
+          //   they should already be excluded by the containerizer;
+          // - the agent crashed after `backend::provision()` finished but
+          //   before checkpointing the `layers`. In such a case, the rootfs
+          //   should not be used by any running containers yet so it is safe
+          //   to skip those layers;
+          // - checkpointed layer files were manually deleted: we do not expect
+          //   this to be allowed, but log it for informational purposes.
+          VLOG(1) << "Container " << containerId
+                  << " has no checkpointed layers";
+
+          continue;
+        }
+
+        activeLayerPaths.insert(info->layers->begin(), info->layers->end());
+      }
+
+      list<Future<Nothing>> futures;
+
+      foreachpair (
+          const Image::Type& type, const Owned<Store>& store, stores) {
+        vector<Image> images;
+        foreach (const Image& image, excludedImages) {
+          if (image.type() == type) {
+            images.push_back(image);
+          }
+        }
+
+        futures.push_back(store.get()->prune(images, activeLayerPaths));
+      }
+
+      return collect(futures)
+        .then([]() { return Nothing(); });
+    }))
+    .onAny(defer(self(), [this](const Future<Nothing>&) {
+      rwLock.write_unlock();
+    }));
+}
+
+
 ProvisionerProcess::Metrics::Metrics()
   : remove_container_errors(
       "containerizer/mesos/provisioner/remove_container_errors")

http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/provisioner.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/provisioner.hpp 
b/src/slave/containerizer/mesos/provisioner/provisioner.hpp
index b87144c..88d7019 100644
--- a/src/slave/containerizer/mesos/provisioner/provisioner.hpp
+++ b/src/slave/containerizer/mesos/provisioner/provisioner.hpp
@@ -34,6 +34,7 @@
 
 #include <process/future.hpp>
 #include <process/owned.hpp>
+#include <process/rwlock.hpp>
 
 #include <process/metrics/counter.hpp>
 #include <process/metrics/metrics.hpp>
@@ -102,6 +103,12 @@ public:
   // provisioned root filesystem for the given container.
   virtual process::Future<bool> destroy(const ContainerID& containerId) const;
 
+  // Prune images in different stores. Image references in excludedImages
+  // will be passed to stores and retained in a best effort fashion.
+  // All layer paths used by active containers will not be pruned.
+  virtual process::Future<Nothing> pruneImages(
+      const std::vector<Image>& excludedImages) const;
+
 protected:
   Provisioner() {} // For creating mock object.
 
@@ -132,6 +139,9 @@ public:
 
   process::Future<bool> destroy(const ContainerID& containerId);
 
+  process::Future<Nothing> pruneImages(
+      const std::vector<Image>& excludedImages);
+
 private:
   process::Future<ProvisionInfo> _provision(
       const ContainerID& containerId,
@@ -186,6 +196,20 @@ private:
 
     process::metrics::Counter remove_container_errors;
   } metrics;
+
+  // This `ReadWriteLock` instance is used to protect the critical
+  // section, which includes store directory and provision directory.
+  // Because `provision` and `destroy` are scoped by `containerId`,
+  // they are not expected to touch the same critical section
+  // simultaneously, so any `provision` and `destroy` can happen concurrently.
+  // This is guaranteed by Mesos containerizer, e.g., a `destroy` will always
+  // wait for a container's `provision` to finish, then do the cleanup.
+  //
+  // On the other hand, `pruneImages` needs to know all active layers from all
+  // containers, therefore it must be exclusive to other `provision`, `destroy`
+  // and `pruneImages` so that we do not prune image layers which are used by an
+  // active `provision` or `destroy`.
+  process::ReadWriteLock rwLock;
 };
 
 } // namespace slave {

http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/store.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/store.cpp 
b/src/slave/containerizer/mesos/provisioner/store.cpp
index cc5cc81..11fce0e 100644
--- a/src/slave/containerizer/mesos/provisioner/store.cpp
+++ b/src/slave/containerizer/mesos/provisioner/store.cpp
@@ -15,6 +15,7 @@
 // limitations under the License.
 
 #include <string>
+#include <vector>
 
 #include <mesos/type_utils.hpp>
 
@@ -31,6 +32,7 @@
 #include "slave/containerizer/mesos/provisioner/docker/store.hpp"
 
 using std::string;
+using std::vector;
 
 using process::Owned;
 
@@ -86,6 +88,14 @@ Try<hashmap<Image::Type, Owned<Store>>> Store::create(
   return stores;
 }
 
+
+process::Future<Nothing> Store::prune(
+    const vector<Image>& excludeImages,
+    const hashset<string>& activeLayerPaths)
+{
+  return Nothing();
+}
+
 } // namespace slave {
 } // namespace internal {
 } // namespace mesos {

http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/slave/containerizer/mesos/provisioner/store.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/store.hpp 
b/src/slave/containerizer/mesos/provisioner/store.hpp
index 01ab83d..a4ae00e 100644
--- a/src/slave/containerizer/mesos/provisioner/store.hpp
+++ b/src/slave/containerizer/mesos/provisioner/store.hpp
@@ -31,6 +31,7 @@
 #include <process/future.hpp>
 #include <process/owned.hpp>
 
+#include <stout/hashset.hpp>
 #include <stout/try.hpp>
 
 #include "slave/flags.hpp"
@@ -87,6 +88,23 @@ public:
   virtual process::Future<ImageInfo> get(
       const Image& image,
       const std::string& backend) = 0;
+
+  // Prune unused images from the given store. This is called within
+  // an exclusive lock from `provisioner`, which means any other
+  // image provision or prune are blocked until the future is satsified,
+  // so an implementation should minimize the blocking time.
+  //
+  // Any image specified in `excludedImages` should not be pruned if
+  // it is already cached previously.
+  //
+  // On top of this, all layer paths used to provision all active
+  // containers are also passed in `activeLayerPaths`, and these layers
+  // should also be retained. This is because in certain stores (e.g.,
+  // the docker store) the cache is not the source of truth, so we need
+  // to not only keep the excluded images, but also maintain the cache.
+  virtual process::Future<Nothing> prune(
+      const std::vector<Image>& excludedImages,
+      const hashset<std::string>& activeLayerPaths);
 };
 
 } // namespace slave {

http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/tests/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer.cpp b/src/tests/containerizer.cpp
index c6f1ec0..665bd5d 100644
--- a/src/tests/containerizer.cpp
+++ b/src/tests/containerizer.cpp
@@ -31,6 +31,7 @@ using process::http::Connection;
 using std::map;
 using std::shared_ptr;
 using std::string;
+using std::vector;
 
 using testing::_;
 using testing::Invoke;
@@ -313,6 +314,11 @@ public:
     return containers_.keys();
   }
 
+  Future<Nothing> pruneImages()
+  {
+    return Nothing();
+  }
+
 private:
   struct ContainerData
   {
@@ -437,6 +443,9 @@ void TestContainerizer::setup()
 
   EXPECT_CALL(*this, kill(_, _))
     .WillRepeatedly(Invoke(this, &TestContainerizer::_kill));
+
+  EXPECT_CALL(*this, pruneImages())
+    .WillRepeatedly(Invoke(this, &TestContainerizer::_pruneImages));
 }
 
 
@@ -568,6 +577,14 @@ Future<hashset<ContainerID>> 
TestContainerizer::containers()
       &TestContainerizerProcess::containers);
 }
 
+
+Future<Nothing> TestContainerizer::_pruneImages()
+{
+  return process::dispatch(
+      process.get(),
+      &TestContainerizerProcess::pruneImages);
+}
+
 } // namespace tests {
 } // namespace internal {
 } // namespace mesos {

http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/tests/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer.hpp b/src/tests/containerizer.hpp
index c98913f..3d162fe 100644
--- a/src/tests/containerizer.hpp
+++ b/src/tests/containerizer.hpp
@@ -122,6 +122,10 @@ public:
       kill,
       process::Future<bool>(const ContainerID&, int));
 
+  MOCK_METHOD0(
+      pruneImages,
+      process::Future<Nothing>());
+
   // Additional destroy method for testing because we won't know the
   // ContainerID created for each container.
   process::Future<bool> destroy(
@@ -163,10 +167,13 @@ private:
   process::Future<bool> _destroy(
       const ContainerID& containerId);
 
+
   process::Future<bool> _kill(
       const ContainerID& containerId,
       int status);
 
+  process::Future<Nothing> _pruneImages();
+
   process::Owned<TestContainerizerProcess> process;
 };
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/bdb604a9/src/tests/containerizer/mock_containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer/mock_containerizer.hpp 
b/src/tests/containerizer/mock_containerizer.hpp
index 5befccc..bbfa768 100644
--- a/src/tests/containerizer/mock_containerizer.hpp
+++ b/src/tests/containerizer/mock_containerizer.hpp
@@ -81,6 +81,8 @@ public:
   MOCK_METHOD0(
       containers,
       process::Future<hashset<ContainerID>>());
+
+  MOCK_METHOD0(pruneImages, process::Future<Nothing>());
 };
 
 } // namespace tests {

Reply via email to