Repository: mesos
Updated Branches:
  refs/heads/master 8d2cebe5e -> d733b1031


Unmounted any mount points in gc paths.

In various corner cases, agent may not get chance to properly unmount
persistent volumes mounted inside an executor's sandbox. When GC later
gets to these sandbox directories, permanent data loss can happen (see
MESOS-8830).

Currently, the only mounts in the host mount namespace under the sandbox
directories are persistent volumes, so this diff added protection to
unmount any dangling mount points before calling `rmdir` on the
directory.

NOTE: this means agent will not garbage collect any path if it cannot
read its own `mountinfo` table.

Review: https://reviews.apache.org/r/67264


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/db2d1c2c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/db2d1c2c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/db2d1c2c

Branch: refs/heads/master
Commit: db2d1c2c41a07cc7c47afcf86d509d96ea2c04d3
Parents: 8d2cebe
Author: Zhitao Li <[email protected]>
Authored: Tue May 22 10:55:50 2018 -0700
Committer: Zhitao Li <[email protected]>
Committed: Tue Jun 12 13:17:54 2018 -0700

----------------------------------------------------------------------
 src/local/local.cpp       |  2 +-
 src/slave/gc.cpp          | 74 ++++++++++++++++++++++++++++++++++++++----
 src/slave/gc.hpp          |  2 +-
 src/slave/gc_process.hpp  |  7 ++--
 src/slave/main.cpp        |  2 +-
 src/tests/cluster.cpp     |  2 +-
 src/tests/gc_tests.cpp    |  6 ++--
 src/tests/mesos.cpp       |  3 +-
 src/tests/mesos.hpp       |  2 +-
 src/tests/slave_tests.cpp |  6 ++--
 10 files changed, 85 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/db2d1c2c/src/local/local.cpp
----------------------------------------------------------------------
diff --git a/src/local/local.cpp b/src/local/local.cpp
index afff546..5b7bb59 100644
--- a/src/local/local.cpp
+++ b/src/local/local.cpp
@@ -427,7 +427,7 @@ PID<Master> launch(const Flags& flags, Allocator* 
_allocator)
         << slaveFlags.runtime_dir << "': " << mkdir.error();
     }
 
-    garbageCollectors->push_back(new GarbageCollector());
+    garbageCollectors->push_back(new GarbageCollector(slaveFlags.work_dir));
     taskStatusUpdateManagers->push_back(
         new TaskStatusUpdateManager(slaveFlags));
     fetchers->push_back(new Fetcher(slaveFlags));

http://git-wip-us.apache.org/repos/asf/mesos/blob/db2d1c2c/src/slave/gc.cpp
----------------------------------------------------------------------
diff --git a/src/slave/gc.cpp b/src/slave/gc.cpp
index 390b35e..8770d88 100644
--- a/src/slave/gc.cpp
+++ b/src/slave/gc.cpp
@@ -25,6 +25,7 @@
 
 #include <process/metrics/metrics.hpp>
 
+#include <stout/adaptor.hpp>
 #include <stout/foreach.hpp>
 #include <stout/lambda.hpp>
 
@@ -32,6 +33,10 @@
 
 #include "logging/logging.hpp"
 
+#ifdef __linux__
+#include "linux/fs.hpp"
+#endif
+
 #include "slave/gc_process.hpp"
 
 using namespace process;
@@ -193,12 +198,69 @@ void GarbageCollectorProcess::remove(const Timeout& 
removalTime)
 
     Counter _succeeded = metrics.path_removals_succeeded;
     Counter _failed = metrics.path_removals_failed;
+    const string _workDir = workDir;
 
-    auto rmdirs = [_succeeded, _failed, infos]() {
+    auto rmdirs =
+      [_succeeded, _failed, _workDir, infos]() mutable -> Future<Nothing> {
       // Make mutable copies of the counters to work around MESOS-7907.
       Counter succeeded = _succeeded;
       Counter failed = _failed;
 
+#ifdef __linux__
+      // Clear any possible persistent volume mount points in `infos`. See
+      // MESOS-8830.
+      Try<fs::MountInfoTable> mountTable = fs::MountInfoTable::read();
+      if (mountTable.isError()) {
+        LOG(ERROR) << "Skipping any path deletion because of failure on read "
+                      "MountInfoTable for agent process: "
+                   << mountTable.error();
+
+        foreach (const Owned<PathInfo>& info, infos) {
+          info->promise.fail(mountTable.error());
+          ++failed;
+        }
+
+        return Failure(mountTable.error());
+      }
+
+      foreach (const fs::MountInfoTable::Entry& entry,
+               adaptor::reverse(mountTable->entries)) {
+        // Ignore mounts whose targets are not under `workDir`.
+        if (!strings::startsWith(
+                path::join(entry.target, ""),
+                path::join(_workDir, ""))) {
+                continue;
+        }
+
+        for (auto it = infos.begin(); it != infos.end(); ) {
+          const Owned<PathInfo>& info = *it;
+          // TODO(zhitao): Validate that both `info->path` and `workDir` are
+          // real paths.
+          if (strings::startsWith(
+                path::join(entry.target, ""), path::join(info->path, ""))) {
+            LOG(WARNING)
+                << "Unmounting dangling mount point '" << entry.target
+                << "' of persistent volume '" << entry.root
+                << "' inside garbage collected path '" << info->path << "'";
+
+            Try<Nothing> unmount = fs::unmount(entry.target);
+            if (unmount.isError()) {
+              LOG(WARNING) << "Skipping deletion of '"
+                           << info->path << "' because unmount failed on '"
+                           << entry.target << "': " << unmount.error();
+
+              info->promise.fail(unmount.error());
+              ++failed;
+              it = infos.erase(it);
+              continue;
+            }
+          }
+
+          it++;
+        }
+      }
+#endif // __linux__
+
       foreach (const Owned<PathInfo>& info, infos) {
         // Run the removal operation with 'continueOnError = true'.
         // It's possible for tasks and isolators to lay down files
@@ -241,11 +303,9 @@ void GarbageCollectorProcess::remove(const Timeout& 
removalTime)
 }
 
 
-void  GarbageCollectorProcess::_remove(const Future<Nothing>& result,
-                                       const list<Owned<PathInfo>> infos)
+void GarbageCollectorProcess::_remove(const Future<Nothing>& result,
+                                      const list<Owned<PathInfo>> infos)
 {
-  CHECK_READY(result);
-
   // Remove path records from `paths` and `timeouts` data structures.
   foreach (const Owned<PathInfo>& info, infos) {
     CHECK(paths.remove(timeouts[info->path], info));
@@ -268,9 +328,9 @@ void GarbageCollectorProcess::prune(const Duration& d)
 }
 
 
-GarbageCollector::GarbageCollector()
+GarbageCollector::GarbageCollector(const string& workDir)
 {
-  process = new GarbageCollectorProcess();
+  process = new GarbageCollectorProcess(workDir);
   spawn(process);
 }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/db2d1c2c/src/slave/gc.hpp
----------------------------------------------------------------------
diff --git a/src/slave/gc.hpp b/src/slave/gc.hpp
index df40165..0275c73 100644
--- a/src/slave/gc.hpp
+++ b/src/slave/gc.hpp
@@ -42,7 +42,7 @@ class GarbageCollectorProcess;
 class GarbageCollector
 {
 public:
-  GarbageCollector();
+  explicit GarbageCollector(const std::string& workDir);
   virtual ~GarbageCollector();
 
   // Schedules the specified path for removal after the specified

http://git-wip-us.apache.org/repos/asf/mesos/blob/db2d1c2c/src/slave/gc_process.hpp
----------------------------------------------------------------------
diff --git a/src/slave/gc_process.hpp b/src/slave/gc_process.hpp
index 20374ad..84c83d3 100644
--- a/src/slave/gc_process.hpp
+++ b/src/slave/gc_process.hpp
@@ -45,9 +45,10 @@ class GarbageCollectorProcess :
     public process::Process<GarbageCollectorProcess>
 {
 public:
-  GarbageCollectorProcess()
+  explicit GarbageCollectorProcess(const std::string& _workDir)
     : ProcessBase(process::ID::generate("agent-garbage-collector")),
-      metrics(this) {}
+      metrics(this),
+      workDir(_workDir) {}
 
   virtual ~GarbageCollectorProcess();
 
@@ -97,6 +98,8 @@ private:
     process::metrics::PullGauge path_removals_pending;
   } metrics;
 
+  const std::string workDir;
+
   // Store all the timeouts and corresponding paths to delete.
   // NOTE: We are using Multimap here instead of Multihashmap, because
   // we need the keys of the map (deletion time) to be sorted.

http://git-wip-us.apache.org/repos/asf/mesos/blob/db2d1c2c/src/slave/main.cpp
----------------------------------------------------------------------
diff --git a/src/slave/main.cpp b/src/slave/main.cpp
index 6461253..489e875 100644
--- a/src/slave/main.cpp
+++ b/src/slave/main.cpp
@@ -539,7 +539,7 @@ int main(int argc, char** argv)
   }
 
   Files* files = new Files(READONLY_HTTP_AUTHENTICATION_REALM, authorizer_);
-  GarbageCollector* gc = new GarbageCollector();
+  GarbageCollector* gc = new GarbageCollector(flags.work_dir);
   TaskStatusUpdateManager* taskStatusUpdateManager =
     new TaskStatusUpdateManager(flags);
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/db2d1c2c/src/tests/cluster.cpp
----------------------------------------------------------------------
diff --git a/src/tests/cluster.cpp b/src/tests/cluster.cpp
index 01eb095..cb7d3f0 100644
--- a/src/tests/cluster.cpp
+++ b/src/tests/cluster.cpp
@@ -505,7 +505,7 @@ Try<process::Owned<Slave>> Slave::create(
 
   // If the garbage collector is not provided, create a default one.
   if (gc.isNone()) {
-    slave->gc.reset(new slave::GarbageCollector());
+    slave->gc.reset(new slave::GarbageCollector(flags.work_dir));
   }
 
   // If the resource estimator is not provided, create a default one.

http://git-wip-us.apache.org/repos/asf/mesos/blob/db2d1c2c/src/tests/gc_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/gc_tests.cpp b/src/tests/gc_tests.cpp
index 619ed22..ed0fef3 100644
--- a/src/tests/gc_tests.cpp
+++ b/src/tests/gc_tests.cpp
@@ -96,7 +96,7 @@ class GarbageCollectorTest : public TemporaryDirectoryTest {};
 
 TEST_F(GarbageCollectorTest, Schedule)
 {
-  GarbageCollector gc;
+  GarbageCollector gc("work_dir");
 
   // Make some temporary files to gc.
   const string& file1 = "file1";
@@ -180,7 +180,7 @@ TEST_F(GarbageCollectorTest, Schedule)
 
 TEST_F(GarbageCollectorTest, Unschedule)
 {
-  GarbageCollector gc;
+  GarbageCollector gc("work_dir");
 
   // Attempt to unschedule a file that is not scheduled.
   AWAIT_ASSERT_FALSE(gc.unschedule("bogus"));
@@ -229,7 +229,7 @@ TEST_F(GarbageCollectorTest, Unschedule)
 
 TEST_F(GarbageCollectorTest, Prune)
 {
-  GarbageCollector gc;
+  GarbageCollector gc("work_dir");
 
   // Make some temporary files to prune.
   const string& file1 = "file1";

http://git-wip-us.apache.org/repos/asf/mesos/blob/db2d1c2c/src/tests/mesos.cpp
----------------------------------------------------------------------
diff --git a/src/tests/mesos.cpp b/src/tests/mesos.cpp
index d3c87c2..c0ab2f7 100644
--- a/src/tests/mesos.cpp
+++ b/src/tests/mesos.cpp
@@ -691,7 +691,8 @@ MockAuthorizer::MockAuthorizer()
 MockAuthorizer::~MockAuthorizer() {}
 
 
-MockGarbageCollector::MockGarbageCollector()
+MockGarbageCollector::MockGarbageCollector(const string& workDir)
+    : slave::GarbageCollector(workDir)
 {
   EXPECT_CALL(*this, unschedule(_)).WillRepeatedly(Return(true));
 }

http://git-wip-us.apache.org/repos/asf/mesos/blob/db2d1c2c/src/tests/mesos.hpp
----------------------------------------------------------------------
diff --git a/src/tests/mesos.hpp b/src/tests/mesos.hpp
index 733344a..8f529fa 100644
--- a/src/tests/mesos.hpp
+++ b/src/tests/mesos.hpp
@@ -3353,7 +3353,7 @@ public:
 class MockGarbageCollector : public slave::GarbageCollector
 {
 public:
-  MockGarbageCollector();
+  explicit MockGarbageCollector(const std::string& workDir);
   virtual ~MockGarbageCollector();
 
   // The default action is to always return `true`.

http://git-wip-us.apache.org/repos/asf/mesos/blob/db2d1c2c/src/tests/slave_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/slave_tests.cpp b/src/tests/slave_tests.cpp
index 3d67511..b46fb8e 100644
--- a/src/tests/slave_tests.cpp
+++ b/src/tests/slave_tests.cpp
@@ -4709,10 +4709,10 @@ TEST_F(SlaveTest, 
RemoveExecutorUponFailedTaskGroupLaunch)
 
   Owned<MasterDetector> detector = master.get()->createDetector();
 
-  MockGarbageCollector mockGarbageCollector;
-
   slave::Flags slaveFlags = CreateSlaveFlags();
 
+  MockGarbageCollector mockGarbageCollector(slaveFlags.work_dir);
+
   // Start a mock slave.
   Try<Owned<cluster::Slave>> slave =
     StartSlave(detector.get(), &mockGarbageCollector, slaveFlags, true);
@@ -4867,8 +4867,8 @@ TEST_F(SlaveTest, LaunchTasksReorderUnscheduleGC)
   ASSERT_SOME(master);
 
   Owned<MasterDetector> detector = master.get()->createDetector();
-  MockGarbageCollector mockGarbageCollector;
   slave::Flags slaveFlags = CreateSlaveFlags();
+  MockGarbageCollector mockGarbageCollector(slaveFlags.work_dir);
 
   // Start a mock slave.
   Try<Owned<cluster::Slave>> slave =

Reply via email to