This is an automated email from the ASF dual-hosted git repository.

abudnik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit af3ca189aced5fbc537bfca571264142d4cd37b3
Author: Charles-Francois Natali <cf.nat...@gmail.com>
AuthorDate: Wed Apr 1 13:40:16 2020 +0100

    Handled EBUSY when destroying a cgroup.
    
    It's a workaround for kernel bugs which can cause `rmdir` to fail with
    `EBUSY` even though the cgroup - appears - empty.
    See for example https://lkml.org/lkml/2020/1/15/1349
    
    This closes #355
---
 src/linux/cgroups.cpp                              | 121 ++++++++++++++-------
 src/linux/cgroups.hpp                              |  14 ---
 src/tests/containerizer/cgroups_isolator_tests.cpp |   3 +-
 src/tests/containerizer/cgroups_tests.cpp          |  14 +--
 4 files changed, 88 insertions(+), 64 deletions(-)

diff --git a/src/linux/cgroups.cpp b/src/linux/cgroups.cpp
index 2234f0d..5f10470 100644
--- a/src/linux/cgroups.cpp
+++ b/src/linux/cgroups.cpp
@@ -41,11 +41,13 @@ extern "C" {
 #include <string>
 #include <vector>
 
+#include <process/after.hpp>
 #include <process/collect.hpp>
 #include <process/defer.hpp>
 #include <process/delay.hpp>
 #include <process/id.hpp>
 #include <process/io.hpp>
+#include <process/loop.hpp>
 #include <process/process.hpp>
 #include <process/reap.hpp>
 
@@ -263,6 +265,73 @@ static Try<Nothing> cloneCpusetCpusMems(
   return Nothing();
 }
 
+
+// Removes a cgroup from a given hierachy.
+// @param   hierarchy  Path to hierarchy root.
+// @param   cgroup     Path of the cgroup relative to the hierarchy root.
+// @return  Some if the operation succeeds.
+//          Error if the operation fails.
+Future<Nothing> remove(const string& hierarchy, const string& cgroup)
+{
+  const string path = path::join(hierarchy, cgroup);
+
+  // We retry on EBUSY as a workaround for kernel bug
+  // https://lkml.org/lkml/2020/1/15/1349 and others which cause rmdir to fail
+  // with EBUSY even though the cgroup appears empty.
+
+  Duration delay = Duration::zero();
+  int retry = 10;
+
+  return loop(
+      [=]() mutable {
+        auto timeout = process::after(delay);
+        delay = (delay == Duration::zero()) ? Milliseconds(1) : delay * 2;
+        return timeout;
+      },
+      [=](const Nothing&) mutable -> Future<ControlFlow<Nothing>> {
+        if (::rmdir(path.c_str()) == 0) {
+          return process::Break();
+        } else if ((errno == EBUSY) && (retry > 0)) {
+          --retry;
+          return process::Continue();
+        } else {
+          // If the `cgroup` still exists in the hierarchy, treat this as
+          // an error; otherwise, treat this as a success since the `cgroup`
+          // has actually been cleaned up.
+          // Save the error string as os::exists may clobber errno.
+          const string error = os::strerror(errno);
+          if (os::exists(path::join(hierarchy, cgroup))) {
+            return Failure(
+                "Failed to remove directory '" + path + "': " + error);
+          }
+
+          return process::Break();
+        }
+      }
+    );
+}
+
+
+// Removes a list of cgroups from a given hierachy.
+// The cgroups are removed in order, so this function can be used to remove a
+// cgroup hierarchy in a bottom-up fashion.
+// @param   hierarchy  Path to hierarchy root.
+// @param   cgroups    Path of the cgroups relative to the hierarchy root.
+// @return  Some if the operation succeeds.
+//          Error if the operation fails.
+Future<Nothing> remove(const string& hierarchy, const vector<string>& cgroups)
+{
+  Future<Nothing> f = Nothing();
+
+  foreach (const string& cgroup, cgroups) {
+    f = f.then([=] {
+      return internal::remove(hierarchy, cgroup);
+    });
+  }
+
+  return f;
+}
+
 } // namespace internal {
 
 
@@ -691,21 +760,6 @@ Try<Nothing> create(
 }
 
 
-Try<Nothing> remove(const string& hierarchy, const string& cgroup)
-{
-  string path = path::join(hierarchy, cgroup);
-
-  // Do NOT recursively remove cgroups.
-  Try<Nothing> rmdir = os::rmdir(path, false);
-  if (rmdir.isError()) {
-    return Error(
-        "Failed to remove cgroup '" + path + "': " + rmdir.error());
-  }
-
-  return rmdir;
-}
-
-
 bool exists(const string& hierarchy, const string& cgroup)
 {
   return os::exists(path::join(hierarchy, cgroup));
@@ -1522,7 +1576,8 @@ private:
   void killed(const Future<vector<Nothing>>& kill)
   {
     if (kill.isReady()) {
-      remove();
+      internal::remove(hierarchy, cgroups)
+        .onAny(defer(self(), &Destroyer::removed, lambda::_1));
     } else if (kill.isDiscarded()) {
       promise.discard();
       terminate(self());
@@ -1533,24 +1588,16 @@ private:
     }
   }
 
-  void remove()
+  void removed(const Future<Nothing>& removeAll)
   {
-    foreach (const string& cgroup, cgroups) {
-      Try<Nothing> remove = cgroups::remove(hierarchy, cgroup);
-      if (remove.isError()) {
-        // If the `cgroup` still exists in the hierarchy, treat this as
-        // an error; otherwise, treat this as a success since the `cgroup`
-        // has actually been cleaned up.
-        if (os::exists(path::join(hierarchy, cgroup))) {
-          promise.fail(
-              "Failed to remove cgroup '" + cgroup + "': " + remove.error());
-          terminate(self());
-          return;
-        }
-      }
+    if (removeAll.isReady()) {
+      promise.set(Nothing());
+    } else if (removeAll.isDiscarded()) {
+      promise.discard();
+    } else if (removeAll.isFailed()) {
+      promise.fail("Failed to remove cgroups: " + removeAll.failure());
     }
 
-    promise.set(Nothing());
     terminate(self());
   }
 
@@ -1592,17 +1639,7 @@ Future<Nothing> destroy(const string& hierarchy, const 
string& cgroup)
     return future;
   } else {
     // Otherwise, attempt to remove the cgroups in bottom-up fashion.
-    foreach (const string& cgroup, candidates) {
-      Try<Nothing> remove = cgroups::remove(hierarchy, cgroup);
-      if (remove.isError()) {
-        // If the `cgroup` still exists in the hierarchy, treat this as
-        // an error; otherwise, treat this as a success since the `cgroup`
-        // has actually been cleaned up.
-        if (os::exists(path::join(hierarchy, cgroup))) {
-          return Failure(remove.error());
-        }
-      }
-    }
+    return internal::remove(hierarchy, candidates);
   }
 
   return Nothing();
diff --git a/src/linux/cgroups.hpp b/src/linux/cgroups.hpp
index 19c0092..f3e9042 100644
--- a/src/linux/cgroups.hpp
+++ b/src/linux/cgroups.hpp
@@ -201,20 +201,6 @@ Try<Nothing> create(
     bool recursive = false);
 
 
-// Remove a cgroup in a given hierarchy. To remove a cgroup, one needs
-// to remove the corresponding directory in the cgroups virtual file
-// system. A cgroup cannot be removed if it has processes or
-// sub-cgroups inside. This function does nothing but tries to remove
-// the corresponding directory of the given cgroup. It will return
-// error if the remove operation fails because it has either processes
-// or sub-cgroups inside. The cgroup will NOT be removed recursively.
-// This function assumes that the given hierarchy and cgroup are
-// valid.
-// @param   hierarchy   Path to the hierarchy root.
-// @param   cgroup      Path to the cgroup relative to the hierarchy root.
-Try<Nothing> remove(const std::string& hierarchy, const std::string& cgroup);
-
-
 // Returns true if the given cgroup under a given hierarchy exists.
 // @param   hierarchy   Path to the hierarchy root.
 // @param   cgroup      Path to the cgroup relative to the hierarchy root.
diff --git a/src/tests/containerizer/cgroups_isolator_tests.cpp 
b/src/tests/containerizer/cgroups_isolator_tests.cpp
index 57158ae..9c048c2 100644
--- a/src/tests/containerizer/cgroups_isolator_tests.cpp
+++ b/src/tests/containerizer/cgroups_isolator_tests.cpp
@@ -1489,7 +1489,8 @@ TEST_F(CgroupsIsolatorTest, 
ROOT_CGROUPS_CreateRecursively)
   // We should remove cgroups_root after the slave being started
   // because slave will create cgroups_root dir during startup
   // if it's not present.
-  ASSERT_SOME(cgroups::remove(hierarchy.get(), flags.cgroups_root));
+  ASSERT_SOME(os::rmdir(
+        path::join(hierarchy.get(), flags.cgroups_root), false));
   ASSERT_FALSE(os::exists(flags.cgroups_root));
 
   MockScheduler sched;
diff --git a/src/tests/containerizer/cgroups_tests.cpp 
b/src/tests/containerizer/cgroups_tests.cpp
index 984903d..0f4967f 100644
--- a/src/tests/containerizer/cgroups_tests.cpp
+++ b/src/tests/containerizer/cgroups_tests.cpp
@@ -377,9 +377,9 @@ TEST_F(CgroupsAnyHierarchyWithCpuMemoryTest, 
ROOT_CGROUPS_CreateRemove)
   EXPECT_ERROR(cgroups::create(baseHierarchy, "mesos_test_missing/1"));
   ASSERT_SOME(cgroups::create(
         path::join(baseHierarchy, "cpu"), "mesos_test_missing"));
-  EXPECT_ERROR(cgroups::remove(baseHierarchy, "invalid"));
-  ASSERT_SOME(cgroups::remove(
-        path::join(baseHierarchy, "cpu"), "mesos_test_missing"));
+  EXPECT_ERROR(os::rmdir(path::join(baseHierarchy, "invalid"), false));
+  ASSERT_SOME(os::rmdir(path::join(
+        path::join(baseHierarchy, "cpu"), "mesos_test_missing"), false));
 }
 
 
@@ -398,8 +398,8 @@ TEST_F(CgroupsAnyHierarchyTest, ROOT_CGROUPS_Get)
   EXPECT_NE(cgroups->end(),
             find(cgroups->begin(), cgroups->end(), "mesos_test1"));
 
-  ASSERT_SOME(cgroups::remove(hierarchy, "mesos_test1"));
-  ASSERT_SOME(cgroups::remove(hierarchy, "mesos_test2"));
+  ASSERT_SOME(os::rmdir(path::join(hierarchy, "mesos_test1"), false));
+  ASSERT_SOME(os::rmdir(path::join(hierarchy, "mesos_test2"), false));
 }
 
 
@@ -431,8 +431,8 @@ TEST_F(CgroupsAnyHierarchyTest, ROOT_CGROUPS_NestedCgroups)
   EXPECT_NE(cgroups->end(),
             find(cgroups->begin(), cgroups->end(), cgroup1));
 
-  ASSERT_SOME(cgroups::remove(hierarchy, cgroup1));
-  ASSERT_SOME(cgroups::remove(hierarchy, cgroup2));
+  ASSERT_SOME(os::rmdir(path::join(hierarchy, cgroup1), false));
+  ASSERT_SOME(os::rmdir(path::join(hierarchy, cgroup2), false));
 }
 
 

Reply via email to