This is an automated email from the ASF dual-hosted git repository. abudnik pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git
commit af3ca189aced5fbc537bfca571264142d4cd37b3 Author: Charles-Francois Natali <cf.nat...@gmail.com> AuthorDate: Wed Apr 1 13:40:16 2020 +0100 Handled EBUSY when destroying a cgroup. It's a workaround for kernel bugs which can cause `rmdir` to fail with `EBUSY` even though the cgroup - appears - empty. See for example https://lkml.org/lkml/2020/1/15/1349 This closes #355 --- src/linux/cgroups.cpp | 121 ++++++++++++++------- src/linux/cgroups.hpp | 14 --- src/tests/containerizer/cgroups_isolator_tests.cpp | 3 +- src/tests/containerizer/cgroups_tests.cpp | 14 +-- 4 files changed, 88 insertions(+), 64 deletions(-) diff --git a/src/linux/cgroups.cpp b/src/linux/cgroups.cpp index 2234f0d..5f10470 100644 --- a/src/linux/cgroups.cpp +++ b/src/linux/cgroups.cpp @@ -41,11 +41,13 @@ extern "C" { #include <string> #include <vector> +#include <process/after.hpp> #include <process/collect.hpp> #include <process/defer.hpp> #include <process/delay.hpp> #include <process/id.hpp> #include <process/io.hpp> +#include <process/loop.hpp> #include <process/process.hpp> #include <process/reap.hpp> @@ -263,6 +265,73 @@ static Try<Nothing> cloneCpusetCpusMems( return Nothing(); } + +// Removes a cgroup from a given hierachy. +// @param hierarchy Path to hierarchy root. +// @param cgroup Path of the cgroup relative to the hierarchy root. +// @return Some if the operation succeeds. +// Error if the operation fails. +Future<Nothing> remove(const string& hierarchy, const string& cgroup) +{ + const string path = path::join(hierarchy, cgroup); + + // We retry on EBUSY as a workaround for kernel bug + // https://lkml.org/lkml/2020/1/15/1349 and others which cause rmdir to fail + // with EBUSY even though the cgroup appears empty. + + Duration delay = Duration::zero(); + int retry = 10; + + return loop( + [=]() mutable { + auto timeout = process::after(delay); + delay = (delay == Duration::zero()) ? Milliseconds(1) : delay * 2; + return timeout; + }, + [=](const Nothing&) mutable -> Future<ControlFlow<Nothing>> { + if (::rmdir(path.c_str()) == 0) { + return process::Break(); + } else if ((errno == EBUSY) && (retry > 0)) { + --retry; + return process::Continue(); + } else { + // If the `cgroup` still exists in the hierarchy, treat this as + // an error; otherwise, treat this as a success since the `cgroup` + // has actually been cleaned up. + // Save the error string as os::exists may clobber errno. + const string error = os::strerror(errno); + if (os::exists(path::join(hierarchy, cgroup))) { + return Failure( + "Failed to remove directory '" + path + "': " + error); + } + + return process::Break(); + } + } + ); +} + + +// Removes a list of cgroups from a given hierachy. +// The cgroups are removed in order, so this function can be used to remove a +// cgroup hierarchy in a bottom-up fashion. +// @param hierarchy Path to hierarchy root. +// @param cgroups Path of the cgroups relative to the hierarchy root. +// @return Some if the operation succeeds. +// Error if the operation fails. +Future<Nothing> remove(const string& hierarchy, const vector<string>& cgroups) +{ + Future<Nothing> f = Nothing(); + + foreach (const string& cgroup, cgroups) { + f = f.then([=] { + return internal::remove(hierarchy, cgroup); + }); + } + + return f; +} + } // namespace internal { @@ -691,21 +760,6 @@ Try<Nothing> create( } -Try<Nothing> remove(const string& hierarchy, const string& cgroup) -{ - string path = path::join(hierarchy, cgroup); - - // Do NOT recursively remove cgroups. - Try<Nothing> rmdir = os::rmdir(path, false); - if (rmdir.isError()) { - return Error( - "Failed to remove cgroup '" + path + "': " + rmdir.error()); - } - - return rmdir; -} - - bool exists(const string& hierarchy, const string& cgroup) { return os::exists(path::join(hierarchy, cgroup)); @@ -1522,7 +1576,8 @@ private: void killed(const Future<vector<Nothing>>& kill) { if (kill.isReady()) { - remove(); + internal::remove(hierarchy, cgroups) + .onAny(defer(self(), &Destroyer::removed, lambda::_1)); } else if (kill.isDiscarded()) { promise.discard(); terminate(self()); @@ -1533,24 +1588,16 @@ private: } } - void remove() + void removed(const Future<Nothing>& removeAll) { - foreach (const string& cgroup, cgroups) { - Try<Nothing> remove = cgroups::remove(hierarchy, cgroup); - if (remove.isError()) { - // If the `cgroup` still exists in the hierarchy, treat this as - // an error; otherwise, treat this as a success since the `cgroup` - // has actually been cleaned up. - if (os::exists(path::join(hierarchy, cgroup))) { - promise.fail( - "Failed to remove cgroup '" + cgroup + "': " + remove.error()); - terminate(self()); - return; - } - } + if (removeAll.isReady()) { + promise.set(Nothing()); + } else if (removeAll.isDiscarded()) { + promise.discard(); + } else if (removeAll.isFailed()) { + promise.fail("Failed to remove cgroups: " + removeAll.failure()); } - promise.set(Nothing()); terminate(self()); } @@ -1592,17 +1639,7 @@ Future<Nothing> destroy(const string& hierarchy, const string& cgroup) return future; } else { // Otherwise, attempt to remove the cgroups in bottom-up fashion. - foreach (const string& cgroup, candidates) { - Try<Nothing> remove = cgroups::remove(hierarchy, cgroup); - if (remove.isError()) { - // If the `cgroup` still exists in the hierarchy, treat this as - // an error; otherwise, treat this as a success since the `cgroup` - // has actually been cleaned up. - if (os::exists(path::join(hierarchy, cgroup))) { - return Failure(remove.error()); - } - } - } + return internal::remove(hierarchy, candidates); } return Nothing(); diff --git a/src/linux/cgroups.hpp b/src/linux/cgroups.hpp index 19c0092..f3e9042 100644 --- a/src/linux/cgroups.hpp +++ b/src/linux/cgroups.hpp @@ -201,20 +201,6 @@ Try<Nothing> create( bool recursive = false); -// Remove a cgroup in a given hierarchy. To remove a cgroup, one needs -// to remove the corresponding directory in the cgroups virtual file -// system. A cgroup cannot be removed if it has processes or -// sub-cgroups inside. This function does nothing but tries to remove -// the corresponding directory of the given cgroup. It will return -// error if the remove operation fails because it has either processes -// or sub-cgroups inside. The cgroup will NOT be removed recursively. -// This function assumes that the given hierarchy and cgroup are -// valid. -// @param hierarchy Path to the hierarchy root. -// @param cgroup Path to the cgroup relative to the hierarchy root. -Try<Nothing> remove(const std::string& hierarchy, const std::string& cgroup); - - // Returns true if the given cgroup under a given hierarchy exists. // @param hierarchy Path to the hierarchy root. // @param cgroup Path to the cgroup relative to the hierarchy root. diff --git a/src/tests/containerizer/cgroups_isolator_tests.cpp b/src/tests/containerizer/cgroups_isolator_tests.cpp index 57158ae..9c048c2 100644 --- a/src/tests/containerizer/cgroups_isolator_tests.cpp +++ b/src/tests/containerizer/cgroups_isolator_tests.cpp @@ -1489,7 +1489,8 @@ TEST_F(CgroupsIsolatorTest, ROOT_CGROUPS_CreateRecursively) // We should remove cgroups_root after the slave being started // because slave will create cgroups_root dir during startup // if it's not present. - ASSERT_SOME(cgroups::remove(hierarchy.get(), flags.cgroups_root)); + ASSERT_SOME(os::rmdir( + path::join(hierarchy.get(), flags.cgroups_root), false)); ASSERT_FALSE(os::exists(flags.cgroups_root)); MockScheduler sched; diff --git a/src/tests/containerizer/cgroups_tests.cpp b/src/tests/containerizer/cgroups_tests.cpp index 984903d..0f4967f 100644 --- a/src/tests/containerizer/cgroups_tests.cpp +++ b/src/tests/containerizer/cgroups_tests.cpp @@ -377,9 +377,9 @@ TEST_F(CgroupsAnyHierarchyWithCpuMemoryTest, ROOT_CGROUPS_CreateRemove) EXPECT_ERROR(cgroups::create(baseHierarchy, "mesos_test_missing/1")); ASSERT_SOME(cgroups::create( path::join(baseHierarchy, "cpu"), "mesos_test_missing")); - EXPECT_ERROR(cgroups::remove(baseHierarchy, "invalid")); - ASSERT_SOME(cgroups::remove( - path::join(baseHierarchy, "cpu"), "mesos_test_missing")); + EXPECT_ERROR(os::rmdir(path::join(baseHierarchy, "invalid"), false)); + ASSERT_SOME(os::rmdir(path::join( + path::join(baseHierarchy, "cpu"), "mesos_test_missing"), false)); } @@ -398,8 +398,8 @@ TEST_F(CgroupsAnyHierarchyTest, ROOT_CGROUPS_Get) EXPECT_NE(cgroups->end(), find(cgroups->begin(), cgroups->end(), "mesos_test1")); - ASSERT_SOME(cgroups::remove(hierarchy, "mesos_test1")); - ASSERT_SOME(cgroups::remove(hierarchy, "mesos_test2")); + ASSERT_SOME(os::rmdir(path::join(hierarchy, "mesos_test1"), false)); + ASSERT_SOME(os::rmdir(path::join(hierarchy, "mesos_test2"), false)); } @@ -431,8 +431,8 @@ TEST_F(CgroupsAnyHierarchyTest, ROOT_CGROUPS_NestedCgroups) EXPECT_NE(cgroups->end(), find(cgroups->begin(), cgroups->end(), cgroup1)); - ASSERT_SOME(cgroups::remove(hierarchy, cgroup1)); - ASSERT_SOME(cgroups::remove(hierarchy, cgroup2)); + ASSERT_SOME(os::rmdir(path::join(hierarchy, cgroup1), false)); + ASSERT_SOME(os::rmdir(path::join(hierarchy, cgroup2), false)); }