Repository: mesos Updated Branches: refs/heads/master 3d082730b -> 05c608f80
Fixed an issue which caused Appc provisioner not to clean up the the container dir for its provisioned rootfses. - Also changed the semantics of Provisioner::create() to fail with it fails to clean up unknown orphan containers. Review: https://reviews.apache.org/r/38319 Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/05c608f8 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/05c608f8 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/05c608f8 Branch: refs/heads/master Commit: 05c608f80e3b120302bd2040780ce477677671dc Parents: 3d08273 Author: Jiang Yan Xu <y...@jxu.me> Authored: Fri Sep 11 14:15:47 2015 -0700 Committer: Jiang Yan Xu <y...@jxu.me> Committed: Fri Sep 11 15:43:34 2015 -0700 ---------------------------------------------------------------------- .../provisioners/appc/provisioner.cpp | 97 +++++++++++--------- src/slave/containerizer/provisioners/paths.cpp | 20 ++-- src/slave/containerizer/provisioners/paths.hpp | 5 + .../containerizer/appc_provisioner_tests.cpp | 20 +++- 4 files changed, 85 insertions(+), 57 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/05c608f8/src/slave/containerizer/provisioners/appc/provisioner.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/provisioners/appc/provisioner.cpp b/src/slave/containerizer/provisioners/appc/provisioner.cpp index e177e3c..cd29a00 100644 --- a/src/slave/containerizer/provisioners/appc/provisioner.cpp +++ b/src/slave/containerizer/provisioners/appc/provisioner.cpp @@ -224,40 +224,17 @@ Future<Nothing> AppcProvisionerProcess::recover( "provisioner: " + containers.error()); } - // If no container has been launched the 'containers' directory will be empty. + // Scan the list of containers, register all of them with 'infos' but + // mark unknown orphans for immediate cleanup. + hashset<ContainerID> unknownOrphans; foreachkey (const ContainerID& containerId, containers.get()) { - if (alive.contains(containerId) || orphans.contains(containerId)) { - Owned<Info> info = Owned<Info>(new Info()); - - Try<hashmap<string, hashmap<string, string>>> rootfses = - provisioners::paths::listContainerRootfses(root, containerId); - - if (rootfses.isError()) { - return Failure("Unable to list rootfses belonged to container '" + - containerId.value() + "': " + rootfses.error()); - } - - foreachkey (const string& backend, rootfses.get()) { - if (!backends.contains(backend)) { - return Failure("Found rootfses managed by an unrecognized backend: " + - backend); - } - - info->rootfses.put(backend, rootfses.get()[backend]); - } - - VLOG(1) << "Recovered container " << containerId; - infos.put(containerId, info); - - continue; - } + Owned<Info> info = Owned<Info>(new Info()); - // Destroy (unknown) orphan container's rootfses. Try<hashmap<string, hashmap<string, string>>> rootfses = provisioners::paths::listContainerRootfses(root, containerId); if (rootfses.isError()) { - return Failure("Unable to find rootfses for container '" + + return Failure("Unable to list rootfses belonged to container '" + containerId.value() + "': " + rootfses.error()); } @@ -267,28 +244,50 @@ Future<Nothing> AppcProvisionerProcess::recover( backend); } - foreachvalue (const string& rootfs, rootfses.get()[backend]) { - LOG(INFO) << "Destroying unknown orphan rootfs '" << rootfs - << "' of container " << containerId; - - // Not waiting for the destruction and we don't care about - // the return value. - backends.get(backend).get()->destroy(rootfs) - .onFailed([rootfs](const std::string& error) { - LOG(WARNING) << "Failed to destroy orphan rootfs '" << rootfs - << "': "<< error; - }); - } + info->rootfses.put(backend, rootfses.get()[backend]); + } + + infos.put(containerId, info); + + if (alive.contains(containerId) || orphans.contains(containerId)) { + VLOG(1) << "Recovered container " << containerId; + continue; + } else { + // For immediate cleanup below. + unknownOrphans.insert(containerId); } } - LOG(INFO) << "Recovered Appc provisioner rootfses"; + LOG(INFO) + << "Recovered living and known orphan containers for Appc provisioner"; + + // Destroy unknown orphan containers' rootfses. + list<Future<bool>> destroys; + foreach (const ContainerID& containerId, unknownOrphans) { + destroys.push_back(destroy(containerId)); + } - return store->recover() + Future<Nothing> cleanup = collect(destroys) + .then([]() -> Future<Nothing> { + LOG(INFO) << "Cleaned up unknown orphan containers for Appc provisioner"; + return Nothing(); + }); + + Future<Nothing> recover = store->recover() .then([]() -> Future<Nothing> { LOG(INFO) << "Recovered Appc image store"; return Nothing(); }); + + + // A successful provisioner recovery depends on: + // 1) Recovery of living containers and known orphans (done above). + // 2) Successful cleanup of unknown orphans. + // 3) Successful store recovery. + return collect(cleanup, recover) + .then([=]() -> Future<Nothing> { + return Nothing(); + }); } @@ -368,7 +367,19 @@ Future<bool> AppcProvisionerProcess::destroy(const ContainerID& containerId) // TODO(xujyan): Revisit the usefulness of this return value. return collect(futures) - .then([=](const list<bool>& results) -> Future<bool> { + .then([=]() -> Future<bool> { + // This should be fairly cheap as the directory should only contain a + // few empty sub-directories at this point. + string containerDir = + provisioners::paths::getContainerDir(root, containerId); + + Try<Nothing> rmdir = os::rmdir(containerDir); + if (rmdir.isError()) { + return Failure("Failed to remove container directory '" + + containerDir + "' of container " + + stringify(containerId)); + } + return true; }); } http://git-wip-us.apache.org/repos/asf/mesos/blob/05c608f8/src/slave/containerizer/provisioners/paths.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/provisioners/paths.cpp b/src/slave/containerizer/provisioners/paths.cpp index 267912c..4293dd2 100644 --- a/src/slave/containerizer/provisioners/paths.cpp +++ b/src/slave/containerizer/provisioners/paths.cpp @@ -45,14 +45,6 @@ static string getContainersDir(const string& provisionerDir) } -static string getContainerDir( - const string& containersDir, - const ContainerID& containerId) -{ - return path::join(containersDir, containerId.value()); -} - - static string getBackendsDir(const string& containerDir) { return path::join(containerDir, "backends"); @@ -77,6 +69,14 @@ static string getRootfsDir(const string& rootfsesDir, const string& roofsId) } +string getContainerDir( + const string& provisionerDir, + const ContainerID& containerId) +{ + return path::join(getContainersDir(provisionerDir), containerId.value()); +} + + string getContainerRootfsDir( const string& provisionerDir, const ContainerID& containerId, @@ -88,7 +88,7 @@ string getContainerRootfsDir( getBackendDir( getBackendsDir( getContainerDir( - getContainersDir(provisionerDir), + provisionerDir, containerId)), backend)), rootfsId); @@ -138,7 +138,7 @@ Try<hashmap<string, hashmap<string, string>>> listContainerRootfses( string backendsDir = getBackendsDir( getContainerDir( - getContainersDir(provisionerDir), + provisionerDir, containerId)); Try<list<string>> backends = os::ls(backendsDir); http://git-wip-us.apache.org/repos/asf/mesos/blob/05c608f8/src/slave/containerizer/provisioners/paths.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/provisioners/paths.hpp b/src/slave/containerizer/provisioners/paths.hpp index a249088..5b82591 100644 --- a/src/slave/containerizer/provisioners/paths.hpp +++ b/src/slave/containerizer/provisioners/paths.hpp @@ -50,6 +50,11 @@ namespace paths { // due to the change of backend flags. Under each backend a rootfs is // identified by the 'rootfs_id' which is a UUID. +std::string getContainerDir( + const std::string& provisionerDir, + const ContainerID& containerId); + + std::string getContainerRootfsDir( const std::string& provisionerDir, const ContainerID& containerId, http://git-wip-us.apache.org/repos/asf/mesos/blob/05c608f8/src/tests/containerizer/appc_provisioner_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/containerizer/appc_provisioner_tests.cpp b/src/tests/containerizer/appc_provisioner_tests.cpp index 73c32c1..8fee7ac 100644 --- a/src/tests/containerizer/appc_provisioner_tests.cpp +++ b/src/tests/containerizer/appc_provisioner_tests.cpp @@ -274,12 +274,15 @@ TEST_F(AppcProvisionerTest, ROOT_Provision) provisioners.get()[Image::APPC]->provision(containerId, image); AWAIT_READY(rootfs); - Try<list<string>> rootfses = os::ls(path::join( + string containerDir = path::join( flags.work_dir, "provisioners", stringify(Image::APPC), "containers", - containerId.value(), + containerId.value()); + + Try<list<string>> rootfses = os::ls(path::join( + containerDir, "backends", flags.appc_provisioner_backend, "rootfses")); @@ -295,6 +298,9 @@ TEST_F(AppcProvisionerTest, ROOT_Provision) // One rootfs is destroyed. EXPECT_TRUE(destroy.get()); + + // The container directory is successfully cleaned up. + EXPECT_FALSE(os::exists(containerDir)); } #endif // __linux__ @@ -378,12 +384,15 @@ TEST_F(AppcProvisionerTest, Recover) // from the same image. AWAIT_READY(provisioners2.get()[Image::APPC]->provision(containerId, image)); - Try<list<string>> rootfses = os::ls(path::join( + string containerDir = path::join( flags.work_dir, "provisioners", stringify(Image::APPC), "containers", - containerId.value(), + containerId.value()); + + Try<list<string>> rootfses = os::ls(path::join( + containerDir, "backends", flags.appc_provisioner_backend, "rootfses")); @@ -396,6 +405,9 @@ TEST_F(AppcProvisionerTest, Recover) Future<bool> destroy = provisioners2.get()[Image::APPC]->destroy(containerId); AWAIT_READY(destroy); EXPECT_TRUE(destroy.get()); + + // The container directory is successfully cleaned up. + EXPECT_FALSE(os::exists(containerDir)); } } // namespace tests {