Repository: mesos Updated Branches: refs/heads/master dee191293 -> 0b6dfacf3
Introduced bind-mount based provisioner Backend. Review: https://reviews.apache.org/r/37747 Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/0b6dfacf Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/0b6dfacf Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/0b6dfacf Branch: refs/heads/master Commit: 0b6dfacf39b0dc7eb82ac891fa417a833a69e49c Parents: c091088 Author: Jiang Yan Xu <y...@jxu.me> Authored: Mon Aug 24 12:06:58 2015 -0700 Committer: Jiang Yan Xu <y...@jxu.me> Committed: Tue Aug 25 17:06:31 2015 -0700 ---------------------------------------------------------------------- src/Makefile.am | 5 +- .../containerizer/provisioners/backend.cpp | 31 +++- .../containerizer/provisioners/backend.hpp | 7 +- .../provisioners/backends/bind.cpp | 181 +++++++++++++++++++ .../provisioners/backends/bind.hpp | 75 ++++++++ .../containerizer/provisioner_backend_tests.cpp | 102 +++++++++++ src/tests/utils.hpp | 3 +- 7 files changed, 396 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/0b6dfacf/src/Makefile.am ---------------------------------------------------------------------- diff --git a/src/Makefile.am b/src/Makefile.am index 571e1ac..ff71408 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -625,6 +625,7 @@ if OS_LINUX libmesos_no_3rdparty_la_SOURCES += slave/containerizer/isolators/filesystem/linux.cpp libmesos_no_3rdparty_la_SOURCES += slave/containerizer/isolators/filesystem/shared.cpp libmesos_no_3rdparty_la_SOURCES += slave/containerizer/linux_launcher.cpp + libmesos_no_3rdparty_la_SOURCES += slave/containerizer/provisioners/backends/bind.cpp else EXTRA_DIST += linux/cgroups.cpp EXTRA_DIST += linux/fs.cpp @@ -741,6 +742,7 @@ libmesos_no_3rdparty_la_SOURCES += \ slave/containerizer/provisioners/appc/spec.hpp \ slave/containerizer/provisioners/appc/store.hpp \ 
slave/containerizer/provisioners/backend.hpp \ + slave/containerizer/provisioners/backends/bind.hpp \ slave/containerizer/isolators/posix.hpp \ slave/containerizer/isolators/posix/disk.hpp \ slave/containerizer/isolators/cgroups/constants.hpp \ @@ -1657,7 +1659,8 @@ mesos_tests_SOURCES = \ tests/containerizer/external_containerizer_test.cpp \ tests/containerizer/isolator_tests.cpp \ tests/containerizer/memory_test_helper.cpp \ - tests/containerizer/mesos_containerizer_tests.cpp + tests/containerizer/mesos_containerizer_tests.cpp \ + tests/containerizer/provisioner_backend_tests.cpp mesos_tests_CPPFLAGS = $(MESOS_CPPFLAGS) mesos_tests_CPPFLAGS += -DSOURCE_DIR=\"$(abs_top_srcdir)\" http://git-wip-us.apache.org/repos/asf/mesos/blob/0b6dfacf/src/slave/containerizer/provisioners/backend.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/provisioners/backend.cpp b/src/slave/containerizer/provisioners/backend.cpp index 6190ce3..2f7c335 100644 --- a/src/slave/containerizer/provisioners/backend.cpp +++ b/src/slave/containerizer/provisioners/backend.cpp @@ -16,22 +16,43 @@ * limitations under the License. */ +#include <glog/logging.h> + +#include <stout/os.hpp> + #include "slave/containerizer/provisioners/backend.hpp" +#include "slave/containerizer/provisioners/backends/bind.hpp" + using namespace process; -using std::list; using std::string; -using std::vector; namespace mesos { namespace internal { namespace slave { -Try<Owned<Backend>> Backend::create(const Flags& flags) +hashmap<string, Owned<Backend>> Backend::create(const Flags& flags) { - // TODO(xujyan): Load backend implementations once they are introduced. 
- return Error("No Backend implementation available"); + hashmap<string, Try<Owned<Backend>>(*)(const Flags&)> creators; + +#ifdef __linux__ + creators.put("bind", &BindBackend::create); +#endif // __linux__ + + hashmap<string, Owned<Backend>> backends; + + foreachkey (const string& name, creators) { + Try<Owned<Backend>> backend = creators[name](flags); + if (backend.isError()) { + LOG(WARNING) << "Failed to create '" << name << "' backend: " + << backend.error(); + continue; + } + backends.put(name, backend.get()); + } + + return backends; } } // namespace slave { http://git-wip-us.apache.org/repos/asf/mesos/blob/0b6dfacf/src/slave/containerizer/provisioners/backend.hpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/provisioners/backend.hpp b/src/slave/containerizer/provisioners/backend.hpp index 46120e8..a25b4ea 100644 --- a/src/slave/containerizer/provisioners/backend.hpp +++ b/src/slave/containerizer/provisioners/backend.hpp @@ -25,6 +25,7 @@ #include <process/future.hpp> #include <process/owned.hpp> +#include <stout/hashmap.hpp> #include <stout/try.hpp> #include "slave/flags.hpp" @@ -39,7 +40,11 @@ class Backend public: virtual ~Backend() {} - static Try<process::Owned<Backend>> create(const Flags& flags); + // Return a map of all supported backends keyed by their names. Note + // that Backends that failed to be created due to incorrect flags are + // simply not added to the result. 
+ static hashmap<std::string, process::Owned<Backend>> create( + const Flags& flags); // Provision a root filesystem for a container into the specified 'rootfs' // directory by applying the specified list of root filesystem layers in http://git-wip-us.apache.org/repos/asf/mesos/blob/0b6dfacf/src/slave/containerizer/provisioners/backends/bind.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/provisioners/backends/bind.cpp b/src/slave/containerizer/provisioners/backends/bind.cpp new file mode 100644 index 0000000..1cdae61 --- /dev/null +++ b/src/slave/containerizer/provisioners/backends/bind.cpp @@ -0,0 +1,181 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include <process/dispatch.hpp>
+#include <process/process.hpp>
+
+#include <stout/foreach.hpp>
+#include <stout/os.hpp>
+
+#include "linux/fs.hpp"
+
+#include "slave/containerizer/provisioners/backends/bind.hpp"
+
+using namespace process;
+
+using std::string;
+using std::vector;
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+// Process that performs the mount and unmount operations dispatched
+// to it by BindBackend.
+class BindBackendProcess : public Process<BindBackendProcess>
+{
+public:
+  Future<Nothing> provision(const vector<string>& layers, const string& rootfs);
+
+  Future<bool> destroy(const string& rootfs);
+};
+
+
+// Creates a BindBackend. The flags are ignored. Returns an Error if the
+// current user cannot be determined or is not root.
+Try<Owned<Backend>> BindBackend::create(const Flags&)
+{
+  Result<string> user = os::user();
+  if (!user.isSome()) {
+    return Error("Failed to determine user: " +
+                 (user.isError() ? user.error() : "username not found"));
+  }
+
+  if (user.get() != "root") {
+    return Error("BindBackend requires root privileges");
+  }
+
+  return Owned<Backend>(new BindBackend(
+      Owned<BindBackendProcess>(new BindBackendProcess())));
+}
+
+
+// Terminates the underlying process and waits for it to exit.
+BindBackend::~BindBackend()
+{
+  terminate(process.get());
+  wait(process.get());
+}
+
+
+// Takes ownership of '_process' and spawns it.
+BindBackend::BindBackend(Owned<BindBackendProcess> _process)
+  : process(_process)
+{
+  spawn(CHECK_NOTNULL(process.get()));
+}
+
+
+// Forwards to BindBackendProcess::provision() on the underlying process.
+Future<Nothing> BindBackend::provision(
+    const vector<string>& layers,
+    const string& rootfs)
+{
+  return dispatch(
+      process.get(), &BindBackendProcess::provision, layers, rootfs);
+}
+
+
+// Forwards to BindBackendProcess::destroy() on the underlying process.
+Future<bool> BindBackend::destroy(const string& rootfs)
+{
+  return dispatch(process.get(), &BindBackendProcess::destroy, rootfs);
+}
+
+
+// Bind mounts the single entry of 'layers' at 'rootfs' and then remounts
+// it read-only. Fails if 'layers' is empty or has more than one entry,
+// if 'rootfs' cannot be created, or if either mount operation fails.
+Future<Nothing> BindBackendProcess::provision(
+    const vector<string>& layers,
+    const string& rootfs)
+{
+  if (layers.size() > 1) {
+    return Failure(
+        "Multiple layers are not supported by the bind backend");
+  }
+
+  if (layers.size() == 0) {
+    return Failure("No filesystem layer provided");
+  }
+
+  Try<Nothing> mkdir = os::mkdir(rootfs);
+  if (mkdir.isError()) {
+    return Failure(
+        "Failed to create container rootfs at " + rootfs + ": " +
+        mkdir.error());
+  }
+
+  // TODO(xujyan): Use MS_REC? Does any provisioner use mounts within
+  // its image store in a single layer?
+  Try<Nothing> mount = fs::mount(
+      layers.front(),
+      rootfs,
+      None(),
+      MS_BIND,
+      NULL);
+
+  if (mount.isError()) {
+    return Failure(
+        "Failed to bind mount rootfs '" + layers.front() +
+        "' to '" + rootfs + "': " + mount.error());
+  }
+
+  // And remount it read-only.
+  mount = fs::mount(
+      None(), // Ignored.
+      rootfs,
+      None(),
+      MS_BIND | MS_RDONLY | MS_REMOUNT,
+      NULL);
+
+  if (mount.isError()) {
+    string message =
+      "Failed to remount rootfs '" + rootfs + "' read-only: " + mount.error();
+
+    // The first mount succeeded, so 'rootfs' is currently a writable
+    // bind mount of the source layer. Undo it rather than leaving it
+    // behind for the caller to stumble over.
+    Try<Nothing> unmount = fs::unmount(rootfs);
+    if (unmount.isError()) {
+      message += "; failed to roll back the bind mount: " + unmount.error();
+    }
+
+    return Failure(message);
+  }
+
+  return Nothing();
+}
+
+
+// Unmounts 'rootfs' and removes the mount point directory. Returns true
+// once both succeeded, false if no entry in the mount table targets
+// 'rootfs', and a Failure if reading the mount table, unmounting or
+// removing the directory fails.
+Future<bool> BindBackendProcess::destroy(const string& rootfs)
+{
+  Try<fs::MountInfoTable> mountTable = fs::MountInfoTable::read();
+
+  if (mountTable.isError()) {
+    return Failure("Failed to read mount table: " + mountTable.error());
+  }
+
+  foreach (const fs::MountInfoTable::Entry& entry, mountTable.get().entries) {
+    // TODO(xujyan): If MS_REC was used in 'provision()' we would need to
+    // check `strings::startsWith(entry.target, rootfs)` here to unmount
+    // all nested mounts.
+    if (entry.target == rootfs) {
+      // NOTE: This would fail if the rootfs is still in use.
+      Try<Nothing> unmount = fs::unmount(entry.target);
+      if (unmount.isError()) {
+        return Failure(
+            "Failed to destroy bind-mounted rootfs '" + rootfs + "': " +
+            unmount.error());
+      }
+
+      Try<Nothing> rmdir = os::rmdir(rootfs);
+      if (rmdir.isError()) {
+        return Failure(
+            "Failed to remove rootfs mount point '" + rootfs + "': " +
+            rmdir.error());
+      }
+
+      return true;
+    }
+  }
+
+  return false;
+}
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/0b6dfacf/src/slave/containerizer/provisioners/backends/bind.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/provisioners/backends/bind.hpp b/src/slave/containerizer/provisioners/backends/bind.hpp
new file mode 100644
index 0000000..61a8838
--- /dev/null
+++ b/src/slave/containerizer/provisioners/backends/bind.hpp
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MESOS_PROVISIONER_BIND_HPP__
+#define __MESOS_PROVISIONER_BIND_HPP__
+
+#include "slave/containerizer/provisioners/backend.hpp"
+
+namespace mesos {
+namespace internal {
+namespace slave {
+
+// Forward declaration.
+class BindBackendProcess;
+
+
+// This is a specialized backend that may be useful for deployments
+// using large (multi-GB) single-layer images *and* where more recent
+// kernel features such as overlayfs are not available (overlayfs-based
+// backend tracked by MESOS-2971). For small images (10's to 100's of MB)
+// the copy backend may be sufficient. NOTE:
+// 1) BindBackend supports only a single layer. Multi-layer images will
+//    fail to provision and the container will fail to launch!
+// 2) The filesystem is read-only because all containers using this
+//    image share the source. Select writable areas can be achieved by
+//    mounting read-write volumes to places like /tmp, /var/tmp,
+//    /home, etc. using the ContainerInfo. These can be relative to
+//    the executor work directory.
+//    N.B. Since the filesystem is read-only, '--sandbox_directory' must
+//    already exist within the filesystem because the filesystem isolator
+//    is unable to create it!
+// 3) It's fast because the bind mount requires (nearly) zero IO.
+class BindBackend : public Backend
+{
+public:
+  // Terminates the underlying BindBackendProcess and waits for it.
+  virtual ~BindBackend();
+
+  // BindBackend doesn't use any flag. Creation fails with an Error if
+  // the current user cannot be determined or is not root.
+  static Try<process::Owned<Backend>> create(const Flags&);
+
+  // Bind mounts the single entry of 'layers' at 'rootfs' (after calling
+  // os::mkdir on it) and remounts it read-only. Fails if 'layers' is
+  // empty or contains more than one entry.
+  virtual process::Future<Nothing> provision(
+      const std::vector<std::string>& layers,
+      const std::string& rootfs);
+
+  // Unmounts 'rootfs' and removes the mount point directory. Yields
+  // true on success and false if no mount table entry targets 'rootfs'.
+  virtual process::Future<bool> destroy(const std::string& rootfs);
+
+private:
+  // Spawns 'process'. Private: instances are obtained via create().
+  explicit BindBackend(process::Owned<BindBackendProcess> process);
+
+  BindBackend(const BindBackend&); // Not copyable.
+  BindBackend& operator=(const BindBackend&); // Not assignable.
+
+  // The process that provision() and destroy() dispatch to.
+  process::Owned<BindBackendProcess> process;
+};
+
+} // namespace slave {
+} // namespace internal {
+} // namespace mesos {
+
+#endif // __MESOS_PROVISIONER_BIND_HPP__
http://git-wip-us.apache.org/repos/asf/mesos/blob/0b6dfacf/src/tests/containerizer/provisioner_backend_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer/provisioner_backend_tests.cpp b/src/tests/containerizer/provisioner_backend_tests.cpp
new file mode 100644
index 0000000..096bd4d
--- /dev/null
+++ b/src/tests/containerizer/provisioner_backend_tests.cpp
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <process/gtest.hpp>
+
+#include <stout/foreach.hpp>
+#include <stout/gtest.hpp>
+#include <stout/os.hpp>
+#include <stout/os/permissions.hpp>
+#include <stout/path.hpp>
+#include <stout/strings.hpp>
+
+#include "linux/fs.hpp"
+
+#include "slave/containerizer/provisioners/backends/bind.hpp"
+
+#include "tests/flags.hpp"
+#include "tests/utils.hpp"
+
+using namespace process;
+
+using namespace mesos::internal::slave;
+
+using std::string;
+using std::vector;
+
+namespace mesos {
+namespace internal {
+namespace tests {
+
+#ifdef __linux__
+// Fixture that runs each test in a temporary sandbox directory and
+// detaches any mounts left under it before the directory is removed.
+class BindBackendTest : public TemporaryDirectoryTest
+{
+protected:
+  void TearDown()
+  {
+    // Clean up by unmounting any leftover mounts in 'sandbox'.
+    Try<fs::MountInfoTable> mountTable = fs::MountInfoTable::read();
+    ASSERT_SOME(mountTable);
+
+    // TODO(xujyan): Make sandbox a plain string instead of an option.
+    ASSERT_SOME(sandbox);
+    foreach (const fs::MountInfoTable::Entry& entry, mountTable.get().entries) {
+      if (strings::startsWith(entry.target, sandbox.get())) {
+        // Best effort: the result is intentionally not checked.
+        fs::unmount(entry.target, MNT_DETACH);
+      }
+    }
+
+    TemporaryDirectoryTest::TearDown();
+  }
+};
+
+
+// Provision a rootfs using a BindBackend to another directory and
+// verify if it is read-only within the mount.
+TEST_F(BindBackendTest, ROOT_BindBackend)
+{
+  string rootfs = path::join(os::getcwd(), "source");
+
+  // Create a writable directory under the dummy rootfs.
+  Try<Nothing> mkdir = os::mkdir(path::join(rootfs, "tmp"));
+  ASSERT_SOME(mkdir);
+
+  hashmap<string, Owned<Backend>> backends = Backend::create(slave::Flags());
+  ASSERT_TRUE(backends.contains("bind"));
+
+  string target = path::join(os::getcwd(), "target");
+
+  AWAIT_READY(backends["bind"]->provision({rootfs}, target));
+
+  EXPECT_TRUE(os::stat::isdir(path::join(target, "tmp")));
+
+  // 'target' _appears_ to be writable but is really not due to read-only mount.
+  Try<mode_t> mode = os::stat::mode(path::join(target, "tmp"));
+  ASSERT_SOME(mode);
+  EXPECT_TRUE(os::Permissions(mode.get()).owner.w);
+  EXPECT_ERROR(os::write(path::join(target, "tmp", "test"), "data"));
+
+  // destroy() yields false when it finds no mount at 'target', so check
+  // the value and not just that the future became ready.
+  Future<bool> destroy = backends["bind"]->destroy(target);
+  AWAIT_READY(destroy);
+  EXPECT_TRUE(destroy.get());
+
+  EXPECT_FALSE(os::exists(target));
+}
+#endif // __linux__
+
+} // namespace tests {
+} // namespace internal {
+} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/0b6dfacf/src/tests/utils.hpp
----------------------------------------------------------------------
diff --git a/src/tests/utils.hpp b/src/tests/utils.hpp
index f2eed2e..d4fc6ac 100644
--- a/src/tests/utils.hpp
+++ b/src/tests/utils.hpp
@@ -38,9 +38,10 @@ protected:
   virtual void SetUp();
   virtual void TearDown();
 
+  Option<std::string> sandbox;
+
 private:
   std::string cwd;
-  Option<std::string> sandbox;
 };