Repository: mesos Updated Branches: refs/heads/master a6f1cf3a5 -> 2844a9617
Ensured that slave's work_dir is a shared mount in its own peer group when LinuxFilesystemIsolator is used. Review: https://reviews.apache.org/r/38858 Project: http://git-wip-us.apache.org/repos/asf/mesos/repo Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/2844a961 Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/2844a961 Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/2844a961 Branch: refs/heads/master Commit: 2844a9617e4cec34b41c52632971d2c472c0f63f Parents: 68a9d95 Author: Jie Yu <[email protected]> Authored: Tue Sep 29 12:08:45 2015 -0700 Committer: Jie Yu <[email protected]> Committed: Tue Sep 29 16:15:11 2015 -0700 ---------------------------------------------------------------------- src/linux/fs.hpp | 3 + .../isolators/filesystem/linux.cpp | 51 +++++++++++++-- .../containerizer/filesystem_isolator_tests.cpp | 68 ++++++++++++++++++++ 3 files changed, 115 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mesos/blob/2844a961/src/linux/fs.hpp ---------------------------------------------------------------------- diff --git a/src/linux/fs.hpp b/src/linux/fs.hpp index 0eb1642..2206220 100644 --- a/src/linux/fs.hpp +++ b/src/linux/fs.hpp @@ -206,6 +206,9 @@ struct MountInfoTable { // for the calling process. static Try<MountInfoTable> read(const Option<pid_t>& pid = None()); + // TODO(jieyu): Introduce 'find' methods to find entries that match + // the given conditions (e.g., target, root, devno, etc.). + std::vector<Entry> entries; }; http://git-wip-us.apache.org/repos/asf/mesos/blob/2844a961/src/slave/containerizer/isolators/filesystem/linux.cpp ---------------------------------------------------------------------- diff --git a/src/slave/containerizer/isolators/filesystem/linux.cpp b/src/slave/containerizer/isolators/filesystem/linux.cpp index 8fa929f..8823b78 100644 --- a/src/slave/containerizer/isolators/filesystem/linux.cpp +++ b/src/slave/containerizer/isolators/filesystem/linux.cpp @@ -93,7 +93,8 @@ Try<Isolator*> LinuxFilesystemIsolatorProcess::create( } } - // Do a self bind mount if needed. + // Do a self bind mount if needed. If the mount already exists, make + // sure it is a shared mount of its own peer group. if (workDirMount.isNone()) { // NOTE: Instead of using fs::mount to perform the bind mount, we // use the shell command here because the syscall 'mount' does not @@ -117,13 +118,49 @@ Try<Isolator*> LinuxFilesystemIsolatorProcess::create( "Failed to self bind mount '" + flags.work_dir + "' and make it a shared mount: " + mount.error()); } - } + } else { + if (workDirMount.get().shared().isNone()) { + // This is the case where the work directory mount is not a + // shared mount yet (possibly due to slave crash while preparing + // the work directory mount). It's safe to re-do the following. + Try<string> mount = os::shell( + "mount --make-slave %s && " + "mount --make-shared %s", + flags.work_dir.c_str(), + flags.work_dir.c_str()); - // TODO(jieyu): Currently, we don't check if the slave's work_dir - // mount is a shared mount or not. We just assume it is. We cannot - // simply mark the slave as shared again because that will create a - // new peer group for the mounts. This is a temporary workaround for - // now while we are thinking about fixes. + if (mount.isError()) { + return Error( + "Failed to self bind mount '" + flags.work_dir + + "' and make it a shared mount: " + mount.error()); + } + } else { + // We need to make sure that the shared mount is in its own peer + // group. To check that, we need to get the parent mount. + foreach (const fs::MountInfoTable::Entry& entry, table.get().entries) { + if (entry.id == workDirMount.get().parent) { + // If the work directory mount and its parent mount are in + // the same peer group, we need to re-do the following + // commands so that they are in different peer groups. + if (entry.shared() == workDirMount.get().shared()) { + Try<string> mount = os::shell( + "mount --make-slave %s && " + "mount --make-shared %s", + flags.work_dir.c_str(), + flags.work_dir.c_str()); + + if (mount.isError()) { + return Error( + "Failed to self bind mount '" + flags.work_dir + + "' and make it a shared mount: " + mount.error()); + } + } + + break; + } + } + } + } Owned<MesosIsolatorProcess> process( new LinuxFilesystemIsolatorProcess(flags, provisioner)); http://git-wip-us.apache.org/repos/asf/mesos/blob/2844a961/src/tests/containerizer/filesystem_isolator_tests.cpp ---------------------------------------------------------------------- diff --git a/src/tests/containerizer/filesystem_isolator_tests.cpp b/src/tests/containerizer/filesystem_isolator_tests.cpp index 41d098c..1e332e0 100644 --- a/src/tests/containerizer/filesystem_isolator_tests.cpp +++ b/src/tests/containerizer/filesystem_isolator_tests.cpp @@ -33,6 +33,10 @@ #include <stout/path.hpp> #include <stout/uuid.hpp> +#ifdef __linux__ +#include "linux/fs.hpp" +#endif + #include "slave/paths.hpp" #ifdef __linux__ @@ -904,6 +908,70 @@ TEST_F(LinuxFilesystemIsolatorTest, ROOT_SandboxEnvironmentVariable) EXPECT_TRUE(wait.get().has_status()); EXPECT_EQ(0, wait.get().status()); } + + +// This test verifies the slave's work directory mount preparation if +// the mount does not exist initially. +TEST_F(LinuxFilesystemIsolatorTest, ROOT_WorkDirMount) +{ + slave::Flags flags = CreateSlaveFlags(); + + Try<Isolator*> isolator = + LinuxFilesystemIsolatorProcess::create(flags, Owned<Provisioner>()); + + ASSERT_SOME(isolator); + + Try<fs::MountInfoTable> table = fs::MountInfoTable::read(); + ASSERT_SOME(table); + + bool mountFound = false; + foreach (const fs::MountInfoTable::Entry& entry, table.get().entries) { + if (entry.target == flags.work_dir) { + EXPECT_SOME(entry.shared()); + mountFound = true; + } + } + + EXPECT_TRUE(mountFound); + + delete isolator.get(); +} + + +// This test verifies the slave's work directory mount preparation if +// the mount already exists (e.g., to simulate the case when the slave +// crashes while preparing the work directory mount). +TEST_F(LinuxFilesystemIsolatorTest, ROOT_WorkDirMountPreExists) +{ + slave::Flags flags = CreateSlaveFlags(); + + // Simulate the situation in which the slave crashes while preparing + // the work directory mount. + ASSERT_SOME(os::shell( + "mount --bind %s %s", + flags.work_dir.c_str(), + flags.work_dir.c_str())); + + Try<Isolator*> isolator = + LinuxFilesystemIsolatorProcess::create(flags, Owned<Provisioner>()); + + ASSERT_SOME(isolator); + + Try<fs::MountInfoTable> table = fs::MountInfoTable::read(); + ASSERT_SOME(table); + + bool mountFound = false; + foreach (const fs::MountInfoTable::Entry& entry, table.get().entries) { + if (entry.target == flags.work_dir) { + EXPECT_SOME(entry.shared()); + mountFound = true; + } + } + + EXPECT_TRUE(mountFound); + + delete isolator.get(); +} #endif // __linux__ } // namespace tests {
