Support for entering and configuring a Linux chroot.

Review: https://reviews.apache.org/r/32891/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/6950310f
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/6950310f
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/6950310f

Branch: refs/heads/master
Commit: 6950310f75ab1a5235e1ae03578797f0a01b2a5e
Parents: 828806c
Author: Ian Downes <[email protected]>
Authored: Mon Apr 6 10:16:38 2015 -0700
Committer: Ian Downes <[email protected]>
Committed: Tue Jul 7 15:39:52 2015 -0700

----------------------------------------------------------------------
 src/linux/fs.cpp | 270 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/linux/fs.hpp |  18 ++++
 2 files changed, 288 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/6950310f/src/linux/fs.cpp
----------------------------------------------------------------------
diff --git a/src/linux/fs.cpp b/src/linux/fs.cpp
index 568565f..6d5af25 100644
--- a/src/linux/fs.cpp
+++ b/src/linux/fs.cpp
@@ -28,12 +28,16 @@
 #include <stout/strings.hpp>
 #include <stout/synchronized.hpp>
 
+#include <stout/fs.hpp>
+#include <stout/os.hpp>
+
 #include <stout/os/read.hpp>
 #include <stout/os/stat.hpp>
 
 #include "linux/fs.hpp"
 
 using std::string;
+using std::vector;
 
 namespace mesos {
 namespace internal {
@@ -278,6 +282,21 @@ Try<Nothing> mount(const Option<string>& source,
 }
 
 
+Try<Nothing> mount(const Option<string>& source,
+                   const string& target,
+                   const Option<string>& type,
+                   unsigned long flags,
+                   const Option<string>& options)
+{
+  // Delegate to the 'const void*' overload: the filesystem specific
+  // data is the option string's buffer when options were given and
+  // NULL otherwise.
+  const void* data = options.isSome() ? options.get().c_str() : NULL;
+
+  return mount(source, target, type, flags, data);
+}
+
+
 Try<Nothing> unmount(const string& target, int flags)
 {
   // The prototype of function 'umount2' on Linux is as follows:
@@ -334,6 +353,257 @@ Try<Nothing> pivot_root(
 }
 
 
+namespace chroot {
+
+namespace internal {
+
+Try<Nothing> copyDeviceNode(const string& source, const string& target)
+{
+  // Recreate 'source' at 'target' with the same mode and device id.
+  // We are likely multi-threaded so it's not safe to change the umask;
+  // instead, permissions are set explicitly after creating the node.
+  Try<mode_t> mode = os::stat::mode(source);
+  if (mode.isError()) {
+    return Error("Failed to get source mode: " + mode.error());
+  }
+
+  Try<dev_t> dev = os::stat::rdev(source);
+  if (dev.isError()) {
+    return Error("Failed to get source dev: " + dev.error());
+  }
+
+  Try<Nothing> mknod = os::mknod(target, mode.get(), dev.get());
+  if (mknod.isError()) {
+    return Error("Failed to create device: " + mknod.error());
+  }
+
+  Try<Nothing> chmod = os::chmod(target, mode.get());
+  if (chmod.isError()) {
+    return Error("Failed to chmod device: " + chmod.error());
+  }
+
+  return Nothing();
+}
+
+
+// Helper types. Field order matters: they are brace-initialized below.
+struct Mount
+{
+  Option<string> source;
+  string target;
+  Option<string> type;
+  Option<string> options;
+  unsigned long flags;
+};
+
+struct SymLink
+{
+  string original;
+  string link;
+};
+
+
+Try<Nothing> mountSpecialFilesystems(const string& root)
+{
+  // List of special filesystems useful for a chroot environment.
+  // NOTE: This list is ordered, e.g., mount /proc before bind
+  // mounting /proc/sys and then making it read-only.
+  vector<Mount> mounts = {
+    {"proc",      "/proc",     "proc",   None(),      MS_NOSUID | MS_NOEXEC | MS_NODEV},             // NOLINT(whitespace/line_length)
+    {"/proc/sys", "/proc/sys", None(),   None(),      MS_BIND},
+    {None(),      "/proc/sys", None(),   None(),      MS_BIND | MS_RDONLY | MS_REMOUNT},             // NOLINT(whitespace/line_length)
+    {"sysfs",     "/sys",      "sysfs",  None(),      MS_RDONLY | MS_NOSUID | MS_NOEXEC | MS_NODEV}, // NOLINT(whitespace/line_length)
+    {"tmpfs",     "/dev",      "tmpfs",  "mode=755",  MS_NOSUID | MS_STRICTATIME},                   // NOLINT(whitespace/line_length)
+    {"devpts",    "/dev/pts",  "devpts", "newinstance,ptmxmode=0666", MS_NOSUID | MS_NOEXEC},        // NOLINT(whitespace/line_length)
+    {"tmpfs",     "/dev/shm",  "tmpfs",  "mode=1777", MS_NOSUID | MS_NODEV | MS_STRICTATIME},        // NOLINT(whitespace/line_length)
+  };
+
+  foreach (const Mount& mount, mounts) {
+    // Target is always under the new root.
+    const string target = path::join(root, mount.target);
+
+    // Try to create the mount point, if it doesn't already exist.
+    if (!os::exists(target)) {
+      Try<Nothing> mkdir = os::mkdir(target);
+
+      if (mkdir.isError()) {
+        return Error("Failed to create mount point '" + target +
+                     "': " + mkdir.error());
+      }
+    }
+
+    // If source is a path, e.g., for a bind mount, then it needs to
+    // be prefixed by the new root.
+    Option<string> source;
+    if (mount.source.isSome() && strings::startsWith(mount.source.get(), "/")) {
+      source = path::join(root, mount.source.get());
+    } else {
+      source = mount.source;
+    }
+
+    Try<Nothing> mnt = fs::mount(
+        source,
+        target,
+        mount.type,
+        mount.flags,
+        mount.options);
+
+    if (mnt.isError()) {
+      return Error("Failed to mount '" + target + "': " + mnt.error());
+    }
+  }
+
+  return Nothing();
+}
+
+
+Try<Nothing> createStandardDevices(const string& root)
+{
+  // List of standard devices useful for a chroot environment.
+  // TODO(idownes): Make this list configurable.
+  vector<string> devices = {
+    "full",
+    "null",
+    "random",
+    "tty",
+    "urandom",
+    "zero"
+  };
+
+  foreach (const string& device, devices) {
+    // Copy the mode and device from the corresponding host device.
+    Try<Nothing> copy = copyDeviceNode(
+        path::join("/",  "dev", device),
+        path::join(root, "dev", device));
+    if (copy.isError()) {
+      return Error("Failed to copy device '" + device + "': " + copy.error());
+    }
+  }
+
+  // Link targets are left unprefixed so they resolve inside the chroot.
+  vector<SymLink> symlinks = {
+    {"/proc/self/fd/0", path::join(root, "dev", "stdin")},
+    {"/proc/self/fd/1", path::join(root, "dev", "stdout")},
+    {"/proc/self/fd/2", path::join(root, "dev", "stderr")},
+    {"pts/ptmx",        path::join(root, "dev", "ptmx")}
+  };
+
+  foreach (const SymLink& symlink, symlinks) {
+    Try<Nothing> link = ::fs::symlink(symlink.original, symlink.link);
+    if (link.isError()) {
+      return Error("Failed to symlink '" + symlink.original +
+                   "' to '" + symlink.link + "': " + link.error());
+    }
+  }
+
+  // TODO(idownes): Set up console device.
+  return Nothing();
+}
+
+} // namespace internal {
+
+// TODO(idownes): Add unit test.
+Try<Nothing> enter(const string& root)
+{
+  // Recursively mark current mounts as slaves to prevent propagation.
+  Try<Nothing> mount = fs::mount(None(), "/", None(), MS_REC | MS_SLAVE, NULL);
+  if (mount.isError()) {
+    return Error("Failed to make slave mounts: " + mount.error());
+  }
+
+  // Mount special filesystems.
+  mount = internal::mountSpecialFilesystems(root);
+  if (mount.isError()) {
+    return Error("Failed to mount: " + mount.error());
+  }
+
+  // Create basic device nodes.
+  Try<Nothing> create = internal::createStandardDevices(root);
+  if (create.isError()) {
+    return Error("Failed to create devices: " + create.error());
+  }
+
+  // Create a /tmp directory if it doesn't exist.
+  // TODO(idownes): Consider mounting a tmpfs to /tmp.
+  if (!os::exists(path::join(root, "tmp"))) {
+    Try<Nothing> mkdir = os::mkdir(path::join(root, "tmp"));
+     if (mkdir.isError()) {
+       return Error("Failed to create /tmp in chroot: " + mkdir.error());
+     }
+
+     Try<Nothing> chmod = os::chmod(
+         path::join(root, "tmp"),
+         S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
+
+     if (chmod.isError()) {
+       return Error("Failed to set mode on /tmp: " + chmod.error());
+     }
+  }
+
+  // Create a mount point for the old root.
+  Try<string> old = os::mkdtemp(path::join(root, "tmp", "._old_root_.XXXXXX"));
+  if (old.isError()) {
+    return Error("Failed to create mount point for old root: " + old.error());
+  }
+
+  // Chroot to the new root. This is done by a particular sequence of
+  // operations, each of which is necessary: chdir, pivot_root,
+  // chroot, chdir. After these operations, the process will be
+  // chrooted to the new root.
+
+  // Chdir to the new root.
+  Try<Nothing> chdir = os::chdir(root);
+  if (chdir.isError()) {
+    return Error("Failed to chdir to new root: " + chdir.error());
+  }
+
+  // Pivot the root to the cwd, leaving the old root mounted at 'old'.
+  Try<Nothing> pivot = fs::pivot_root(root, old.get());
+  if (pivot.isError()) {
+    return Error("Failed to pivot to new root: " + pivot.error());
+  }
+
+  // Chroot to the new "/". This is necessary to correctly set the
+  // base for all paths.
+  Try<Nothing> chroot = os::chroot(".");
+  if (chroot.isError()) {
+    return Error("Failed to chroot to new root: " + chroot.error());
+  }
+
+  // Ensure all references are within the new root.
+  chdir = os::chdir("/");
+  if (chdir.isError()) {
+    return Error("Failed to chdir to new root: " + chdir.error());
+  }
+
+  // Unmount filesystems on the old root. Note, any filesystems that
+  // were mounted to the chroot directory will be correctly pivoted.
+  Try<fs::MountTable> mountTable = fs::MountTable::read("/proc/mounts");
+  if (mountTable.isError()) {
+    return Error("Failed to read mount table: " + mountTable.error());
+  }
+
+  // Strip the chroot prefix. NOTE(review): assumes 'root' has no trailing '/'.
+  const string relativeOld = strings::remove(old.get(), root, strings::PREFIX);
+
+  foreach (const fs::MountTable::Entry& entry, mountTable.get().entries) {
+    // TODO(idownes): sort the entries and remove depth first so we
+    // don't rely on the lazy umount and can check the status.
+    if (strings::startsWith(entry.dir, relativeOld)) {
+      fs::unmount(entry.dir, MNT_DETACH);
+    }
+  }
+
+  // TODO(idownes): If any of the lazy umounts above is still pending
+  // this will fail, leaving behind an empty directory which we'll
+  // ignore.
+  // Check status when we stop using lazy umounts.
+  os::rmdir(relativeOld);
+
+  return Nothing();
+}
+
+} // namespace chroot {
 } // namespace fs {
 } // namespace internal {
 } // namespace mesos {

http://git-wip-us.apache.org/repos/asf/mesos/blob/6950310f/src/linux/fs.hpp
----------------------------------------------------------------------
diff --git a/src/linux/fs.hpp b/src/linux/fs.hpp
index bcb0a1b..f3aa0c2 100644
--- a/src/linux/fs.hpp
+++ b/src/linux/fs.hpp
@@ -311,6 +311,15 @@ Try<Nothing> mount(const Option<std::string>& source,
                    const void* data);
 
 
+// Alternate version of mount which passes an option string as the
+// filesystem data; None() is forwarded to the above mount as NULL.
+Try<Nothing> mount(const Option<std::string>& source,
+                   const std::string& target,
+                   const Option<std::string>& type,
+                   unsigned long flags,
+                   const Option<std::string>& options);
+
+
 // Unmount a file system.
 // @param   target    The (topmost) directory where the file system attaches.
 // @param   flags     Unmount flags.
@@ -321,6 +330,15 @@ Try<Nothing> unmount(const std::string& target, int flags = 0);
 // Change the root filesystem.
 Try<Nothing> pivot_root(const std::string& newRoot, const std::string& putOld);
 
+namespace chroot {
+
+// Enter a 'chroot' environment. The caller should be in a new mount
+// namespace. Basic configuration of special filesystems and device
+// nodes is performed. Any mounts to the current root will be
+// unmounted.
+Try<Nothing> enter(const std::string& root);
+
+} // namespace chroot {
 
 } // namespace fs {
 } // namespace internal {

Reply via email to