From: Alban Crequy <al...@endocode.com> When a service is chrooted with the option RootDirectory=/opt/..., then the options PrivateDevices, PrivateTmp, ProtectHome, ProtectSystem must mount the directories under $RootDirectory/{dev,tmp,home,usr,boot}.
This can be tested with test-ns as root: # export TEST_NS_PROJECTS=/home/alban/debian-tree/dev2/ # export TEST_NS_CHROOT=/home/alban/debian-tree # ./test-ns Questions: - I'm not quite sure about whether we can rollback in setup_namespace() v3: - add missing include "mkdir.h" - fixes after the review http://lists.freedesktop.org/archives/systemd-devel/2015-April/031202.html - move logic to setup_namespace() - either call setup_namespace() or chroot() - update ./test-ns so that I can run it without having /home/lennart v2: http://lists.freedesktop.org/archives/systemd-devel/2015-February/028636.html - create the $RootDirectory/dev directory if missing. This is consistent with mount unit creating the mount points if missing. - avoid arbitrary limitation on 512 characters v1: http://lists.freedesktop.org/archives/systemd-devel/2015-February/028609.html --- src/core/execute.c | 9 ++++-- src/core/namespace.c | 77 +++++++++++++++++++++++++++++++++++++++++++++------- src/core/namespace.h | 3 +- src/test/test-ns.c | 10 +++++-- 4 files changed, 83 insertions(+), 16 deletions(-) diff --git a/src/core/execute.c b/src/core/execute.c index 1a297ba..d4ccac6 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1277,6 +1277,7 @@ static int exec_child( uid_t uid = UID_INVALID; gid_t gid = GID_INVALID; int i, r; + bool new_mnt_namespace; assert(unit); assert(command); @@ -1555,7 +1556,7 @@ static int exec_child( } } - if (!strv_isempty(context->read_write_dirs) || + new_mnt_namespace = !strv_isempty(context->read_write_dirs) || !strv_isempty(context->read_only_dirs) || !strv_isempty(context->inaccessible_dirs) || context->mount_flags != 0 || @@ -1563,8 +1564,9 @@ static int exec_child( params->bus_endpoint_path || context->private_devices || context->protect_system != PROTECT_SYSTEM_NO || - context->protect_home != PROTECT_HOME_NO) { + context->protect_home != PROTECT_HOME_NO; + if (new_mnt_namespace) { char *tmp = NULL, *var = NULL; /* The runtime struct only contains the parent @@ -1581,6 +1583,7 @@ static int exec_child( } r = setup_namespace( + params->apply_chroot ? context->root_directory : NULL, context->read_write_dirs, context->read_only_dirs, context->inaccessible_dirs, @@ -1606,7 +1609,7 @@ static int exec_child( } if (params->apply_chroot) { - if (context->root_directory) + if (!new_mnt_namespace && context->root_directory) if (chroot(context->root_directory) < 0) { *exit_status = EXIT_CHROOT; return -errno; diff --git a/src/core/namespace.c b/src/core/namespace.c index 718da23..9f5ba92 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -36,6 +36,7 @@ #include "dev-setup.h" #include "selinux-util.h" #include "namespace.h" +#include "mkdir.h" typedef enum MountMode { /* This is ordered by priority! */ @@ -124,6 +125,26 @@ static void drop_duplicates(BindMount *m, unsigned *n) { *n = t - m; } +static int mount_move_root(const char *path) { + if (chdir(path) < 0) { + return -errno; + } + + if (mount(path, "/", NULL, MS_MOVE, NULL) < 0) { + return -errno; + } + + if (chroot(".") < 0) { + return -errno; + } + + if (chdir("/") < 0) { + return -errno; + } + + return 0; +} + static int mount_dev(BindMount *m) { static const char devnodes[] = "/dev/null\0" @@ -225,7 +246,13 @@ static int mount_dev(BindMount *m) { dev_setup(temporary_mount); - if (mount(dev, "/dev/", NULL, MS_MOVE, NULL) < 0) { + /* Create the /dev directory if missing. It is more likely to be + * missing when the service is started with RootDirectory. This is + * consistent with mount units creating the mount points when missing. + */ + mkdir_p_label (m->path, 0755); + + if (mount(dev, m->path, NULL, MS_MOVE, NULL) < 0) { r = -errno; goto fail; } @@ -404,6 +431,7 @@ static int make_read_only(BindMount *m) { } int setup_namespace( + const char* chroot, char** read_write_dirs, char** read_only_dirs, char** inaccessible_dirs, @@ -449,37 +477,48 @@ int setup_namespace( return r; if (tmp_dir) { - m->path = "/tmp"; + m->path = strjoina(chroot?:"", "/tmp"); m->mode = PRIVATE_TMP; m++; } if (var_tmp_dir) { - m->path = "/var/tmp"; + m->path = strjoina(chroot?:"", "/var/tmp"); m->mode = PRIVATE_VAR_TMP; m++; } if (private_dev) { - m->path = "/dev"; + m->path = strjoina(chroot?:"", "/dev"); m->mode = PRIVATE_DEV; m++; } if (bus_endpoint_path) { - m->path = bus_endpoint_path; + m->path = strjoina(chroot?:"", bus_endpoint_path); m->mode = PRIVATE_BUS_ENDPOINT; m++; } if (protect_home != PROTECT_HOME_NO) { - r = append_mounts(&m, STRV_MAKE("-/home", "-/run/user", "-/root"), protect_home == PROTECT_HOME_READ_ONLY ? READONLY : INACCESSIBLE); + r = append_mounts(&m, STRV_MAKE( + strjoina("-", chroot?:"", "/home"), + strjoina("-", chroot?:"", "/run/user"), + strjoina("-", chroot?:"", "/root")), + protect_home == PROTECT_HOME_READ_ONLY ? READONLY : INACCESSIBLE); if (r < 0) return r; } if (protect_system != PROTECT_SYSTEM_NO) { - r = append_mounts(&m, protect_system == PROTECT_SYSTEM_FULL ? STRV_MAKE("/usr", "-/boot", "/etc") : STRV_MAKE("/usr", "-/boot"), READONLY); + r = append_mounts(&m, protect_system == PROTECT_SYSTEM_FULL + ? STRV_MAKE( + strjoina(chroot?:"", "/usr"), + strjoina("-", chroot?:"", "/boot"), + strjoina(chroot?:"", "/etc") + ) : STRV_MAKE( + strjoina(chroot?:"", "/usr"), + strjoina("-", chroot?:"", "/boot")), READONLY); if (r < 0) return r; } @@ -490,12 +529,21 @@ int setup_namespace( drop_duplicates(mounts, &n); } - if (n > 0) { + if (n > 0 || chroot) { /* Remount / as SLAVE so that nothing now mounted in the namespace shows up in the parent */ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) return -errno; + } + + if (chroot) { + /* Turn directory into bind mount */ + if (mount(chroot, chroot, NULL, MS_BIND|MS_REC, NULL) < 0) { + return -errno; + } + } + if (n > 0) { for (m = mounts; m < mounts + n; ++m) { r = apply_mount(m, tmp_dir, var_tmp_dir); if (r < 0) @@ -509,12 +557,21 @@ int setup_namespace( } } + if (chroot) { + /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */ + r = mount_move_root(chroot); + + /* at this point, we cannot rollback */ + if (r < 0) + return r; + } + /* Remount / as the desired mode. Not that this will not * reestablish propagation from our side to the host, since * what's disconnected is disconnected. */ if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) { - r = -errno; - goto fail; + /* at this point, we cannot rollback */ + return -errno; } return 0; diff --git a/src/core/namespace.h b/src/core/namespace.h index 42b92e7..00ab22b 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -41,7 +41,8 @@ typedef enum ProtectSystem { _PROTECT_SYSTEM_INVALID = -1 } ProtectSystem; -int setup_namespace(char **read_write_dirs, +int setup_namespace(const char *chroot, + char **read_write_dirs, char **read_only_dirs, char **inaccessible_dirs, const char *tmp_dir, diff --git a/src/test/test-ns.c b/src/test/test-ns.c index 76b131c..b412e98 100644 --- a/src/test/test-ns.c +++ b/src/test/test-ns.c @@ -38,10 +38,12 @@ int main(int argc, char *argv[]) { NULL }; - const char * const inaccessible[] = { + const char *inaccessible[] = { "/home/lennart/projects", NULL }; + char *chroot = getenv("TEST_NS_CHROOT"); + char *projects = getenv("TEST_NS_PROJECTS"); int r; char tmp_dir[] = "/tmp/systemd-private-XXXXXX", @@ -50,7 +52,11 @@ int main(int argc, char *argv[]) { assert_se(mkdtemp(tmp_dir)); assert_se(mkdtemp(var_tmp_dir)); - r = setup_namespace((char **) writable, + if (projects) + inaccessible[0] = projects; + + r = setup_namespace(chroot, + (char **) writable, (char **) readonly, (char **) inaccessible, tmp_dir, -- 2.1.4 _______________________________________________ systemd-devel mailing list systemd-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/systemd-devel