The following pull request was submitted through GitHub. It can be accessed and reviewed at: https://github.com/lxc/lxc/pull/1731
This e-mail was sent by the LXC bot; direct replies will not reach the author unless they happen to be subscribed to this list. === Description (from pull-request) === Fix --share-net for unprivileged users. Add a template to create containers based on OCI images.
From 82428d1e74d9c0ce1df022d232af9d19c66767b9 Mon Sep 17 00:00:00 2001 From: Serge Hallyn <[email protected]> Date: Sat, 5 Aug 2017 11:24:25 -0500 Subject: [PATCH 1/2] [WIP] Add OCi template Signed-off-by: Serge Hallyn <[email protected]> --- templates/lxc-oci.in | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100755 templates/lxc-oci.in diff --git a/templates/lxc-oci.in b/templates/lxc-oci.in new file mode 100755 index 000000000..598ed403c --- /dev/null +++ b/templates/lxc-oci.in @@ -0,0 +1,159 @@ +#!/bin/sh + +# Create application containers from OCI images + +# Copyright © 2014 Stéphane Graber <[email protected]> +# Copyright © 2017 Serge Hallyn <[email protected]> +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 +# USA + +set -eu + +# Make sure the usual locations are in PATH +export PATH=$PATH:/usr/sbin:/usr/bin:/sbin:/bin + +# Check for required binaries +for bin in skopeo oci-image-tool; do + if ! 
type $bin >/dev/null 2>&1; then + echo "ERROR: Missing required tool: $bin" 1>&2 + exit 1 + fi +done + +# Some useful functions +cleanup() { + if [ -d "$DOWNLOAD_TEMP" ]; then + rm -Rf $DOWNLOAD_TEMP + fi +} + +in_userns() { + [ -e /proc/self/uid_map ] || { echo no; return; } + while read line; do + fields=$(echo $line | awk '{ print $1 " " $2 " " $3 }') + [ "$fields" = "0 0 4294967295" ] && { echo no; return; } || true + echo $fields | grep -q " 0 1$" && { echo userns-root; return; } || true + done < /proc/self/uid_map + + [ "$(cat /proc/self/uid_map)" = "$(cat /proc/1/uid_map)" ] && \ + { echo userns-root; return; } + echo yes +} + +usage() { + cat <<EOF +LXC container template for OCI images + +Special arguments: +[ -h | --help ]: Print this help message and exit. + +Required arguments: +[ -u | --url <url> ]: The OCi image URL + +LXC internal arguments (do not pass manually!): +[ --name <name> ]: The container name +[ --path <path> ]: The path to the container +[ --rootfs <rootfs> ]: The path to the container's rootfs +[ --mapped-uid <map> ]: A uid map (user namespaces) +[ --mapped-gid <map> ]: A gid map (user namespaces) + +EOF + return 0 +} + +options=$(getopt -o u:h -l help,name:,path:,\ +rootfs:,mapped-uid:,mapped-gid: -- "$@") + +if [ $? -ne 0 ]; then + usage + exit 1 +fi +eval set -- "$options" + +OCI_URL="" + +while :; do + case "$1" in + -h|--help) usage && exit 1;; + -u|--url) OCI_URL=$2; shift 2;; + --name) LXC_NAME=$2; shift 2;; + --path) LXC_PATH=$2; shift 2;; + --rootfs) LXC_ROOTFS=$2; shift 2;; + --mapped-uid) LXC_MAPPED_UID=$2; shift 2;; + --mapped-gid) LXC_MAPPED_GID=$2; shift 2;; + *) break;; + esac +done + +# Check that we have all variables we need +if [ -z "$LXC_NAME" ] || [ -z "$LXC_PATH" ] || [ -z "$LXC_ROOTFS" ]; then + echo "ERROR: Not running through LXC." 
1>&2 + exit 1 +fi + +if [ -z "$OCI_URL" ]; then + echo "ERROR: no OCI URL given" + exit 1 +fi + +USERNS=$(in_userns) + +if [ "$USERNS" != "no" ]; then + if [ "$USERNS" = "yes" ]; then + if [ -z "$LXC_MAPPED_UID" ] || [ "$LXC_MAPPED_UID" = "-1" ]; then + echo "ERROR: In a user namespace without a map." 1>&2 + exit 1 + fi + DOWNLOAD_MODE="user" + DOWNLOAD_TARGET="user" + else + DOWNLOAD_MODE="user" + DOWNLOAD_TARGET="system" + fi +fi + +# Trap all exit signals +trap cleanup EXIT HUP INT TERM + +if ! type mktemp >/dev/null 2>&1; then + DOWNLOAD_TEMP=/tmp/lxc-oci.$$ + mkdir -p $DOWNLOAD_TEMP +else + DOWNLOAD_TEMP=$(mktemp -d) +fi + +# Download the image - TODO - cache +skopeo copy "${OCI_URL}" "oci:${DOWNLOAD_TEMP}:latest" + +# Unpack the rootfs +echo "Unpacking the rootfs" + +oci-image-tool unpack --ref latest ${DOWNLOAD_TEMP} ${LXC_ROOTFS} + +LXC_CONF_FILE="${LXC_PATH}/config" +echo "lxc.init_cmd = /bin/sh" >> "${LXC_CONF_FILE}" +echo "lxc.mount.auto = proc:mixed sys:mixed cgroup:mixed" >> "${LXC_CONF_FILE}" + +echo "lxc.utsname = ${LXC_NAME}" >> ${LXC_PATH}/config + +if [ -n "$LXC_MAPPED_UID" ] && [ "$LXC_MAPPED_UID" != "-1" ]; then + chown $LXC_MAPPED_UID $LXC_PATH/config $LXC_PATH/fstab >/dev/null 2>&1 || true +fi +if [ -n "$LXC_MAPPED_GID" ] && [ "$LXC_MAPPED_GID" != "-1" ]; then + chgrp $LXC_MAPPED_GID $LXC_PATH/config $LXC_PATH/fstab >/dev/null 2>&1 || true +fi + +exit 0 From 6b7d5edcf677ec41406837525c14a17140013a7d Mon Sep 17 00:00:00 2001 From: Serge Hallyn <[email protected]> Date: Sat, 5 Aug 2017 14:47:44 -0500 Subject: [PATCH 2/2] attach-ns: work unprivileged. Also, close the fds after successful attach to inherted fds. This requires quite a few workarounds due to limitations in what we can do with unprivileged namespaces. To join another task's network ns, we must join their user_ns (or a parent thereof, to which we have privilege - we don't currently do anything to guarantee that exists). 
Then, as we are not the parent of the user_ns, we cannot move the new task into the right cgroups without first entering the same namespace. This can all certainly be cleaned up and better organized. This is a first attempt which just makes it work. So now lxc-start --attach-net someotherpid -n x1 can work unprivileged. Signed-off-by: Serge Hallyn <[email protected]> --- src/lxc/criu.c | 2 +- src/lxc/namespace.c | 78 +++++++++++++++++++++++++++++++- src/lxc/namespace.h | 1 + src/lxc/start.c | 110 +++++++++++++++++++++++++++++++++++++--------- src/lxc/start.h | 2 +- src/lxc/tools/lxc_start.c | 28 ++++++++++++ 6 files changed, 197 insertions(+), 24 deletions(-) diff --git a/src/lxc/criu.c b/src/lxc/criu.c index c9384bdd2..47341d822 100644 --- a/src/lxc/criu.c +++ b/src/lxc/criu.c @@ -823,7 +823,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_ goto out_fini_handler; } - resolve_clone_flags(handler); + resolve_clone_flags(handler, false); // TODO - fix second argument if (pipe(pipes) < 0) { SYSERROR("pipe() failed"); diff --git a/src/lxc/namespace.c b/src/lxc/namespace.c index 3a5b3bef6..500269561 100644 --- a/src/lxc/namespace.c +++ b/src/lxc/namespace.c @@ -24,15 +24,20 @@ #include <unistd.h> #include <alloca.h> #include <errno.h> +#include <sched.h> #include <signal.h> +#include <stdlib.h> #include <sys/param.h> #include <sys/types.h> #include <sys/stat.h> -#include <fcntl.h> #include "namespace.h" #include "log.h" +int setresuid(uid_t ruid, uid_t euid, uid_t suid); +int setresgid(gid_t rgid, gid_t egid, gid_t sgid); +int setns(int fd, int nstype); + lxc_log_define(lxc_namespace, lxc); struct clone_arg { @@ -69,6 +74,77 @@ pid_t lxc_clone(int (*fn)(void *), void *arg, int flags) return ret; } +/* + * like lxc_clone, but first attach to an existing user_ns + */ +pid_t lxc_clone_special_userns(int (*fn)(void *), void *arg, int flags) +{ + struct lxc_handler *handler = arg; + struct clone_arg clone_arg = { + .fn = fn, + .arg = arg, 
+ }; + size_t stack_size = sysconf(_SC_PAGESIZE); + void *stack = alloca(stack_size); + pid_t ret, pid; + int p[2]; + + if (handler->conf->inherit_ns_fd[LXC_NS_USER] == -1) { + ERROR("lxc_clone_special_userns: i shouldn't have been called"); + return -1; + } + if (pipe(p) < 0) + return -1; + + pid = fork(); + if (pid < 0) + return pid; + if (pid > 0) { + close(p[1]); + ret = -1; + ret = read(p[0], &pid, sizeof(pid_t)); + close(p[0]); + if (ret != sizeof(pid_t)) + return -1; + return pid; + } + close(p[0]); + + ret = setns(handler->conf->inherit_ns_fd[LXC_NS_USER], 0); + if (ret < 0) { + ERROR("Failed setting requested existing userns"); + exit(1); + } + ret = setresgid(0, 0, 0); + if (ret < 0) { + ERROR("Failed setting gid to container 0"); + exit(1); + } + ret = setresuid(0, 0, 0); + if (ret < 0) { + ERROR("Failed setting uid to container 0"); + exit(1); + } + stack_size = sysconf(_SC_PAGESIZE); + stack = alloca(stack_size); + flags &= ~CLONE_NEWUSER; + + close(handler->conf->inherit_ns_fd[LXC_NS_USER]); + handler->conf->inherit_ns_fd[LXC_NS_USER] = -1; +#ifdef __ia64__ + ret = __clone2(do_clone, stack, + stack_size, flags | SIGCHLD, &clone_arg); +#else + ret = clone(do_clone, stack + stack_size, flags | SIGCHLD, &clone_arg); +#endif + if (ret < 0) + ERROR("Failed to clone (%#x): %s.", flags, strerror(errno)); + + if (write(p[1], &ret, sizeof(pid_t)) != sizeof(pid_t)) + exit(1); + exit(0); +} + /* Leave the user namespace at the first position in the array of structs so * that we always attach to it first when iterating over the struct and using * setns() to switch namespaces. 
This especially affects lxc_attach(): Suppose diff --git a/src/lxc/namespace.h b/src/lxc/namespace.h index 4916950c1..e58877448 100644 --- a/src/lxc/namespace.h +++ b/src/lxc/namespace.h @@ -80,6 +80,7 @@ int clone(int (*fn)(void *), void *child_stack, #endif extern pid_t lxc_clone(int (*fn)(void *), void *arg, int flags); +extern pid_t lxc_clone_special_userns(int (*fn)(void *), void *arg, int flags); extern int lxc_namespace_2_cloneflag(char *namespace); extern int lxc_fill_namespace_flags(char *flaglist, int *flags); diff --git a/src/lxc/start.c b/src/lxc/start.c index a360f784c..7e7ba52b8 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -168,20 +168,27 @@ static bool preserve_ns(int ns_fd[LXC_NS_MAX], int clone_flags, pid_t pid) } static int attach_ns(const int ns_fd[LXC_NS_MAX]) { - int i; + int i, ret = -1; for (i = 0; i < LXC_NS_MAX; i++) { if (ns_fd[i] < 0) continue; + INFO("Attaching to %s namespace.", ns_info[i].proc_name); if (setns(ns_fd[i], 0) != 0) goto error; } - return 0; + ret = 0; error: - SYSERROR("Failed to attach %s namespace.", ns_info[i].proc_name); - return -1; + if (ret) + SYSERROR("Failed to attach %s namespace.", ns_info[i].proc_name); + + for (i = 0; i < LXC_NS_MAX; i++) { + if (ns_fd[i] < 0) + close(ns_fd[i]); + } + return ret; } static int match_fd(int fd) @@ -859,10 +866,14 @@ static int do_start(void *data) if (lxc_sync_wait_parent(handler, LXC_SYNC_STARTUP)) return -1; + if (attach_ns(handler->conf->inherit_ns_fd) < 0) + return -1; + /* Unshare CLONE_NEWNET after CLONE_NEWUSER. See * https://github.com/lxc/lxd/issues/1978. 
*/ - if ((handler->clone_flags & (CLONE_NEWNET | CLONE_NEWUSER)) == + if (handler->conf->inherit_ns_fd[LXC_NS_NET] == -1 && + (handler->clone_flags & (CLONE_NEWNET | CLONE_NEWUSER)) == (CLONE_NEWNET | CLONE_NEWUSER)) { ret = unshare(CLONE_NEWNET); if (ret < 0) { @@ -1168,10 +1179,18 @@ static int lxc_recv_ttys_from_child(struct lxc_handler *handler) return ret; } -void resolve_clone_flags(struct lxc_handler *handler) +void resolve_clone_flags(struct lxc_handler *handler, bool wants_to_map_ids) { handler->clone_flags = CLONE_NEWPID | CLONE_NEWNS; + if (wants_to_map_ids) { + handler->clone_flags |= CLONE_NEWUSER | CLONE_NEWIPC | \ + CLONE_NEWUTS; + if (!lxc_requests_empty_network(handler)) + handler->clone_flags |= CLONE_NEWNET; + return ; + } + if (!lxc_list_empty(&handler->conf->id_map)) handler->clone_flags |= CLONE_NEWUSER; @@ -1193,6 +1212,38 @@ void resolve_clone_flags(struct lxc_handler *handler) INFO("Inheriting a UTS namespace."); } +static bool enter_cgroup_in_ns(int ufd, struct lxc_handler *handler) +{ + int pid, ret; + + INFO("switching to %d user_ns to set cgroups", handler->pid); + pid = fork(); + if (pid < 0) + return -1; + if (pid > 0) + return wait_for_pid(pid); + + ret = setns(ufd, CLONE_NEWUSER); + if (ret) { + SYSERROR("Failed to switch to ns to enter cgroup"); + sleep(120); + exit(1); + } + ret = setresgid(0, 0, 0); + if (ret < 0) { + ERROR("Failed setting gid to container 0"); + exit(1); + } + ret = setresuid(0, 0, 0); + if (ret < 0) { + ERROR("Failed setting uid to container 0"); + exit(1); + } + if (!cgroup_enter(handler)) + exit(1); + exit(0); +} + /* lxc_spawn() performs crucial setup tasks and clone()s the new process which * exec()s the requested container binary. * Note that lxc_spawn() runs in the parent namespaces. 
Any operations performed @@ -1207,7 +1258,7 @@ static int lxc_spawn(struct lxc_handler *handler) bool cgroups_connected = false; int saved_ns_fd[LXC_NS_MAX]; int preserve_mask = 0, i, flags; - int netpipepair[2], nveths; + int netpipepair[2], nveths, joined_unpriv_userns = -1; bool wants_to_map_ids; struct lxc_list *id_map; @@ -1215,9 +1266,11 @@ static int lxc_spawn(struct lxc_handler *handler) id_map = &handler->conf->id_map; wants_to_map_ids = !lxc_list_empty(id_map); - for (i = 0; i < LXC_NS_MAX; i++) - if (handler->conf->inherit_ns_fd[i] != -1) - preserve_mask |= ns_info[i].clone_flag; + if (!wants_to_map_ids) { + for (i = 0; i < LXC_NS_MAX; i++) + if (handler->conf->inherit_ns_fd[i] != -1) + preserve_mask |= ns_info[i].clone_flag; + } if (lxc_sync_init(handler)) return -1; @@ -1227,7 +1280,7 @@ static int lxc_spawn(struct lxc_handler *handler) return -1; } - resolve_clone_flags(handler); + resolve_clone_flags(handler, wants_to_map_ids); if (handler->clone_flags & CLONE_NEWNET) { if (!lxc_list_empty(&handler->conf->network)) { @@ -1281,10 +1334,10 @@ static int lxc_spawn(struct lxc_handler *handler) INFO("Failed to pin the rootfs for container \"%s\".", handler->name); } - if (!preserve_ns(saved_ns_fd, preserve_mask, getpid())) + if (!wants_to_map_ids && !preserve_ns(saved_ns_fd, preserve_mask, getpid())) goto out_delete_net; - if (attach_ns(handler->conf->inherit_ns_fd) < 0) + if (!wants_to_map_ids && attach_ns(handler->conf->inherit_ns_fd) < 0) goto out_delete_net; if (am_unpriv() && (nveths = count_veths(&handler->conf->network))) { @@ -1306,7 +1359,12 @@ static int lxc_spawn(struct lxc_handler *handler) */ flags &= ~CLONE_NEWNET; } - handler->pid = lxc_clone(do_start, handler, flags); + if (wants_to_map_ids && handler->conf->inherit_ns_fd[LXC_NS_USER] != -1) { + handler->pid = lxc_clone_special_userns(do_start, handler, flags); + joined_unpriv_userns = handler->conf->inherit_ns_fd[LXC_NS_USER]; + handler->conf->inherit_ns_fd[LXC_NS_USER] = -1; + } else + 
handler->pid = lxc_clone(do_start, handler, flags); if (handler->pid < 0) { SYSERROR("Failed to clone a new set of namespaces."); goto out_delete_net; @@ -1318,7 +1376,7 @@ static int lxc_spawn(struct lxc_handler *handler) if (!preserve_ns(handler->nsfd, handler->clone_flags | preserve_mask, handler->pid)) INFO("Failed to preserve namespace for lxc.hook.stop."); - if (attach_ns(saved_ns_fd)) + if (!wants_to_map_ids && attach_ns(saved_ns_fd)) WARN("Failed to restore saved namespaces."); lxc_sync_fini_child(handler); @@ -1329,7 +1387,7 @@ static int lxc_spawn(struct lxc_handler *handler) * mapped to something else on the host.) later to become a valid uid * again. */ - if (wants_to_map_ids && lxc_map_ids(id_map, handler->pid)) { + if (joined_unpriv_userns == -1 && wants_to_map_ids && lxc_map_ids(id_map, handler->pid)) { ERROR("Failed to set up id mapping."); goto out_delete_net; } @@ -1353,11 +1411,17 @@ static int lxc_spawn(struct lxc_handler *handler) goto out_delete_net; } - if (!cgroup_enter(handler)) - goto out_delete_net; - - if (!cgroup_chown(handler)) - goto out_delete_net; + if (joined_unpriv_userns != -1) { + if (!cgroup_chown(handler)) + goto out_delete_net; + if (enter_cgroup_in_ns(joined_unpriv_userns, handler)) + goto out_delete_net; + } else { + if (!cgroup_enter(handler)) + goto out_delete_net; + if (!cgroup_chown(handler)) + goto out_delete_net; + } if (failed_before_rename) goto out_delete_net; @@ -1437,10 +1501,14 @@ static int lxc_spawn(struct lxc_handler *handler) lxc_sync_fini(handler); handler->netnsfd = lxc_preserve_ns(handler->pid, "net"); + if (joined_unpriv_userns != -1) + close(joined_unpriv_userns); return 0; out_delete_net: + if (joined_unpriv_userns != -1) + close(joined_unpriv_userns); if (cgroups_connected) cgroup_disconnect(); if (handler->clone_flags & CLONE_NEWNET) diff --git a/src/lxc/start.h b/src/lxc/start.h index d8d06cfbf..c40c733d1 100644 --- a/src/lxc/start.h +++ b/src/lxc/start.h @@ -92,6 +92,6 @@ extern int 
lxc_check_inherited(struct lxc_conf *conf, bool closeall, int __lxc_start(const char *, struct lxc_handler *, struct lxc_operations *, void *, const char *, bool); -extern void resolve_clone_flags(struct lxc_handler *handler); +extern void resolve_clone_flags(struct lxc_handler *handler, bool wants_to_map_ids); #endif diff --git a/src/lxc/tools/lxc_start.c b/src/lxc/tools/lxc_start.c index ef45ffb08..c0f928e45 100644 --- a/src/lxc/tools/lxc_start.c +++ b/src/lxc/tools/lxc_start.c @@ -356,6 +356,34 @@ int main(int argc, char *argv[]) goto out; conf->inherit_ns_fd[i] = fd; } + if (!lxc_list_empty(&conf->id_map) && conf->inherit_ns_fd[LXC_NS_USER] == -1) { + /* + * If an unpriv user wants to share a netns, he can only do so + * if he is privileged toward the userns which owns the netns. So + * we have to enter the userns as well, first. Note - this means + * that if the user asks for --share-net=X --share-ipc=Y and X and Y + * have different owning user namespaces, this will likely fail. We + * could make the rare case of shared common ancestor work, but it's + * not worth it. After all noone's noticed that this was completely + * broken for unpriv users for years. + */ + for (i = 0; i < LXC_NS_MAX; i++) { + if (i == LXC_NS_USER) + continue; + if (conf->inherit_ns_fd[i] == -1) + continue; + // we need to inherit userns as well + int pid = pid_from_lxcname(my_args.share_ns[i], lxcpath); + if (pid < 1) + goto out; + int fd = open_ns(pid, "user"); + if (fd < 0) + goto out; + INFO("XXX Setting user ns in inherit_ns_fd"); + conf->inherit_ns_fd[LXC_NS_USER] = fd; + break; + } + } if (!my_args.daemonize) { c->want_daemonize(c, false);
_______________________________________________ lxc-devel mailing list [email protected] http://lists.linuxcontainers.org/listinfo/lxc-devel
