From: Serge Hallyn <serge.hal...@ubuntu.com> 1. lxcapi_create: don't try to unshare and mount for dir-backed containers
It's unnecessary, and breaks unprivileged lxc-create (since unpriv users cannot yet unshare(CLONE_NEWNS)). 2. api_create: chown rootfs. Chown the rootfs to the host uid to which container root will be mapped. 3. create: run template in a mapped user ns. 4. use the (setuid-root) newuidmap/newgidmap helpers to set id_map if we are not root. This is needed to be able to set userns mappings as an unprivileged user, for unprivileged lxc-start. Signed-off-by: Serge Hallyn <serge.hal...@ubuntu.com> --- src/lxc/conf.c | 102 +++++++++++++++++++++++++----- src/lxc/conf.h | 4 ++ src/lxc/lxccontainer.c | 164 ++++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 240 insertions(+), 30 deletions(-) diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 46320dd..f7ac030 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -2554,31 +2554,49 @@ int lxc_map_ids(struct lxc_list *idmap, pid_t pid) int ret = 0; enum idtype type; char *buf = NULL, *pos; + int am_root = (getuid() == 0); for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) { int left, fill; - - pos = buf; - lxc_list_for_each(iterator, idmap) { - /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */ - if (!buf) - buf = pos = malloc(4096); + int had_entry = 0; + if (!buf) { + buf = pos = malloc(4096); if (!buf) return -ENOMEM; + } + pos = buf; + if (!am_root) + pos += sprintf(buf, "new%cidmap %d ", + type == ID_TYPE_UID ? 
'u' : 'g', + pid); + lxc_list_for_each(iterator, idmap) { + /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */ map = iterator->elem; - if (map->idtype == type) { - left = 4096 - (pos - buf); - fill = snprintf(pos, left, "%lu %lu %lu\n", - map->nsid, map->hostid, map->range); - if (fill <= 0 || fill >= left) - SYSERROR("snprintf failed, too many mappings"); - pos += fill; - } + if (map->idtype != type) + continue; + + had_entry = 1; + left = 4096 - (pos - buf); + fill = snprintf(pos, left, " %lu %lu %lu", map->nsid, + map->hostid, map->range); + if (fill <= 0 || fill >= left) + SYSERROR("snprintf failed, too many mappings"); + pos += fill; } - if (pos == buf) // no mappings were found + if (!had_entry) continue; - ret = write_id_mapping(type, pid, buf, pos-buf); + left = 4096 - (pos - buf); + fill = snprintf(pos, left, "\n"); + if (fill <= 0 || fill >= left) + SYSERROR("snprintf failed, too many mappings"); + pos += fill; + + if (am_root) + ret = write_id_mapping(type, pid, buf, pos-buf); + else + ret = system(buf); + if (ret) break; } @@ -2588,6 +2606,58 @@ int lxc_map_ids(struct lxc_list *idmap, pid_t pid) return ret; } +/* + * return the host uid to which the container root is mapped, or -1 on + * error + */ +int get_mapped_rootid(struct lxc_conf *conf) +{ + struct lxc_list *it; + struct id_map *map; + + lxc_list_for_each(it, &conf->id_map) { + map = it->elem; + if (map->idtype != ID_TYPE_UID) + continue; + if (map->nsid != 0) + continue; + return map->hostid; + } + return -1; +} + +bool hostid_is_mapped(int id, struct lxc_conf *conf) +{ + struct lxc_list *it; + struct id_map *map; + lxc_list_for_each(it, &conf->id_map) { + map = it->elem; + if (map->idtype != ID_TYPE_UID) + continue; + if (id >= map->hostid && id < map->hostid + map->range) + return true; + } + return false; +} + +int find_unmapped_nsuid(struct lxc_conf *conf) +{ + struct lxc_list *it; + struct id_map *map; + uid_t freeid = 0; +again: + lxc_list_for_each(it, &conf->id_map) { + 
map = it->elem; + if (map->idtype != ID_TYPE_UID) + continue; + if (freeid >= map->nsid && freeid < map->nsid + map->range) { + freeid = map->nsid + map->range; + goto again; + } + } + return freeid; +} + int lxc_find_gateway_addresses(struct lxc_handler *handler) { struct lxc_list *network = &handler->conf->network; diff --git a/src/lxc/conf.h b/src/lxc/conf.h index ed3240d..065b1df 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -331,4 +331,8 @@ extern int lxc_setup(const char *name, struct lxc_conf *lxc_conf, const char *lxcpath); extern void lxc_rename_phys_nics_on_shutdown(struct lxc_conf *conf); + +extern int get_mapped_rootid(struct lxc_conf *conf); +extern int find_unmapped_nsuid(struct lxc_conf *conf); +extern bool hostid_is_mapped(int id, struct lxc_conf *conf); #endif diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c index b0695bc..c167442 100644 --- a/src/lxc/lxccontainer.c +++ b/src/lxc/lxccontainer.c @@ -678,6 +678,49 @@ static const char *lxcapi_get_config_path(struct lxc_container *c); static bool lxcapi_set_config_item(struct lxc_container *c, const char *key, const char *v); /* + * chown_mapped: for an unprivileged user with uid X to chown a dir + * to subuid Y, he needs to run chown as root in a userns where + * nsid 0 is mapped to hostuid Y, and nsid Y is mapped to hostuid + * X. That way, the container root is privileged with respect to + * hostuid X, allowing him to do the chown. 
+ */ +static int chown_mapped(int nsrootid, char *path) +{ + if (nsrootid < 0) + return nsrootid; + pid_t pid = fork(); + if (pid < 0) { + SYSERROR("Failed forking"); + return -1; + } + if (!pid) { + int hostuid = geteuid(), ret; + char map1[100], map2[100]; + char *args[] = {"lxc-usernsexec", "-m", map1, "-m", map2, "--", "chown", + "0", path, NULL}; + + // "b:0:nsrootid:1" + ret = snprintf(map1, 100, "b:0:%d:1", nsrootid); + if (ret < 0 || ret >= 100) { + ERROR("Error uid printing map string"); + return -1; + } + + // "b:hostuid:hostuid:1" + ret = snprintf(map2, 100, "b:%d:%d:1", hostuid, hostuid); + if (ret < 0 || ret >= 100) { + ERROR("Error uid printing map string"); + return -1; + } + + ret = execvp("lxc-usernsexec", args); + SYSERROR("Failed executing lxc-usernsexec"); + exit(1); + } + return wait_for_pid(pid); +} + +/* * do_bdev_create: thin wrapper around bdev_create(). Like bdev_create(), * it returns a mounted bdev on success, NULL on error. */ @@ -701,6 +744,25 @@ static struct bdev *do_bdev_create(struct lxc_container *c, const char *type, if (!bdev) return NULL; lxcapi_set_config_item(c, "lxc.rootfs", bdev->src); + + /* if we are not root, chown the rootfs dir to root in the + * target uidmap */ + + if (geteuid() != 0) { + int rootid; + if ((rootid = get_mapped_rootid(c->lxc_conf)) <= 0) { + ERROR("No mapping for container root"); + bdev_put(bdev); + return NULL; + } + ret = chown_mapped(rootid, bdev->dest); + if (ret < 0) { + ERROR("Error chowning %s to %d\n", bdev->dest, rootid); + bdev_put(bdev); + return NULL; + } + } + return bdev; } @@ -772,6 +834,7 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet int i; int ret, len, nargs = 0; char **newargv; + struct lxc_conf *conf = c->lxc_conf; if (quiet) { close(0); @@ -781,10 +844,6 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet open("/dev/null", O_RDWR); open("/dev/null", O_RDWR); } - if (unshare(CLONE_NEWNS) < 0) { - ERROR("error 
unsharing mounts"); - exit(1); - } src = c->lxc_conf->rootfs.path; /* @@ -801,9 +860,19 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet exit(1); } - if (bdev->ops->mount(bdev) < 0) { - ERROR("Error mounting rootfs"); - exit(1); + if (strcmp(bdev->type, "dir") != 0) { + if (unshare(CLONE_NEWNS) < 0) { + ERROR("error unsharing mounts"); + exit(1); + } + if (bdev->ops->mount(bdev) < 0) { + ERROR("Error mounting rootfs"); + exit(1); + } + } else { // TODO come up with a better way here! + if (bdev->dest) + free(bdev->dest); + bdev->dest = strdup(bdev->src); } /* @@ -813,6 +882,7 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet if (argv) for (nargs = 0; argv[nargs]; nargs++) ; nargs += 4; // template, path, rootfs and name args + newargv = malloc(nargs * sizeof(*newargv)); if (!newargv) exit(1); @@ -856,8 +926,68 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet exit(1); newargv[nargs - 1] = NULL; + /* + * If we're running the template in a mapped userns, then + * we prepend the template command with: + * lxc-usernsexec <-m map1> ... <-m mapn> -- + */ + if (geteuid() != 0 && !lxc_list_empty(&conf->id_map)) { + int n2args = 1; + char **n2 = malloc(n2args * sizeof(*n2)); + struct lxc_list *it; + struct id_map *map; + + newargv[0] = tpath; + tpath = "lxc-usernsexec"; + n2[0] = "lxc-usernsexec"; + lxc_list_for_each(it, &conf->id_map) { + map = it->elem; + n2args += 2; + n2 = realloc(n2, n2args * sizeof(*n2)); + if (!n2) + exit(1); + n2[n2args-2] = "-m"; + n2[n2args-1] = malloc(200); + if (!n2[n2args-1]) + exit(1); + ret = snprintf(n2[n2args-1], 200, "%c:%lu:%lu:%lu", + map->idtype == ID_TYPE_UID ? 'u' : 'g', + map->nsid, map->hostid, map->range); + if (ret < 0 || ret >= 200) + exit(1); + } + bool hostid_mapped = hostid_is_mapped(geteuid(), conf); + int extraargs = hostid_mapped ? 
1 : 3; + n2 = realloc(n2, (nargs + n2args + extraargs) * sizeof(*n2)); + if (!n2) + exit(1); + if (!hostid_mapped) { + int free_id = find_unmapped_nsuid(conf); + n2[n2args++] = "-m"; + if (free_id < 0) { + ERROR("Could not find free uid to map"); + exit(1); + } + n2[n2args++] = malloc(200); + if (!n2[n2args-1]) { + SYSERROR("out of memory"); + exit(1); + } + ret = snprintf(n2[n2args-1], 200, "u:%d:%d:1", + free_id, geteuid()); + if (ret < 0 || ret >= 200) { + ERROR("string too long"); + exit(1); + } + } + n2[n2args++] = "--"; + for (i = 0; i < nargs; i++) + n2[i + n2args] = newargv[i]; + free(newargv); + newargv = n2; + } /* execute */ - execv(tpath, newargv); + execvp(tpath, newargv); SYSERROR("failed to execute template %s", tpath); exit(1); } @@ -1949,15 +2079,21 @@ static int clone_update_rootfs(struct lxc_container *c0, if (pid > 0) return wait_for_pid(pid); - if (unshare(CLONE_NEWNS) < 0) { - ERROR("error unsharing mounts"); - exit(1); - } bdev = bdev_init(c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL); if (!bdev) exit(1); - if (bdev->ops->mount(bdev) < 0) - exit(1); + if (strcmp(bdev->type, "dir") != 0) { + if (unshare(CLONE_NEWNS) < 0) { + ERROR("error unsharing mounts"); + exit(1); + } + if (bdev->ops->mount(bdev) < 0) + exit(1); + } else { // TODO come up with a better way + if (bdev->dest) + free(bdev->dest); + bdev->dest = strdup(bdev->src); + } if (!lxc_list_empty(&conf->hooks[LXCHOOK_CLONE])) { /* Start of environment variable setup for hooks */ -- 1.8.1.2 ------------------------------------------------------------------------------ See everything from the browser to the database with AppDynamics Get end-to-end visibility with application monitoring from AppDynamics Isolate bottlenecks and diagnose root cause in seconds. Start your free trial of AppDynamics Pro today! 
http://pubads.g.doubleclick.net/gampad/clk?id=48808831&iu=/4140/ostg.clktrk _______________________________________________ Lxc-devel mailing list Lxc-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/lxc-devel