On 12/06/2012 10:02 AM, Serge Hallyn wrote: > The rootfs will need to be chowned to the mapped userids, which can > be done with the /usr/bin/uidmapshift tool shipped with the nsexec > package in ppa:serge-hallyn/userns-natty. > The container config supports new entries of the form: > lxc.id_map = U 100000 0 10000 > lxc.id_map = G 100000 0 10000 > meaning map 'virtual' uids (in the container) 0-10000 to uids > 100000-110000 on the host, and same for gids. So long as there are > mappings specified in the container config, then CLONE_NEWUSER will > be used when the container is cloned. This means that container > setup is no longer done with root privilege on the host, only root > privilege in the container. Therefore cgroup setup is moved from the > init task to the monitor task. > > To use this patchset, you currently need to either use the raring > kernel at ppa:serge-hallyn/userns-natty, or build your own kernel > from either git://kernel.ubuntu.com/serge/quantal-userns.git branch > master-next.dec3.userns or branch userns-always-map-user-v76 at > git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git > plus a patch enabling tmpfs mounts in userns. > > You also need to chown the files in the container rootfs into the > mapped range. There is a utility at > https://code.launchpad.net/~serge-hallyn/+junk/nsexec to do this. > uidmapshift does the chowning, while the container-userns-convert > script nicely wraps that program. So simply running > > sudo lxc-create -t ubuntu -n r1 > sudo container-userns-convert r1 200000 > > will create a container which is shifted so uid 0 in the container > is uid 200000 on the host. > > TODO: when doing setuid(0), need to only do that if 0 is one of the > ids we map to. Similarly, when dropping capabilities, need to skip > that only if 0 is one of the ids we map to. > > Signed-off-by: Serge Hallyn <serge.hal...@ubuntu.com>
Nice, the code is much simpler than I expected. I only read through pretty briefly and left a few comments, more questions and cosmetic remarks than problems with the code itself. I'm also wondering: what's the state of lxc-attach wrt user namespaces? Does it need any updating too? > --- > src/lxc/conf.c | 141 > +++++++++++++++++++++++++++++++++++++++++++++++++---- > src/lxc/conf.h | 26 ++++++++++ > src/lxc/confile.c | 60 +++++++++++++++++++++++ > src/lxc/start.c | 35 +++++++++++++ > 4 files changed, 253 insertions(+), 9 deletions(-) > > diff --git a/src/lxc/conf.c b/src/lxc/conf.c > index 79d96d7..1a619d0 100644 > --- a/src/lxc/conf.c > +++ b/src/lxc/conf.c > @@ -1221,7 +1221,7 @@ static int setup_kmsg(const struct lxc_rootfs *rootfs, > return 0; > } > > -static int setup_cgroup(const char *name, struct lxc_list *cgroups) > +int setup_cgroup(const char *name, struct lxc_list *cgroups) Why the change? > { > struct lxc_list *iterator; > struct lxc_cgroup *cg; > @@ -1882,6 +1882,7 @@ struct lxc_conf *lxc_conf_init(void) > lxc_list_init(&new->network); > lxc_list_init(&new->mount_list); > lxc_list_init(&new->caps); > + lxc_list_init(&new->id_map); > for (i=0; i<NUM_LXC_HOOKS; i++) > lxc_list_init(&new->hooks[i]); > #if HAVE_APPARMOR > @@ -2256,6 +2257,44 @@ int lxc_assign_network(struct lxc_list *network, pid_t > pid) > return 0; > } > > +int add_id_mapping(enum idtype idtype, pid_t pid, uid_t host_start, uid_t > ns_start, int range) > +{ > + char path[PATH_MAX]; > + int ret; > + FILE *f; > + > + ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == > ID_TYPE_UID ? 'u' : 'g'); > + if (ret < 0 || ret >= PATH_MAX) { > + fprintf(stderr, "%s: path name too long", __func__); > + return -E2BIG; > + } > + f = fopen(path, "w"); > + if (!f) { > + perror("open"); > + return -EINVAL; > + } > + ret = fprintf(f, "%d %d %d", ns_start, host_start, range); > + if (ret < 0) > + perror("write"); > + fclose(f); > + return ret < 0 ?
ret : 0; > +} > + > +int lxc_map_ids(struct lxc_list *idmap, pid_t pid) > +{ > + struct lxc_list *iterator; > + struct id_map *map; > + int ret = 0; > + > + lxc_list_for_each(iterator, idmap) { > + map = iterator->elem; > + ret = add_id_mapping(map->idtype, pid, map->hostid, map->nsid, > map->range); > + if (ret) > + break; > + } > + return ret; > +} > + > int lxc_find_gateway_addresses(struct lxc_handler *handler) > { > struct lxc_list *network = &handler->conf->network; > @@ -2364,6 +2403,93 @@ void lxc_delete_tty(struct lxc_tty_info *tty_info) > tty_info->nbtty = 0; > } > > +/* > + * given a host uid, return the ns uid if it is mapped. > + * if it is not mapped, return the original host id. > + */ > +static int shiftid(struct lxc_conf *c, int uid, enum idtype w) > +{ > + struct lxc_list *iterator; > + struct id_map *map; > + int low, high; > + > + lxc_list_for_each(iterator, &c->id_map) { > + map = iterator->elem; > + if (map->idtype != w) > + continue; > + > + low = map->nsid; > + high = map->nsid + map->range; > + if (uid < low || uid >= high) > + continue; > + > + return uid - low + map->hostid; > + } > + > + return uid; > +} > + > +/* > + * Take a pathname for a file created on the host, and map the uid and gid > + * into the container if needed. 
(Used for ttys) > + */ > +static int uid_shift_file(char *path, struct lxc_conf *c) > +{ > + struct stat statbuf; > + int newuid, newgid; > + > + if (stat(path, &statbuf)) { > + SYSERROR("stat(%s)", path); > + return -1; > + } > + > + newuid = shiftid(c, statbuf.st_uid, ID_TYPE_UID); > + newgid = shiftid(c, statbuf.st_gid, ID_TYPE_GID); > + if (newuid != statbuf.st_uid || newgid != statbuf.st_gid) { > + DEBUG("chowning %s from %d:%d to %d:%d\n", path, > statbuf.st_uid, statbuf.st_gid, newuid, newgid); > + if (chown(path, newuid, newgid)) { > + SYSERROR("chown(%s)", path); > + return -1; > + } > + } > + return 0; > +} > + > +int uid_shift_ttys(int pid, struct lxc_conf *conf) > +{ > + int i, ret; > + struct lxc_tty_info *tty_info = &conf->tty_info; > + char path[MAXPATHLEN]; > + char *ttydir = conf->ttydir; > + > + if (!conf->rootfs.path) > + return 0; > + /* first the console */ > + ret = snprintf(path, sizeof(path), "/proc/%d/root/dev/%s/console", pid, > ttydir ? ttydir : ""); > + if (ret < 0 || ret >= sizeof(path)) { > + ERROR("console path too long\n"); > + return -1; > + } > + if (uid_shift_file(path, conf)) { > + DEBUG("Failed to chown the console %s.\n", path); > + return -1; > + } > + for (i=0; i< tty_info->nbtty; i++) { > + ret = snprintf(path, sizeof(path), "/proc/%d/root/dev/%s/tty%d", > + pid, ttydir ? 
ttydir : "", i + 1); > + if (ret < 0 || ret >= sizeof(path)) { > + ERROR("pathname too long for ttys"); > + return -1; > + } > + if (uid_shift_file(path, conf)) { > + DEBUG("Failed to chown pty %s.\n", path); > + return -1; > + } > + } > + > + return 0; > +} > + > int lxc_setup(const char *name, struct lxc_conf *lxc_conf) > { > #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */ > @@ -2419,11 +2545,6 @@ int lxc_setup(const char *name, struct lxc_conf > *lxc_conf) > } > } > > - if (setup_cgroup(name, &lxc_conf->cgroup)) { > - ERROR("failed to setup the cgroups for '%s'", name); > - return -1; > - } > - > if (setup_console(&lxc_conf->rootfs, &lxc_conf->console, > lxc_conf->ttydir)) { > ERROR("failed to setup the console for '%s'", name); > return -1; > @@ -2467,9 +2588,11 @@ int lxc_setup(const char *name, struct lxc_conf > *lxc_conf) > return -1; > } > > - if (setup_caps(&lxc_conf->caps)) { > - ERROR("failed to drop capabilities"); > - return -1; > + if (lxc_list_empty(&lxc_conf->id_map)) { > + if (setup_caps(&lxc_conf->caps)) { > + ERROR("failed to drop capabilities"); > + return -1; > + } > } Why can't we drop capabilities in a user namespace? > NOTICE("'%s' is setup.", name); > diff --git a/src/lxc/conf.h b/src/lxc/conf.h > index 694bce4..97b9274 100644 > --- a/src/lxc/conf.h > +++ b/src/lxc/conf.h > @@ -137,6 +137,26 @@ struct lxc_cgroup { > char *value; > }; > > +enum idtype { > + ID_TYPE_UID, > + ID_TYPE_GID > +}; > + > +/* > + * id_map is an id map entry. Form in confile is: > + * lxc.id_map = U 9800 0 100 > + * lxc.id_map = U 9900 1000 100 > + * lxc.id_map = G 9800 0 100 > + * lxc.id_map = G 9900 1000 100 > + * meaning the container can use uids and gids 0-100 and 1000-1100, > + * with uid 0 mapping to uid 9800 on the host, and gid 1000 to > + * gid 9900 on the host. 
> + */ > +struct id_map { > + enum idtype idtype; > + int hostid, nsid, range; > +}; > + > /* > * Defines a structure containing a pty information for > * virtualizing a tty > @@ -220,6 +240,7 @@ struct lxc_conf { > int personality; > struct utsname *utsname; > struct lxc_list cgroup; > + struct lxc_list id_map; > struct lxc_list network; > struct lxc_list mount_list; > struct lxc_list caps; > @@ -256,6 +277,7 @@ extern int pin_rootfs(const char *rootfs); > extern int lxc_create_network(struct lxc_handler *handler); > extern void lxc_delete_network(struct lxc_handler *handler); > extern int lxc_assign_network(struct lxc_list *networks, pid_t pid); > +extern int lxc_map_ids(struct lxc_list *idmap, pid_t pid); > extern int lxc_find_gateway_addresses(struct lxc_handler *handler); > > extern int lxc_create_tty(const char *name, struct lxc_conf *conf); > @@ -268,6 +290,10 @@ extern int lxc_clear_cgroups(struct lxc_conf *c, const > char *key); > extern int lxc_clear_mount_entries(struct lxc_conf *c); > extern int lxc_clear_hooks(struct lxc_conf *c, const char *key); > > +extern int setup_cgroup(const char *name, struct lxc_list *cgroups); > + > +extern int uid_shift_ttys(int pid, struct lxc_conf *conf); > + > /* > * Configure the container from inside > */ > diff --git a/src/lxc/confile.c b/src/lxc/confile.c > index a64ae09..1fa6189 100644 > --- a/src/lxc/confile.c > +++ b/src/lxc/confile.c > @@ -55,6 +55,7 @@ static int config_ttydir(const char *, const char *, struct > lxc_conf *); > static int config_aa_profile(const char *, const char *, struct lxc_conf *); > #endif > static int config_cgroup(const char *, const char *, struct lxc_conf *); > +static int config_idmap(const char *, const char *, struct lxc_conf *); > static int config_loglevel(const char *, const char *, struct lxc_conf *); > static int config_logfile(const char *, const char *, struct lxc_conf *); > static int config_mount(const char *, const char *, struct lxc_conf *); > @@ -94,6 +95,7 @@ static 
struct lxc_config_t config[] = { > { "lxc.aa_profile", config_aa_profile }, > #endif > { "lxc.cgroup", config_cgroup }, > + { "lxc.id_map", config_idmap }, > { "lxc.loglevel", config_loglevel }, > { "lxc.logfile", config_logfile }, > { "lxc.mount", config_mount }, > @@ -1021,6 +1023,64 @@ out: > return -1; > } > > +static int config_idmap(const char *key, const char *value, struct lxc_conf > *lxc_conf) > +{ > + char *token = "lxc.id_map"; > + char *subkey; > + struct lxc_list *idmaplist = NULL; > + struct id_map *idmap = NULL; > + int hostid, nsid, range; > + char type; > + int ret; > + > + subkey = strstr(key, token); > + > + if (!subkey) > + return -1; > + > + if (!strlen(subkey)) > + return -1; > + > + idmaplist = malloc(sizeof(*idmaplist)); > + if (!idmaplist) > + goto out; > + > + idmap = malloc(sizeof(*idmap)); > + if (!idmap) > + goto out; > + memset(idmap, 0, sizeof(*idmap)); > + > + idmaplist->elem = idmap; > + > + lxc_list_add_tail(&lxc_conf->id_map, idmaplist); > + > + ret = sscanf(value, "%c %d %d %d", &type, &hostid, &nsid, &range); > + if (ret != 4) > + goto out; > + INFO("read uid map: type %c hostid %d nsid %d range %d", type, hostid, > nsid, range); > + if (type == 'U') > + idmap->idtype = ID_TYPE_UID; > + else if (type == 'G') > + idmap->idtype = ID_TYPE_GID; > + else > + goto out; > + idmap->hostid = hostid; > + idmap->nsid = nsid; > + idmap->range = range; > + > + return 0; > + > +out: > + if (idmaplist) > + free(idmaplist); > + > + if (idmap) { > + free(idmap); > + } ^ code style isn't really consistent here :) > + return -1; > +} > + > static int config_path_item(const char *key, const char *value, > struct lxc_conf *lxc_conf, char **conf_item) > { > diff --git a/src/lxc/start.c b/src/lxc/start.c > index 3e26b27..8d03b69 100644 > --- a/src/lxc/start.c > +++ b/src/lxc/start.c > @@ -542,6 +542,22 @@ static int do_start(void *data) > if (lxc_sync_barrier_parent(handler, LXC_SYNC_CONFIGURE)) > return -1; > > + /* > + * if we are in a new user 
namespace, become root there to have > + * privilege over our namespace > + */ > + if (!lxc_list_empty(&handler->conf->id_map)) { > + NOTICE("switching to gid/uid 0 in new user namespace"); > + if (setgid(0)) { > + SYSERROR("setgid"); > + exit(1); > + } > + if (setuid(0)) { > + SYSERROR("setuid"); > + exit(1); > + } > + } > + > if (handler->conf->need_utmp_watch) { > if (prctl(PR_CAPBSET_DROP, CAP_SYS_BOOT, 0, 0, 0)) { > SYSERROR("failed to remove CAP_SYS_BOOT capability"); > @@ -589,6 +605,10 @@ int lxc_spawn(struct lxc_handler *handler) > return -1; > > handler->clone_flags = > CLONE_NEWUTS|CLONE_NEWPID|CLONE_NEWIPC|CLONE_NEWNS; > + if (!lxc_list_empty(&handler->conf->id_map)) { > + INFO("Cloning a new user namespace"); > + handler->clone_flags |= CLONE_NEWUSER; > + } > if (!lxc_list_empty(&handler->conf->network)) { > > handler->clone_flags |= CLONE_NEWNET; > @@ -650,12 +670,27 @@ int lxc_spawn(struct lxc_handler *handler) > } > } > > + if (lxc_map_ids(&handler->conf->id_map, handler->pid)) { > + ERROR("failed to set up id mapping"); > + goto out_delete_net; > + } > + > /* Tell the child to continue its initialization and wait for > * it to exec or return an error > */ > if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CONFIGURE)) > return -1; > > + if (setup_cgroup(name, &handler->conf->cgroup)) { > + ERROR("failed to setup the cgroups for '%s'", name); > + goto out_delete_net; > + } > + > + /* If child is in a fresh user namespace, chown his ptys for > + * him */ > + if (uid_shift_ttys(handler->pid, handler->conf)) > + DEBUG("Failed to chown ptys.\n"); > + > if (handler->ops->post_start(handler, handler->data)) > goto out_abort; > > -- Stéphane Graber Ubuntu developer http://www.ubuntu.com
signature.asc
Description: OpenPGP digital signature
------------------------------------------------------------------------------ LogMeIn Rescue: Anywhere, Anytime Remote support for IT. Free Trial Remotely access PCs and mobile devices and provide instant support Improve your efficiency, and focus on delivering more value-add services Discover what IT Professionals Know. Rescue delivers http://p.sf.net/sfu/logmein_12329d2d
_______________________________________________ Lxc-devel mailing list Lxc-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/lxc-devel