Serge, On Fri, 2011-06-24 at 12:54 -0500, Serge Hallyn wrote: > I.e. with systemd or libcgroup. > > To do this, instead of looking for one cgroup called 'lxc' or > otherwise taking the first cgroup we find, we actually create a > container in every mounted cgroup fs. Right now it's done under the > root of each fs. We may want to put that under lxc, or, better yet, > make that configurable. > > Note the use of clone_children seems not quite right, but that's > not for this patch to fix. In particular, if clone_children is > not in the mntopts, we reject it. Yet later we try to set it > ourselves. I believe we should simply, if ns cgroup is not > composed, always try to set clone_children to 1. As it stands, > with libcgroup installed, I had to do
> cd /sys/fs/cgroup > for d in `/bin/ls`; do > echo 1 > $d/cgroup.clone_children > done Doing this step alone broke lxc totally for me, with or without the patch below. This was on Fedora 15 testing with lxc 0.7.4.2 as well as a build from git without the patch below and a build from git with the patch below. [root@forest mhw]# lxc-start -n Alcove lxc-start: failed to clone(0x6c020000): Invalid argument lxc-start: Invalid argument - failed to fork into a new namespace lxc-start: failed to spawn 'Alcove' lxc-start: No such file or directory - failed to remove cgroup '/sys/fs/cgroup/systemd/Alcove' If I set those cgroup.clone_children values back to 0, Alcove fires up normally with or without this patch with no "devices.*" entries in the config. If I have devices.allow and device.deny entries in that config, then your patch below works! And I see subdirectories in all the cgroup mounts for "Alcove". <random aside> [[ Daniel - Also, that nifty little option for ignoring other options in the config works like a charm so my parameters for this now work uncommented for my own high level scripts: -- # High level script control management... lxc-boot.onboot = true lxc-boot.disabled = false lxc-system.description = "Alcove system" -- Thank you... </random aside> ]] > But after that, 'lxc-start -n l1' worked like a charm. It also > continues to work with a single mount of cgroups under /cgroup. Bottom line. AFA starting a container, your patch works for me and fixes the systemd/libcgroup multi-mount problem but if and only if I do NOT do what you described above. That's disturbing. Why does it work one way for you and the exact opposite for me? We do seem to have a remaining problem though... Running or stopped, lxc-info seems broken: [root@forest mhw]# lxc-info -n Alcove Segmentation fault (core dumped) Daniel - I saw you put this patch off, postponing it. Are you awaiting testing like mine or is there some additional issues that are being missed (like this weirdism with cgroup.clone_children). That seg fault could be reason enough, too. Regards, Mike > Signed-off-by: Serge Hallyn <serge.hal...@ubuntu.com> > --- > src/lxc/cgroup.c | 207 > +++++++++++++++++++++++++++++++++-------------------- > src/lxc/cgroup.h | 2 +- > src/lxc/freezer.c | 2 +- > src/lxc/lxc.h | 8 +- > src/lxc/state.c | 2 +- > 5 files changed, 135 insertions(+), 86 deletions(-) > > diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c > index a068a01..ecba56e 100644 > --- a/src/lxc/cgroup.c > +++ b/src/lxc/cgroup.c > @@ -52,11 +52,10 @@ enum { > CGROUP_CLONE_CHILDREN, > }; > > -static int get_cgroup_mount(const char *mtab, char *mnt) > +static int get_cgroup_mount(const char *mtab, const char *subsystem, char > *mnt) > { > struct mntent *mntent; > FILE *file = NULL; > - int err = -1; > > file = setmntent(mtab, "r"); > if (!file) { > @@ -66,29 +65,24 @@ static int get_cgroup_mount(const char *mtab, char *mnt) > > while ((mntent = getmntent(file))) { > > - /* there is a cgroup mounted named "lxc" */ > - if (!strcmp(mntent->mnt_fsname, "lxc") && > - !strcmp(mntent->mnt_type, "cgroup")) { > - strcpy(mnt, mntent->mnt_dir); > - err = 0; > - break; > - } > - > - /* fallback to the first non-lxc cgroup found */ > - if (!strcmp(mntent->mnt_type, "cgroup") && err) { > + if (strcmp(mntent->mnt_type, "cgroup")) > + continue; > + if (!subsystem || hasmntopt(mntent, subsystem)) { > strcpy(mnt, mntent->mnt_dir); > - err = 0; > + fclose(file); > + DEBUG("using cgroup mounted at '%s'", mnt); > + return 0; > } > }; > > - DEBUG("using cgroup mounted at '%s'", mnt); > + DEBUG("Failed to find cgroup for %s\n", subsystem ? subsystem : > "(NULL)"); > > fclose(file); > > - return err; > + return -1; > } > > -static int get_cgroup_flags(const char *mtab, int *flags) > +static int get_cgroup_flags(const char *mtab, const char *mnt_dir, int > *flags) > { > struct mntent *mntent; > FILE *file = NULL; > @@ -103,38 +97,24 @@ static int get_cgroup_flags(const char *mtab, int *flags) > *flags = 0; > > while ((mntent = getmntent(file))) { > - > - /* there is a cgroup mounted named "lxc" */ > - if (!strcmp(mntent->mnt_fsname, "lxc") && > - !strcmp(mntent->mnt_type, "cgroup")) { > - > - if (hasmntopt(mntent, "ns")) > - *flags |= CGROUP_NS_CGROUP; > - > - if (hasmntopt(mntent, "clone_children")) > - *flags |= CGROUP_CLONE_CHILDREN; > - > + if (strcmp(mntent->mnt_type, "cgroup")) > + continue; > + if (strcmp(mntent->mnt_dir, mnt_dir)) > + continue; > + if (hasmntopt(mntent, "ns")) { > + *flags |= CGROUP_NS_CGROUP; > err = 0; > - break; > } > - > - /* fallback to the first non-lxc cgroup found */ > - if (!strcmp(mntent->mnt_type, "cgroup") && err) { > - > - if (hasmntopt(mntent, "ns")) > - *flags |= CGROUP_NS_CGROUP; > - > - if (hasmntopt(mntent, "clone_children")) > - *flags |= CGROUP_CLONE_CHILDREN; > - > + if (hasmntopt(mntent, "clone_children")) { > + *flags |= CGROUP_CLONE_CHILDREN; > err = 0; > } > - }; > - > - DEBUG("cgroup flags is 0x%x", *flags); > - > - fclose(file); > + fclose(file); > + DEBUG("cgroup flags for %s is 0x%x", mnt_dir, *flags); > + return err; > + } > > + fclose(file); > return err; > } > > @@ -199,18 +179,17 @@ static int cgroup_attach(const char *path, pid_t pid) > return ret; > } > > -int lxc_cgroup_create(const char *name, pid_t pid) > +/* > + * create a cgroup for the container in a particular subsystem. > + * XXX TODO we will of course want to use cgroup_path{subsystem}/lxc/name, > + * not just cgroup_path{subsystem}/name. > + */ > +int lxc_one_cgroup_create(const char *name, const char *cgmnt, pid_t pid) > { > - char cgmnt[MAXPATHLEN]; > char cgname[MAXPATHLEN]; > char clonechild[MAXPATHLEN]; > int flags; > > - if (get_cgroup_mount(MTAB, cgmnt)) { > - ERROR("cgroup is not mounted"); > - return -1; > - } > - > snprintf(cgname, MAXPATHLEN, "%s/%s", cgmnt, name); > > /* > @@ -222,7 +201,7 @@ int lxc_cgroup_create(const char *name, pid_t pid) > return -1; > } > > - if (get_cgroup_flags(MTAB, &flags)) { > + if (get_cgroup_flags(MTAB, cgmnt, &flags)) { > SYSERROR("failed to get cgroup flags"); > return -1; > } > @@ -266,16 +245,44 @@ int lxc_cgroup_create(const char *name, pid_t pid) > return 0; > } > > -int lxc_cgroup_destroy(const char *name) > +/* > + * for each mounted cgroup, create a cgroup for the container > + */ > +int lxc_cgroup_create(const char *name, pid_t pid) > { > - char cgmnt[MAXPATHLEN]; > - char cgname[MAXPATHLEN]; > + struct mntent *mntent; > + FILE *file = NULL; > + int ret, err = -1; > > - if (get_cgroup_mount(MTAB, cgmnt)) { > - ERROR("cgroup is not mounted"); > + DEBUG("%s: starting\n", __func__); > + file = setmntent(MTAB, "r"); > + if (!file) { > + SYSERROR("failed to open %s", MTAB); > return -1; > } > > + while ((mntent = getmntent(file))) { > + if (!strcmp(mntent->mnt_type, "cgroup")) { > + DEBUG("creating %s %s %d\n", name, mntent->mnt_dir, > pid); > + err = lxc_one_cgroup_create(name, mntent->mnt_dir, pid); > + if (ret) { > + fclose(file); > + return ret; > + } > + err = 0; > + } > + }; > + > + fclose(file); > + > + return err; > +} > + > + > +int lxc_one_cgroup_destroy(const char *cgmnt, const char *name) > +{ > + char cgname[MAXPATHLEN]; > + > snprintf(cgname, MAXPATHLEN, "%s/%s", cgmnt, name); > if (rmdir(cgname)) { > SYSERROR("failed to remove cgroup '%s'", cgname); > @@ -287,37 +294,79 @@ int lxc_cgroup_destroy(const char *name) > return 0; > } > > -int lxc_cgroup_path_get(char **path, const char *name) > +/* > + * for each mounted cgroup, destroy the cgroup for the container > + */ > +int lxc_cgroup_destroy(const char *name) > { > - static char cgroup[MAXPATHLEN]; > - static const char* cgroup_cached = 0; > - static char buf[MAXPATHLEN]; > + struct mntent *mntent; > + FILE *file = NULL; > + int ret, err = -1; > > - if (!cgroup_cached) { > - if (get_cgroup_mount(MTAB, cgroup)) { > - ERROR("cgroup is not mounted"); > - return -1; > - } else { > - cgroup_cached = cgroup; > + file = setmntent(MTAB, "r"); > + if (!file) { > + SYSERROR("failed to open %s", MTAB); > + return -1; > + } > + > + while ((mntent = getmntent(file))) { > + if (!strcmp(mntent->mnt_type, "cgroup")) { > + DEBUG("destroying %s %s\n", mntent->mnt_dir, name); > + ret = lxc_one_cgroup_destroy(mntent->mnt_dir, name); > + if (ret) { > + fclose(file); > + return ret; > + } > + err = 0; > } > + } > + > + fclose(file); > + > + return err; > +} > +/* > + * lxc_cgroup_path_get: put into *path the pathname for > + * %subsystem and cgroup %name. If %subsystem is NULL, then > + * the first mounted cgroup will be used (for nr_tasks) > + */ > +int lxc_cgroup_path_get(char **path, const char *subsystem, const char *name) > +{ > + static char buf[MAXPATHLEN]; > + static char retbuf[MAXPATHLEN]; > + > + /* what lxc_cgroup_set calls subsystem is actually the filename, i.e. > + 'devices.allow'. So for our purposee we trim it */ > + if (subsystem) { > + snprintf(retbuf, MAXPATHLEN, "%s", subsystem); > + char *s = index(retbuf, '.'); > + *s = '\0'; > + DEBUG("%s: called for subsys %s name %s\n", __func__, retbuf, > name); > } > + if (get_cgroup_mount(MTAB, subsystem ? retbuf : NULL, buf)) { > + ERROR("cgroup is not mounted"); > + return -1; > + } > + > + snprintf(retbuf, MAXPATHLEN, "%s/%s", buf, name); > + > + DEBUG("%s: returning %s for subsystem %s", __func__, retbuf, subsystem); > > - snprintf(buf, MAXPATHLEN, "%s/%s", cgroup_cached, name); > - *path = buf; > + *path = retbuf; > return 0; > } > > -int lxc_cgroup_set(const char *name, const char *subsystem, const char > *value) > +int lxc_cgroup_set(const char *name, const char *filename, const char *value) > { > int fd, ret; > - char *nsgroup; > + char *dirpath; > char path[MAXPATHLEN]; > > - ret = lxc_cgroup_path_get(&nsgroup, name); > + ret = lxc_cgroup_path_get(&dirpath, filename, name); > if (ret) > return -1; > > - snprintf(path, MAXPATHLEN, "%s/%s", nsgroup, subsystem); > + snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename); > > fd = open(path, O_WRONLY); > if (fd < 0) { > @@ -337,18 +386,18 @@ out: > return ret; > } > > -int lxc_cgroup_get(const char *name, const char *subsystem, > +int lxc_cgroup_get(const char *name, const char *filename, > char *value, size_t len) > { > int fd, ret = -1; > - char *nsgroup; > + char *dirpath; > char path[MAXPATHLEN]; > > - ret = lxc_cgroup_path_get(&nsgroup, name); > + ret = lxc_cgroup_path_get(&dirpath, filename, name); > if (ret) > return -1; > > - snprintf(path, MAXPATHLEN, "%s/%s", nsgroup, subsystem); > + snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename); > > fd = open(path, O_RDONLY); > if (fd < 0) { > @@ -366,16 +415,16 @@ int lxc_cgroup_get(const char *name, const char > *subsystem, > > int lxc_cgroup_nrtasks(const char *name) > { > - char *nsgroup; > + char *dpath; > char path[MAXPATHLEN]; > int pid, ret, count = 0; > FILE *file; > > - ret = lxc_cgroup_path_get(&nsgroup, name); > + ret = lxc_cgroup_path_get(&dpath, NULL, name); > if (ret) > return -1; > > - snprintf(path, MAXPATHLEN, "%s/tasks", nsgroup); > + snprintf(path, MAXPATHLEN, "%s/tasks", dpath); > > file = fopen(path, "r"); > if (!file) { > diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h > index 8607fa8..f6df8c5 100644 > --- a/src/lxc/cgroup.h > +++ b/src/lxc/cgroup.h > @@ -28,6 +28,6 @@ > struct lxc_handler; > int lxc_cgroup_create(const char *name, pid_t pid); > int lxc_cgroup_destroy(const char *name); > -int lxc_cgroup_path_get(char **path, const char *name); > +int lxc_cgroup_path_get(char **path, const char *subsystem, const char > *name); > int lxc_cgroup_nrtasks(const char *name); > #endif > diff --git a/src/lxc/freezer.c b/src/lxc/freezer.c > index 07eede5..94984a3 100644 > --- a/src/lxc/freezer.c > +++ b/src/lxc/freezer.c > @@ -45,7 +45,7 @@ static int freeze_unfreeze(const char *name, int freeze) > char tmpf[32]; > int fd, ret; > > - ret = lxc_cgroup_path_get(&nsgroup, name); > + ret = lxc_cgroup_path_get(&nsgroup, "freezer", name); > if (ret) > return -1; > > diff --git a/src/lxc/lxc.h b/src/lxc/lxc.h > index a091baa..04df1c6 100644 > --- a/src/lxc/lxc.h > +++ b/src/lxc/lxc.h > @@ -114,22 +114,22 @@ extern lxc_state_t lxc_state(const char *name); > * Set a specified value for a specified subsystem. The specified > * subsystem must be fully specified, eg. "cpu.shares" > * @name : the name of the container > - * @subsystem : the subsystem > + * @filename : the cgroup attribute filename > * @value : the value to be set > * Returns 0 on success, < 0 otherwise > */ > -extern int lxc_cgroup_set(const char *name, const char *subsystem, const > char *value); > +extern int lxc_cgroup_set(const char *name, const char *filename, const char > *value); > > /* > * Get a specified value for a specified subsystem. The specified > * subsystem must be fully specified, eg. "cpu.shares" > * @name : the name of the container > - * @subsystem : the subsystem > + * @filename : the cgroup attribute filename > * @value : the value to be set > * @len : the len of the value variable > * Returns the number of bytes read, < 0 on error > */ > -extern int lxc_cgroup_get(const char *name, const char *subsystem, > +extern int lxc_cgroup_get(const char *name, const char *filename, > char *value, size_t len); > > /* > diff --git a/src/lxc/state.c b/src/lxc/state.c > index 6720011..b435eba 100644 > --- a/src/lxc/state.c > +++ b/src/lxc/state.c > @@ -71,7 +71,7 @@ static int freezer_state(const char *name) > FILE *file; > int err; > > - err = lxc_cgroup_path_get(&nsgroup, name); > + err = lxc_cgroup_path_get(&nsgroup, "freezer", name); > if (err) > return -1; > -- Michael H. Warfield (AI4NB) | (770) 985-6132 | m...@wittsend.com /\/\|=mhw=|\/\/ | (678) 463-0932 | http://www.wittsend.com/mhw/ NIC whois: MHW9 | An optimist believes we live in the best of all PGP Key: 0x674627FF | possible worlds. A pessimist is sure of it!
signature.asc
Description: This is a digitally signed message part
------------------------------------------------------------------------------ All the data continuously generated in your IT infrastructure contains a definitive record of customers, application performance, security threats, fraudulent activity and more. Splunk takes this data and makes sense of it. Business sense. IT sense. Common sense.. http://p.sf.net/sfu/splunk-d2d-c1
_______________________________________________ Lxc-users mailing list Lxc-users@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/lxc-users