On Wed, Apr 29, 2015 at 02:56:24PM +0300, Cyrill Gorcunov wrote: > Here we rip off all the virtualization code we introduced into kernel to > behave close to rhel6. > > Because we're trying a new concept (bindmounting from the node) it is > no longer needed. > > Now some details: > > - drop cgroup_show_path -- we don't hide VEID in /proc/self/cgroup output, > it doesn't break criu so no need to carry it, same applies to changes > in cgroup_path; > > - because we drop virtualization of systemd -- disable creation of new > hierarchies in container: we don't support it, neither we need it. The > primary reason why we allowed new hierarchies in container was that > CRIU has been running restore procedure inside VE but now we initiate > restore from VE0, so we can safely disable new hierarchies; > > - in cgroup_addrm_files go back to former RHEL7 code; if we need something > special here it must be reviewed carefully and separately; > > - no need to hide /proc/cgroups in VE, there is no sensible info present.
Again, not everything is removed. E.g. we no longer need cgroup_kernel_destroy and ve->ve_cgroup_head. Please check out c2ac6df22b20389ae2d0af49c436b00ff3243e89 ("VE: virtualize cgroups") and fix accordingly. > > Signed-off-by: Cyrill Gorcunov <gorcu...@odin.com> > CC: Vladimir Davydov <vdavy...@odin.com> > CC: Konstantin Khorenko <khore...@odin.com> > CC: Pavel Emelyanov <xe...@odin.com> > CC: Andrey Vagin <ava...@odin.com> > --- > include/linux/cgroup.h | 3 - > kernel/cgroup.c | 116 > ++++++++----------------------------------------- > 2 files changed, 19 insertions(+), 100 deletions(-) > > Index: linux-pcs7.git/include/linux/cgroup.h > =================================================================== > --- linux-pcs7.git.orig/include/linux/cgroup.h > +++ linux-pcs7.git/include/linux/cgroup.h > @@ -191,9 +191,6 @@ enum { > /* see the comment above CGRP_ROOT_SANE_BEHAVIOR for details */ > CGRP_SANE_BEHAVIOR, > CGRP_SELF_DESTRUCTION, > - > - /* container virtualization */ > - CGRP_VE_TOP_CGROUP_VIRTUAL, > }; > > struct cgroup_name { > Index: linux-pcs7.git/kernel/cgroup.c > =================================================================== > --- linux-pcs7.git.orig/kernel/cgroup.c > +++ linux-pcs7.git/kernel/cgroup.c > @@ -1125,18 +1125,6 @@ static int cgroup_show_options(struct se > return 0; > } > > -static int cgroup_show_path(struct seq_file *m, struct dentry *root) > -{ > - struct ve_struct *ve = get_exec_env(); > - struct cgroup *cgrp = __d_cgrp(root); > - > - if (!ve_is_super(ve) && test_bit(CGRP_VE_TOP_CGROUP_VIRTUAL, > &cgrp->flags)) > - seq_puts(m, "/"); > - else > - seq_dentry(m, root, " \t\n\\"); > - return 0; > -} > - > /* > * Convert a hierarchy specifier into a bitmask of subsystems and flags. Call > * with cgroup_mutex held to protect the subsys[] array. 
This function takes > @@ -1299,26 +1287,6 @@ static int parse_cgroupfs_options(char * > if (!opts->subsys_mask && !opts->name) > return -EINVAL; > > - /* virtualize 'systemd' hierarchy */ > - if (!ve_is_super(get_exec_env()) && !opts->subsys_mask && opts->name && > !strcmp(opts->name, "systemd")) > - set_bit(CGRP_ROOT_VIRTUAL, &opts->flags); > - > - /* forbid non-virtualized hierarchies in containers */ > - if (!ve_is_super(get_exec_env()) && !test_bit(CGRP_ROOT_VIRTUAL, > &opts->flags)) { > - WARN_ONCE(1, "Allow non-virtualized hierarchies for CRIU > sake\n"); > - /* > - * FIXME > - * > - * We need to somehow limit this ability for CRIU only, because > - * we've to run restore procedure from inside of VE cgroup > - * (otherwise a number of get_exec_env() in network code > - * won't work as needed). > - * > - * -- cyrillos > - */ > - /* return opts->subsys_mask ? -ENOENT : -EPERM; */ > - } > - > /* > * Grab references on all the modules we'll need, so the subsystems > * don't dance around before rebind_subsystems attaches them. This may > @@ -1441,7 +1409,6 @@ static const struct super_operations cgr > .drop_inode = generic_delete_inode, > .show_options = cgroup_show_options, > .remount_fs = cgroup_remount, > - .show_path = cgroup_show_path, > }; > > static void init_cgroup_housekeeping(struct cgroup *cgrp) > @@ -1621,7 +1588,6 @@ static struct dentry *cgroup_mount(struc > struct super_block *sb; > struct cgroupfs_root *new_root; > struct inode *inode; > - struct dentry *root_dentry; > > /* First find the desired set of subsystems */ > if (!(flags & MS_KERNMOUNT)) { > @@ -1668,6 +1634,17 @@ static struct dentry *cgroup_mount(struc > > BUG_ON(sb->s_root != NULL); > > +#ifdef CONFIG_VE > + /* > + * Don't allow to create new hierarchies in container, > + * we don't support them. 
> + */ > + if (!ve_is_super(get_exec_env())) { > + ret = -EACCES; > + goto drop_new_super; > + } > +#endif > + > ret = cgroup_get_rootdir(sb); > if (ret) > goto drop_new_super; > @@ -1727,11 +1704,9 @@ static struct dentry *cgroup_mount(struc > BUG_ON(!list_empty(&root_cgrp->children)); > BUG_ON(root->number_of_cgroups != 1); > > - if (!test_bit(CGRP_ROOT_VIRTUAL, &opts.flags)) { > - root_cgrp->release_agent = opts.release_agent; > - root_cgrp->cgroup_ve = get_exec_env(); > - opts.release_agent = NULL; > - } > + root_cgrp->release_agent = opts.release_agent; > + root_cgrp->cgroup_ve = get_exec_env(); > + opts.release_agent = NULL; > > cred = override_creds(&init_cred); > cgroup_populate_dir(root_cgrp, true, root->subsys_mask); > @@ -1760,40 +1735,9 @@ static struct dentry *cgroup_mount(struc > drop_parsed_module_refcounts(opts.subsys_mask); > } > > - if (!test_bit(CGRP_ROOT_VIRTUAL, &opts.flags)) { > - root_dentry = dget(sb->s_root); > - } else { > - struct ve_struct *ve = get_exec_env(); > - struct cgroup *top_cgrp; > - > - top_cgrp = cgroup_kernel_open(&root->top_cgroup, 0, > ve->ve_name); > - ret = PTR_ERR(top_cgrp); > - if (IS_ERR(top_cgrp)) > - goto drop_new_super; > - > - /* create fake root-cgroup in virtualized hierarchy */ > - if (top_cgrp == NULL) { > - top_cgrp = cgroup_kernel_open(&root->top_cgroup, > CGRP_CREAT, ve->ve_name); > - ret = PTR_ERR(top_cgrp); > - if (IS_ERR(top_cgrp)) > - goto drop_new_super; > - > - mutex_lock(&cgroup_mutex); > - top_cgrp->cgroup_ve = ve; > - top_cgrp->release_agent = opts.release_agent; > - opts.release_agent = NULL; > - set_bit(CGRP_VE_TOP_CGROUP_VIRTUAL, &top_cgrp->flags); > - mutex_unlock(&cgroup_mutex); > - } > - > - /* mount it as bindmount to fist-level fake root-cgroup */ > - root_dentry = dget(top_cgrp->dentry); > - cgroup_kernel_close(top_cgrp); > - } > - > kfree(opts.release_agent); > kfree(opts.name); > - return root_dentry; > + return dget(sb->s_root); > > unlock_drop: > mutex_unlock(&cgroup_root_mutex); > @@ 
-1881,7 +1825,6 @@ static struct kobject *cgroup_kobj; > */ > int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) > { > - struct ve_struct *ve = get_exec_env(); > int ret = -ENAMETOOLONG; > char *start; > > @@ -1899,16 +1842,6 @@ int cgroup_path(const struct cgroup *cgr > const char *name = cgroup_name(cgrp); > int len; > > - /* Hide fake root-cgroup in virtualized hierarchy */ > - if (!ve_is_super(ve) && test_bit(CGRP_VE_TOP_CGROUP_VIRTUAL, > &cgrp->flags)) { > - if (*start != '/') { > - if (--start < buf) > - goto out; > - *start = '/'; > - } > - break; > - } > - > len = strlen(name); > if ((start -= len) < buf) > goto out; > @@ -2843,9 +2776,9 @@ static int cgroup_addrm_files(struct cgr > /* does cft->flags tell us to skip this file on @cgrp? */ > if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp)) > continue; > - if ((cft->flags & CFTYPE_NOT_ON_ROOT) && > &cgrp->root->top_cgroup == cgrp) > + if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent) > continue; > - if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && > &cgrp->root->top_cgroup != cgrp) > + if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent) > continue; > > if (is_add) { > @@ -4313,13 +4246,8 @@ static long cgroup_create(struct cgroup > cgrp->parent = parent; > cgrp->root = parent->root; > > - if (test_bit(CGRP_ROOT_VIRTUAL, &root->flags) && parent == > &root->top_cgroup) { > - cgrp->cgroup_ve = get_exec_env(); > - list_add(&cgrp->cgroup_ve_list, > &cgrp->cgroup_ve->ve_cgroup_head); > - } else { > - cgrp->cgroup_ve = parent->cgroup_ve; > - list_add(&cgrp->cgroup_ve_list, &parent->cgroup_ve_list); > - } > + cgrp->cgroup_ve = parent->cgroup_ve; > + list_add(&cgrp->cgroup_ve_list, &parent->cgroup_ve_list); > > if (notify_on_release(parent)) > set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); > @@ -4926,14 +4854,8 @@ out: > static int proc_cgroupstats_show(struct seq_file *m, void *v) > { > int i; > - struct ve_struct *ve = get_exec_env(); > > seq_puts(m, 
"#subsys_name\thierarchy\tnum_cgroups\tenabled\n"); > - > - /* cgset wants to read /proc/cgroups and it's used for starting CT */ > - if (!ve_is_super(ve) && ve->is_running) > - return 0; > - > /* > * ideally we don't want subsystems moving around while we do this. > * cgroup_mutex is also necessary to guarantee an atomic snapshot of > _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel