On 10.02.2021 13:03, Valeriy Vdovin wrote: > Due to virtualization of release_agent cgroup property, cgroup1_show_options > has become more complex. > struct cgroup_root is one of the arguments to that function, it was previously > holding the value of release_agent. But now this property is per-ve AND > per-cgroup. That's why to find the right release_agent value, the code should > convert cgroup_root into one specific cgroup that is a 'virtual cgroup root' > of > a container, represented by the current VE. Getting ve is trivial but cgroup > can be found by a helper function that will iterate css_set links under > cgroup_mutex lock. There is a lock inversion problem when using cgroup_mutex > in cgroup1_show_options, lockdep shows cgroup_mutex conflicts with > kernfs_node->dep_map. > This can be solved easily by converting per-cgroup data structure in VE into > per-cgroup-root. This way we can provide ve_set(get)release_agent_path > directly > with struct cgroup_root agrument. For each cgroup hierarchy there is only one > root and for each VE there can only be one virtual root either, that's why > it is safe to just use cgroup_root as a key to find the proper release_agent > path in each VE. > > Signed-off-by: Valeriy Vdovin <[email protected]>
Reviewed-by: Kirill Tkhai <[email protected]> > --- > include/linux/ve.h | 8 ++++--- > kernel/cgroup/cgroup-v1.c | 44 +++++++++++---------------------------- > kernel/cgroup/cgroup.c | 5 +++-- > kernel/ve/ve.c | 19 +++++++---------- > 4 files changed, 27 insertions(+), 49 deletions(-) > > diff --git a/include/linux/ve.h b/include/linux/ve.h > index 7cef4b39847e..3b487f8a4a50 100644 > --- a/include/linux/ve.h > +++ b/include/linux/ve.h > @@ -145,12 +145,14 @@ extern int nr_ve; > void ve_add_to_release_list(struct cgroup *cgrp); > void ve_rm_from_release_list(struct cgroup *cgrp); > > -int ve_set_release_agent_path(struct cgroup *cgroot, > +int ve_set_release_agent_path(struct ve_struct *ve, struct cgroup_root > *cgroot, > const char *release_agent); > > -const char *ve_get_release_agent_path(struct cgroup *cgrp_root); > +const char *ve_get_release_agent_path(struct ve_struct *ve, > + struct cgroup_root *cgroot); > > -void ve_cleanup_per_cgroot_data(struct ve_struct *ve, struct cgroup *cgrp); > +void ve_cleanup_per_cgroot_data(struct ve_struct *ve, > + struct cgroup_root *cgrp); > > extern struct ve_struct *get_ve(struct ve_struct *ve); > extern void put_ve(struct ve_struct *ve); > diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c > index 46be2f688503..993ac38b895f 100644 > --- a/kernel/cgroup/cgroup-v1.c > +++ b/kernel/cgroup/cgroup-v1.c > @@ -577,7 +577,8 @@ static ssize_t cgroup_release_agent_write(struct > kernfs_open_file *of, > } > > if (root_cgrp->ve_owner) > - ret = ve_set_release_agent_path(root_cgrp, strstrip(buf)); > + ret = ve_set_release_agent_path(root_cgrp->ve_owner, > + root_cgrp->root, strstrip(buf)); > else > ret = -ENODEV; > > @@ -598,7 +599,9 @@ static int cgroup_release_agent_show(struct seq_file > *seq, void *v) > root_cgrp = cgroup_get_local_root(cgrp); > if (root_cgrp->ve_owner) { > rcu_read_lock(); > - release_agent = ve_get_release_agent_path(root_cgrp); > + release_agent = ve_get_release_agent_path( > + rcu_dereference(root_cgrp->ve_owner), > + root_cgrp->root); > > if (release_agent) > seq_puts(seq, release_agent); > @@ -910,7 +913,7 @@ void cgroup1_release_agent(struct work_struct *work) > goto continue_free; > } > > - release_agent = ve_get_release_agent_path(root_cgrp); > + release_agent = ve_get_release_agent_path(ve, root_cgrp->root); > > *agentbuf = 0; > if (release_agent) > @@ -931,7 +934,9 @@ void cgroup1_release_agent(struct work_struct *work) > envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; > envp[i] = NULL; > > + > mutex_unlock(&cgroup_mutex); > + > err = call_usermodehelper_ve(ve, argv[0], argv, > envp, UMH_WAIT_EXEC); > > @@ -939,6 +944,7 @@ void cgroup1_release_agent(struct work_struct *work) > pr_warn_ratelimited("cgroup1_release_agent " > "%s %s failed: %d\n", > agentbuf, pathbuf, err); > + up_write(&ve->op_sem); > mutex_lock(&cgroup_mutex); > continue_free: > kfree(pathbuf); > @@ -989,7 +995,6 @@ static int cgroup1_show_options(struct seq_file *seq, > struct kernfs_root *kf_roo > const char *release_agent; > struct cgroup_root *root = cgroup_root_from_kf(kf_root); > struct cgroup_subsys *ss; > - struct cgroup *root_cgrp = &root->cgrp; > int ssid; > > for_each_subsys(ss, ssid) > @@ -1003,32 +1008,7 @@ static int cgroup1_show_options(struct seq_file *seq, > struct kernfs_root *kf_roo > seq_puts(seq, ",cpuset_v2_mode"); > > rcu_read_lock(); > -#ifdef CONFIG_VE > - { > - struct ve_struct *ve = get_exec_env(); > - struct css_set *cset; > - struct nsproxy *ve_ns; > - > - if (!ve_is_super(ve)) { > - /* > - * ve->init_task is NULL in case when cgroup is accessed > - * before ve_start_container has been called. > - * > - * ve->init_task is synchronized via ve->ve_ns rcu, see > - * ve_grab_context/drop_context. > - */ > - ve_ns = rcu_dereference(ve->ve_ns); > - if (ve_ns) { > - spin_lock_irq(&css_set_lock); > - cset = ve_ns->cgroup_ns->root_cset; > - BUG_ON(!cset); > - root_cgrp = cset_cgroup_from_root(cset, root); > - spin_unlock_irq(&css_set_lock); > - } > - } > - } > -#endif > - release_agent = ve_get_release_agent_path(root_cgrp); > + release_agent = ve_get_release_agent_path(get_exec_env(), root); > if (release_agent && release_agent[0]) > seq_show_option(seq, "release_agent", release_agent); > rcu_read_unlock(); > @@ -1226,8 +1206,8 @@ static int cgroup1_remount(struct kernfs_root *kf_root, > int *flags, char *data) > > root_cgrp = cgroup_get_local_root(&root->cgrp); > if (root_cgrp->ve_owner) > - ret = ve_set_release_agent_path(root_cgrp, > - opts.release_agent); > + ret = ve_set_release_agent_path(root_cgrp->ve_owner, > + root_cgrp->root, opts.release_agent); > } > > trace_cgroup_remount(root); > diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c > index 75997b503d3c..09d328b76dab 100644 > --- a/kernel/cgroup/cgroup.c > +++ b/kernel/cgroup/cgroup.c > @@ -2152,7 +2152,8 @@ void init_cgroup_root(struct cgroup_root *root, struct > cgroup_sb_opts *opts) > > root->flags = opts->flags; > if (opts->release_agent) > - ve_set_release_agent_path(cgrp, opts->release_agent); > + ve_set_release_agent_path(cgrp->ve_owner, root, > + opts->release_agent); > > if (opts->name) > strscpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN); > @@ -2360,7 +2361,7 @@ static void cgroup_kill_sb(struct super_block *sb) > percpu_ref_kill(&root->cgrp.self.refcnt); > cgroup_put(&root->cgrp); > kernfs_kill_sb(sb); > - ve_cleanup_per_cgroot_data(NULL, &root->cgrp); > + ve_cleanup_per_cgroot_data(&ve0, root); > } > > struct file_system_type cgroup_fs_type = { > diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c > index e8945e10e070..031b104075c8 100644 > --- a/kernel/ve/ve.c > +++ b/kernel/ve/ve.c > @@ -35,7 +35,7 @@ struct per_cgroot_data { > /* > * data is related to this cgroup > */ > - struct cgroup *cgroot; > + struct cgroup_root *cgroot; > > /* > * path to release agent binaray, that should > @@ -217,7 +217,7 @@ int nr_threads_ve(struct ve_struct *ve) > EXPORT_SYMBOL(nr_threads_ve); > > static struct per_cgroot_data *per_cgroot_data_find_locked( > - struct list_head *per_cgroot_list, struct cgroup *cgroot) > + struct list_head *per_cgroot_list, struct cgroup_root *cgroot) > { > struct per_cgroot_data *data; > > @@ -229,7 +229,7 @@ static struct per_cgroot_data > *per_cgroot_data_find_locked( > } > > static inline struct per_cgroot_data *per_cgroot_get_or_create( > - struct ve_struct *ve, struct cgroup *cgroot) > + struct ve_struct *ve, struct cgroup_root *cgroot) > { > struct per_cgroot_data *data, *other_data; > unsigned long flags; > @@ -263,10 +263,9 @@ static inline struct per_cgroot_data > *per_cgroot_get_or_create( > return data; > } > > -int ve_set_release_agent_path(struct cgroup *cgroot, > +int ve_set_release_agent_path(struct ve_struct *ve, struct cgroup_root > *cgroot, > const char *release_agent) > { > - struct ve_struct *ve; > unsigned long flags; > struct per_cgroot_data *data; > struct cgroup_rcu_string *new_path, *old_path; > @@ -276,7 +275,6 @@ int ve_set_release_agent_path(struct cgroup *cgroot, > * caller should grab cgroup_mutex to safely use > * ve_owner field > */ > - ve = cgroot->ve_owner; > BUG_ON(!ve); > > nbytes = strlen(release_agent); > @@ -304,16 +302,15 @@ int ve_set_release_agent_path(struct cgroup *cgroot, > return 0; > } > > -const char *ve_get_release_agent_path(struct cgroup *cgroot) > +const char *ve_get_release_agent_path(struct ve_struct *ve, > + struct cgroup_root *cgroot) > { > /* caller must grab rcu_read_lock */ > const char *result = NULL; > struct per_cgroot_data *data; > struct cgroup_rcu_string *str; > - struct ve_struct *ve; > unsigned long flags; > > - ve = rcu_dereference(cgroot->ve_owner); > if (!ve) > return NULL; > > @@ -677,15 +674,13 @@ static inline void per_cgroot_data_free(struct > per_cgroot_data *data) > kfree(data); > } > > -void ve_cleanup_per_cgroot_data(struct ve_struct *ve, struct cgroup *cgrp) > +void ve_cleanup_per_cgroot_data(struct ve_struct *ve, struct cgroup_root > *cgrp) > { > struct per_cgroot_data *data, *saved; > unsigned long flags; > > BUG_ON(!ve && !cgrp); > rcu_read_lock(); > - if (!ve) > - ve = cgroup_get_ve_owner(cgrp); > > spin_lock_irqsave(&ve->per_cgroot_list_lock, flags); > list_for_each_entry_safe(data, saved, &ve->per_cgroot_list, list) { > _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
