On 15.02.2021 14:00, Kirill Tkhai wrote:
> On 10.02.2021 13:03, Valeriy Vdovin wrote:
>> Due to virtualization of release_agent cgroup property, cgroup1_show_options
>> has become more complex.
>> struct cgroup_root is one of the arguments to that function, it was 
>> previously
>> holding the value of release_agent. But now this property is per-ve AND
>> per-cgroup.  That's why to find the right release_agent value, the code 
>> should
>> convert cgroup_root into one specific cgroup that is a 'virtual cgroup root' 
>> of
>> a container, represented by the current VE. Getting ve is trivial but cgroup
>> can be found by a helper function that will iterate css_set links under
>> cgroup_mutex lock. There is a lock inversion problem when using cgroup_mutex
>> in cgroup1_show_options, lockdep shows cgroup_mutex conflicts with
>> kernfs_node->dep_map.
>> This can be solved easily by converting per-cgroup data structure in VE into
>> per-cgroup-root. This way we can provide ve_set(get)release_agent_path 
>> directly
>> with struct cgroup_root agrument. For each cgroup hierarchy there is only one
>> root and for each VE there can only be one virtual root either, that's why
>> it is safe to just use cgroup_root as a key to find the proper release_agent
>> path in each VE.
>>
>> Signed-off-by: Valeriy Vdovin <[email protected]>
> 
> Reviewed-by: Kirill Tkhai <[email protected]>

NACK
 
>> ---
>>  include/linux/ve.h        |  8 ++++---
>>  kernel/cgroup/cgroup-v1.c | 44 +++++++++++----------------------------
>>  kernel/cgroup/cgroup.c    |  5 +++--
>>  kernel/ve/ve.c            | 19 +++++++----------
>>  4 files changed, 27 insertions(+), 49 deletions(-)
>>
>> diff --git a/include/linux/ve.h b/include/linux/ve.h
>> index 7cef4b39847e..3b487f8a4a50 100644
>> --- a/include/linux/ve.h
>> +++ b/include/linux/ve.h
>> @@ -145,12 +145,14 @@ extern int nr_ve;
>>  void ve_add_to_release_list(struct cgroup *cgrp);
>>  void ve_rm_from_release_list(struct cgroup *cgrp);
>>  
>> -int ve_set_release_agent_path(struct cgroup *cgroot,
>> +int ve_set_release_agent_path(struct ve_struct *ve, struct cgroup_root 
>> *cgroot,
>>      const char *release_agent);
>>  
>> -const char *ve_get_release_agent_path(struct cgroup *cgrp_root);
>> +const char *ve_get_release_agent_path(struct ve_struct *ve,
>> +    struct cgroup_root *cgroot);
>>  
>> -void ve_cleanup_per_cgroot_data(struct ve_struct *ve, struct cgroup *cgrp);
>> +void ve_cleanup_per_cgroot_data(struct ve_struct *ve,
>> +    struct cgroup_root *cgrp);
>>  
>>  extern struct ve_struct *get_ve(struct ve_struct *ve);
>>  extern void put_ve(struct ve_struct *ve);
>> diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
>> index 46be2f688503..993ac38b895f 100644
>> --- a/kernel/cgroup/cgroup-v1.c
>> +++ b/kernel/cgroup/cgroup-v1.c
>> @@ -577,7 +577,8 @@ static ssize_t cgroup_release_agent_write(struct 
>> kernfs_open_file *of,
>>      }
>>  
>>      if (root_cgrp->ve_owner)
>> -            ret = ve_set_release_agent_path(root_cgrp, strstrip(buf));
>> +            ret = ve_set_release_agent_path(root_cgrp->ve_owner,
>> +                    root_cgrp->root, strstrip(buf));
>>      else
>>              ret = -ENODEV;
>>  
>> @@ -598,7 +599,9 @@ static int cgroup_release_agent_show(struct seq_file 
>> *seq, void *v)
>>      root_cgrp = cgroup_get_local_root(cgrp);
>>      if (root_cgrp->ve_owner) {
>>              rcu_read_lock();
>> -            release_agent = ve_get_release_agent_path(root_cgrp);
>> +            release_agent = ve_get_release_agent_path(
>> +                    rcu_dereference(root_cgrp->ve_owner),
>> +                    root_cgrp->root);
>>  
>>              if (release_agent)
>>                      seq_puts(seq, release_agent);
>> @@ -910,7 +913,7 @@ void cgroup1_release_agent(struct work_struct *work)
>>                      goto continue_free;
>>              }
>>  
>> -            release_agent = ve_get_release_agent_path(root_cgrp);
>> +            release_agent = ve_get_release_agent_path(ve, root_cgrp->root);
>>  
>>              *agentbuf = 0;
>>              if (release_agent)
>> @@ -931,7 +934,9 @@ void cgroup1_release_agent(struct work_struct *work)
>>              envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
>>              envp[i] = NULL;
>>  
>> +
>>              mutex_unlock(&cgroup_mutex);
>> +
>>              err = call_usermodehelper_ve(ve, argv[0], argv,
>>                      envp, UMH_WAIT_EXEC);
>>  
>> @@ -939,6 +944,7 @@ void cgroup1_release_agent(struct work_struct *work)
>>                      pr_warn_ratelimited("cgroup1_release_agent "
>>                                          "%s %s failed: %d\n",
>>                                          agentbuf, pathbuf, err);
>> +            up_write(&ve->op_sem);

This must go in other commit.

>>              mutex_lock(&cgroup_mutex);
>>  continue_free:
>>              kfree(pathbuf);
>> @@ -989,7 +995,6 @@ static int cgroup1_show_options(struct seq_file *seq, 
>> struct kernfs_root *kf_roo
>>      const char *release_agent;
>>      struct cgroup_root *root = cgroup_root_from_kf(kf_root);
>>      struct cgroup_subsys *ss;
>> -    struct cgroup *root_cgrp = &root->cgrp;
>>      int ssid;
>>  
>>      for_each_subsys(ss, ssid)
>> @@ -1003,32 +1008,7 @@ static int cgroup1_show_options(struct seq_file *seq, 
>> struct kernfs_root *kf_roo
>>              seq_puts(seq, ",cpuset_v2_mode");
>>  
>>      rcu_read_lock();
>> -#ifdef CONFIG_VE
>> -    {
>> -            struct ve_struct *ve = get_exec_env();
>> -            struct css_set *cset;
>> -            struct nsproxy *ve_ns;
>> -
>> -            if (!ve_is_super(ve)) {
>> -                    /*
>> -                     * ve->init_task is NULL in case when cgroup is accessed
>> -                     * before ve_start_container has been called.
>> -                     *
>> -                     * ve->init_task is synchronized via ve->ve_ns rcu, see
>> -                     * ve_grab_context/drop_context.
>> -                     */
>> -                    ve_ns = rcu_dereference(ve->ve_ns);
>> -                    if (ve_ns) {
>> -                            spin_lock_irq(&css_set_lock);
>> -                            cset = ve_ns->cgroup_ns->root_cset;
>> -                            BUG_ON(!cset);
>> -                            root_cgrp = cset_cgroup_from_root(cset, root);
>> -                            spin_unlock_irq(&css_set_lock);
>> -                    }
>> -            }
>> -    }
>> -#endif
>> -    release_agent = ve_get_release_agent_path(root_cgrp);
>> +    release_agent = ve_get_release_agent_path(get_exec_env(), root);
>>      if (release_agent && release_agent[0])
>>              seq_show_option(seq, "release_agent", release_agent);
>>      rcu_read_unlock();
>> @@ -1226,8 +1206,8 @@ static int cgroup1_remount(struct kernfs_root 
>> *kf_root, int *flags, char *data)
>>  
>>              root_cgrp = cgroup_get_local_root(&root->cgrp);
>>              if (root_cgrp->ve_owner)
>> -                    ret = ve_set_release_agent_path(root_cgrp,
>> -                            opts.release_agent);
>> +                    ret = ve_set_release_agent_path(root_cgrp->ve_owner,
>> +                            root_cgrp->root, opts.release_agent);
>>      }
>>  
>>      trace_cgroup_remount(root);
>> diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
>> index 75997b503d3c..09d328b76dab 100644
>> --- a/kernel/cgroup/cgroup.c
>> +++ b/kernel/cgroup/cgroup.c
>> @@ -2152,7 +2152,8 @@ void init_cgroup_root(struct cgroup_root *root, struct 
>> cgroup_sb_opts *opts)
>>  
>>      root->flags = opts->flags;
>>      if (opts->release_agent)
>> -            ve_set_release_agent_path(cgrp, opts->release_agent);
>> +            ve_set_release_agent_path(cgrp->ve_owner, root,
>> +                    opts->release_agent);
>>  
>>      if (opts->name)
>>              strscpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN);
>> @@ -2360,7 +2361,7 @@ static void cgroup_kill_sb(struct super_block *sb)
>>              percpu_ref_kill(&root->cgrp.self.refcnt);
>>      cgroup_put(&root->cgrp);
>>      kernfs_kill_sb(sb);
>> -    ve_cleanup_per_cgroot_data(NULL, &root->cgrp);
>> +    ve_cleanup_per_cgroot_data(&ve0, root);
>>  }
>>  
>>  struct file_system_type cgroup_fs_type = {
>> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
>> index e8945e10e070..031b104075c8 100644
>> --- a/kernel/ve/ve.c
>> +++ b/kernel/ve/ve.c
>> @@ -35,7 +35,7 @@ struct per_cgroot_data {
>>      /*
>>       * data is related to this cgroup
>>       */
>> -    struct cgroup *cgroot;
>> +    struct cgroup_root *cgroot;
>>  
>>      /*
>>       * path to release agent binaray, that should
>> @@ -217,7 +217,7 @@ int nr_threads_ve(struct ve_struct *ve)
>>  EXPORT_SYMBOL(nr_threads_ve);
>>  
>>  static struct per_cgroot_data *per_cgroot_data_find_locked(
>> -    struct list_head *per_cgroot_list, struct cgroup *cgroot)
>> +    struct list_head *per_cgroot_list, struct cgroup_root *cgroot)
>>  {
>>      struct per_cgroot_data *data;
>>  
>> @@ -229,7 +229,7 @@ static struct per_cgroot_data 
>> *per_cgroot_data_find_locked(
>>  }
>>  
>>  static inline struct per_cgroot_data *per_cgroot_get_or_create(
>> -    struct ve_struct *ve, struct cgroup *cgroot)
>> +    struct ve_struct *ve, struct cgroup_root *cgroot)
>>  {
>>      struct per_cgroot_data *data, *other_data;
>>      unsigned long flags;
>> @@ -263,10 +263,9 @@ static inline struct per_cgroot_data 
>> *per_cgroot_get_or_create(
>>      return data;
>>  }
>>  
>> -int ve_set_release_agent_path(struct cgroup *cgroot,
>> +int ve_set_release_agent_path(struct ve_struct *ve, struct cgroup_root 
>> *cgroot,
>>      const char *release_agent)
>>  {
>> -    struct ve_struct *ve;
>>      unsigned long flags;
>>      struct per_cgroot_data *data;
>>      struct cgroup_rcu_string *new_path, *old_path;
>> @@ -276,7 +275,6 @@ int ve_set_release_agent_path(struct cgroup *cgroot,
>>       * caller should grab cgroup_mutex to safely use
>>       * ve_owner field
>>       */
>> -    ve = cgroot->ve_owner;
>>      BUG_ON(!ve);
>>  
>>      nbytes = strlen(release_agent);
>> @@ -304,16 +302,15 @@ int ve_set_release_agent_path(struct cgroup *cgroot,
>>      return 0;
>>  }
>>  
>> -const char *ve_get_release_agent_path(struct cgroup *cgroot)
>> +const char *ve_get_release_agent_path(struct ve_struct *ve,
>> +    struct cgroup_root *cgroot)
>>  {
>>      /* caller must grab rcu_read_lock */
>>      const char *result = NULL;
>>      struct per_cgroot_data *data;
>>      struct cgroup_rcu_string *str;
>> -    struct ve_struct *ve;
>>      unsigned long flags;
>>  
>> -    ve = rcu_dereference(cgroot->ve_owner);
>>      if (!ve)
>>              return NULL;
>>  
>> @@ -677,15 +674,13 @@ static inline void per_cgroot_data_free(struct 
>> per_cgroot_data *data)
>>      kfree(data);
>>  }
>>  
>> -void ve_cleanup_per_cgroot_data(struct ve_struct *ve, struct cgroup *cgrp)
>> +void ve_cleanup_per_cgroot_data(struct ve_struct *ve, struct cgroup_root 
>> *cgrp)
>>  {
>>      struct per_cgroot_data *data, *saved;
>>      unsigned long flags;
>>  
>>      BUG_ON(!ve && !cgrp);
>>      rcu_read_lock();
>> -    if (!ve)
>> -            ve = cgroup_get_ve_owner(cgrp);
>>  
>>      spin_lock_irqsave(&ve->per_cgroot_list_lock, flags);
>>      list_for_each_entry_safe(data, saved, &ve->per_cgroot_list, list) {
>>
> 

_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to