On 28.07.2020 20:53, Valeriy Vdovin wrote:
> This is done so that each container could set its own release agent.
> Release agent information is now stored in per-cgroup-root data
> structure in ve.
> 
> https://jira.sw.ru/browse/PSBM-83887
> 
> Signed-off-by: Valeriy Vdovin <valeriy.vdo...@virtuozzo.com>
> ---
>  include/linux/cgroup.h |   3 --
>  include/linux/ve.h     |   6 +++
>  kernel/cgroup.c        | 100 ++++++++++++++++++++++++++++++++++++++++---------
>  kernel/ve/ve.c         |  72 +++++++++++++++++++++++++++++++++++
>  4 files changed, 161 insertions(+), 20 deletions(-)
> 
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index 5f1460d..fc138c0 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -429,9 +429,6 @@ struct cgroupfs_root {
>       /* IDs for cgroups in this hierarchy */
>       struct ida cgroup_ida;
>  
> -     /* The path to use for release notifications. */
> -     char release_agent_path[PATH_MAX];
> -
>       /* The name for this hierarchy - may be empty */
>       char name[MAX_CGROUP_ROOT_NAMELEN];
>  };
> diff --git a/include/linux/ve.h b/include/linux/ve.h
> index 65413d5..b6662637 100644
> --- a/include/linux/ve.h
> +++ b/include/linux/ve.h
> @@ -214,6 +214,12 @@ void do_update_load_avg_ve(void);
>  
>  void ve_add_to_release_list(struct cgroup *cgrp);
>  void ve_rm_from_release_list(struct cgroup *cgrp);
> +
> +int ve_set_release_agent_path(struct ve_struct *ve, struct cgroup *cgroot,
> +     const char *release_agent);
> +
> +const char *ve_get_release_agent_path(struct cgroup *cgrp_root);
> +
>  extern struct ve_struct *get_ve(struct ve_struct *ve);
>  extern void put_ve(struct ve_struct *ve);
>  
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index aa93cf2..1d9c889 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -1092,9 +1092,12 @@ static int rebind_subsystems(struct cgroupfs_root *root,
>  
>  static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
>  {
> +     const char *release_agent;
>       struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
>       struct cgroup_subsys *ss;
> +     struct cgroup *root_cgrp = &root->top_cgroup;
>  
> +     mutex_lock(&cgroup_mutex);
>       mutex_lock(&cgroup_root_mutex);
>       for_each_subsys(root, ss)
>               seq_printf(seq, ",%s", ss->name);
> @@ -1106,14 +1109,37 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
>               seq_puts(seq, ",xattr");
>       if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
>               seq_puts(seq, ",cpuset_v2_mode");
> -     if (strlen(root->release_agent_path))
> -             seq_show_option(seq, "release_agent",
> -                             root->release_agent_path);
> +#ifdef CONFIG_VE
> +     {
> +             struct ve_struct *ve = get_exec_env();
> +
> +             if (!ve_is_super(ve)) {
> +                     /*
> +                      * ve->init_task is NULL in case when cgroup is accessed
> +                      * before ve_start_container has been called.
> +                      *
> +                      * ve->init_task is synchronized via ve->ve_ns rcu, see
> +                      * ve_grab_context/drop_context.
> +                      */
> +                     rcu_read_lock();
> +                     if (ve->ve_ns)
> +                             root_cgrp = task_cgroup_from_root(ve->init_task,
> +                                     root);
> +                     rcu_read_unlock();
> +             }
> +     }
> +#endif
> +     rcu_read_lock();
> +     release_agent = ve_get_release_agent_path(root_cgrp);
> +     if (release_agent && release_agent[0])
> +             seq_show_option(seq, "release_agent", release_agent);
> +     rcu_read_unlock();
>       if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags))
>               seq_puts(seq, ",clone_children");
>       if (strlen(root->name))
>               seq_show_option(seq, "name", root->name);
>       mutex_unlock(&cgroup_root_mutex);
> +     mutex_unlock(&cgroup_mutex);
>       return 0;
>  }
>  
> @@ -1386,8 +1412,13 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
>       /* re-populate subsystem files */
>       cgroup_populate_dir(cgrp, false, added_mask);
>  
> -     if (opts.release_agent)
> -             strcpy(root->release_agent_path, opts.release_agent);
> +     if (opts.release_agent) {
> +             struct cgroup *root_cgrp;
> +             root_cgrp = cgroup_get_local_root(cgrp);
> +             if (root_cgrp->ve_owner)
> +                     ret = ve_set_release_agent_path(root_cgrp,
> +                             opts.release_agent);
> +     }
>   out_unlock:
>       kfree(opts.release_agent);
>       kfree(opts.name);
> @@ -1549,8 +1580,6 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
>       root->subsys_mask = opts->subsys_mask;
>       root->flags = opts->flags;
>       ida_init(&root->cgroup_ida);
> -     if (opts->release_agent)
> -             strcpy(root->release_agent_path, opts->release_agent);
>       if (opts->name)
>               strcpy(root->name, opts->name);
>       if (opts->cpuset_clone_children)
> @@ -1748,6 +1777,11 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
>  
>               cred = override_creds(&init_cred);
>               cgroup_populate_dir(root_cgrp, true, root->subsys_mask);
> +             if (opts.release_agent) {
> +                     ret = ve_set_release_agent_path(root_cgrp,
> +                             opts.release_agent);
> +             }
> +
>               revert_creds(cred);
>               mutex_unlock(&cgroup_root_mutex);
>               mutex_unlock(&cgroup_mutex);
> @@ -2317,7 +2351,8 @@ static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
>  static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype 
> *cft,
>                                     const char *buffer)
>  {
> -     BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
> +     int ret = 0;
> +     struct cgroup *root_cgrp;
>  
>       if (strlen(buffer) >= PATH_MAX)
>               return -EINVAL;
> @@ -2325,19 +2360,35 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
>       if (!cgroup_lock_live_group(cgrp))
>               return -ENODEV;
>  
> -     mutex_lock(&cgroup_root_mutex);
> -     strcpy(cgrp->root->release_agent_path, buffer);
> -     mutex_unlock(&cgroup_root_mutex);
> +     root_cgrp = cgroup_get_local_root(cgrp);
> +     BUG_ON(!root_cgrp);
> +     if (root_cgrp->ve_owner)
> +             ret = ve_set_release_agent_path(root_cgrp, buffer);
> +     else
> +             return -ENODEV;

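Missing mutex_unlock(&cgroup_mutex) here: this early "return -ENODEV" skips
the mutex_unlock(&cgroup_mutex) that the normal exit path performs below, so
the mutex stays held after cgroup_lock_live_group() succeeded.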

> +
>       mutex_unlock(&cgroup_mutex);
> -     return 0;
> +     return ret;
>  }
>  
>  static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
>                                    struct seq_file *seq)
>  {
> +     const char *release_agent;
> +     struct cgroup *root_cgrp;
> +
>       if (!cgroup_lock_live_group(cgrp))
>               return -ENODEV;
> -     seq_puts(seq, cgrp->root->release_agent_path);
> +
> +     root_cgrp = cgroup_get_local_root(cgrp);
> +     if (root_cgrp->ve_owner) {
> +             rcu_read_lock();
> +             release_agent = ve_get_release_agent_path(root_cgrp);
> +
> +             if (release_agent)
> +                     seq_puts(seq, release_agent);
> +             rcu_read_unlock();
> +     }
>       seq_putc(seq, '\n');
>       mutex_unlock(&cgroup_mutex);
>       return 0;
> @@ -5532,15 +5583,24 @@ static void check_for_release(struct cgroup *cgrp)
>  void cgroup_release_agent(struct work_struct *work)
>  {
>       struct ve_struct *ve;
> +     char *agentbuf;
> +
> +     agentbuf = kzalloc(PATH_MAX, GFP_KERNEL);
> +     if (!agentbuf) {
> +             pr_warn("failed to allocate agentbuf\n");
> +             return;
> +     }
> +
>       ve = container_of(work, struct ve_struct, release_agent_work);
>       mutex_lock(&cgroup_mutex);
>       raw_spin_lock(&ve->release_list_lock);
>       while (!list_empty(&ve->release_list)) {
>               char *argv[3], *envp[3];
>               int i, err;
> -             char *pathbuf = NULL, *agentbuf = NULL;
> +             char *pathbuf = NULL;
>               struct cgroup *cgrp, *root_cgrp;
>               struct task_struct *ve_task;
> +             const char *release_agent;
>  
>               cgrp = list_entry(ve->release_list.next,
>                                 struct cgroup,
> @@ -5568,9 +5628,15 @@ void cgroup_release_agent(struct work_struct *work)
>                       rcu_read_unlock();
>                       goto continue_free;
>               }
> +
> +             release_agent = ve_get_release_agent_path(root_cgrp);
> +
> +             *agentbuf = 0;
> +             if (release_agent)
> +                     strncpy(agentbuf, release_agent, PATH_MAX);
>               rcu_read_unlock();
> -             agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
> -             if (!agentbuf)
> +
> +             if (!*agentbuf)
>                       goto continue_free;
>  
>               i = 0;
> @@ -5601,11 +5667,11 @@ void cgroup_release_agent(struct work_struct *work)
>               mutex_lock(&cgroup_mutex);
>   continue_free:
>               kfree(pathbuf);
> -             kfree(agentbuf);
>               raw_spin_lock(&ve->release_list_lock);
>       }
>       raw_spin_unlock(&ve->release_list_lock);
>       mutex_unlock(&cgroup_mutex);
> +     kfree(agentbuf);
>  }
>  
>  static int __init cgroup_disable(char *str)
> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
> index f564dca..f03f665 100644
> --- a/kernel/ve/ve.c
> +++ b/kernel/ve/ve.c
> @@ -51,6 +51,11 @@ struct per_cgroot_data {
>        * data is related to this cgroup
>        */
>       struct cgroup *cgroot;
> +     /*
> +      * path to release agent binary, that should
> +      * be spawned for all cgroups under this cgroup root
> +      */
> +     struct cgroup_rcu_string __rcu *release_agent_path;
>  };
>  
>  extern struct kmapset_set sysfs_ve_perms_set;
> @@ -175,6 +180,68 @@ static inline struct per_cgroot_data *per_cgroot_get_or_create(
>       return data;
>  }
>  
> +int ve_set_release_agent_path(struct cgroup *cgroot,
> +     const char *release_agent)
> +{
> +     struct ve_struct *ve;
> +     struct per_cgroot_data *data;
> +     struct cgroup_rcu_string *new_path, *old_path;
> +     int err = 0;
> +
> +     /*
> +      * caller should grab cgroup_mutex to safely use
> +      * ve_owner field
> +      */
> +     ve = cgroot->ve_owner;
> +     BUG_ON(!ve);
> +
> +     new_path = cgroup_rcu_strdup(release_agent, strlen(release_agent));
> +     if (IS_ERR(new_path))
> +             return PTR_ERR(new_path);
> +
> +     data = per_cgroot_get_or_create(ve, cgroot);
> +     if (IS_ERR(data)) {
> +             kfree(new_path);
> +             return PTR_ERR(data);
> +     }
> +
> +     raw_spin_lock(&ve->per_cgroot_list_lock);
> +
> +     old_path = rcu_dereference_protected(data->release_agent_path,
> +             lockdep_is_held(&ve->per_cgroot_list_lock));
> +
> +     rcu_assign_pointer(data->release_agent_path, new_path);
> +     raw_spin_unlock(&ve->per_cgroot_list_lock);
> +
> +     if (old_path)
> +             kfree_rcu(old_path, rcu_head);
> +
> +     return err;
> +}
> +
> +const char *ve_get_release_agent_path(struct cgroup *cgroot)
> +{
> +     /* caller must grab rcu_read_lock */
> +     const char *result = NULL;
> +     struct per_cgroot_data *data;
> +     struct cgroup_rcu_string *str;
> +     struct ve_struct *ve;
> +     ve = rcu_dereference(cgroot->ve_owner);
> +     if (!ve)
> +             return NULL;
> +
> +     raw_spin_lock(&ve->per_cgroot_list_lock);
> +
> +     data = per_cgroot_data_find_locked(&ve->per_cgroot_list, cgroot);
> +     if (data) {
> +             str = rcu_dereference(data->release_agent_path);
> +             if (str)
> +                     result = str->val;
> +     }
> +     raw_spin_unlock(&ve->per_cgroot_list_lock);
> +     return result;
> +}
> +
>  struct cgroup_subsys_state *ve_get_init_css(struct ve_struct *ve, int 
> subsys_id)
>  {
>       struct cgroup_subsys_state *css, *tmp;
> @@ -677,9 +744,14 @@ err_list:
>  static void ve_per_cgroot_free(struct ve_struct *ve)
>  {
>       struct per_cgroot_data *data, *saved;
> +     struct cgroup_rcu_string *release_agent;
>  
>       raw_spin_lock(&ve->per_cgroot_list_lock);
>       list_for_each_entry_safe(data, saved, &ve->per_cgroot_list, list) {
> +             release_agent = data->release_agent_path;
> +             RCU_INIT_POINTER(data->release_agent_path, NULL);
> +             if (release_agent)
> +                     kfree_rcu(release_agent, rcu_head);
>               list_del_init(&data->list);
>               kfree(data);
>       }
> 

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to