On 25.03.2020 19:30, Valeriy Vdovin wrote: > Follow-up patch to per-cgroup release_agent property. release_agent > notifications are spawned from a special kthread, running under ve0. But > newly spawned tasks should run under their own ve context. Easy way to > pass this information to a spawning thread is by adding 've_owner' field > to a root cgroup. At notification any cgroup can be walked upwards to > its root and get ve_owner from there. ve_owner is initialized at container > start, that's when we know all cgroup ve roots for free. For ve0 roots > we don't need ve_owner field set, because we can always get pointer to ve0. > Only place where we can detach ve_owner knowledge from root ve cgroups is > at cgroup_exit for ve->init_task, because that's the last time when > ve->init_task is related to its ve root cgroups (see cgroup_exit comments). > > https://jira.sw.ru/browse/PSBM-83887 > Signed-off-by: Valeriy Vdovin <valeriy.vdo...@virtuozzo.com> > --- > include/linux/cgroup.h | 3 +++ > kernel/cgroup.c | 56 > ++++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 59 insertions(+) > > diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h > index 5e289fc..34fc017 100644 > --- a/include/linux/cgroup.h > +++ b/include/linux/cgroup.h > @@ -291,6 +291,9 @@ struct cgroup { > struct simple_xattrs xattrs; > u64 subgroups_limit; > > + /* ve_owner, responsible for running release agent. */ > + struct ve_struct *ve_owner; > + > /* > * Per-cgroup path to release agent binary for release > * notifications. 
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c > index d1d4605..31e7db6 100644 > --- a/kernel/cgroup.c > +++ b/kernel/cgroup.c > @@ -4352,6 +4352,7 @@ int cgroup_mark_ve_root(struct ve_struct *ve) > mutex_lock(&cgroup_mutex); > for_each_active_root(root) { > cgrp = task_cgroup_from_root(ve->init_task, root); > + cgrp->ve_owner = ve; > set_bit(CGRP_VE_ROOT, &cgrp->flags); > err = cgroup_add_file_on_mark_ve(cgrp); > if (err) > @@ -4363,6 +4364,20 @@ int cgroup_mark_ve_root(struct ve_struct *ve) > return err; > } > > +static void cgroup_unbind_roots_from_ve(struct ve_struct *ve) > +{ > + struct cgroup *cgrp; > + struct cgroupfs_root *root; > + > + mutex_lock(&cgroup_mutex); > + for_each_active_root(root) { > + cgrp = task_cgroup_from_root(ve->init_task, root); > + BUG_ON(!cgrp->ve_owner); > + cgrp->ve_owner = NULL; > + } > + mutex_unlock(&cgroup_mutex); > +} > + > struct cgroup *cgroup_get_ve_root(struct cgroup *cgrp) > { > struct cgroup *ve_root = NULL; > @@ -5401,6 +5416,14 @@ void cgroup_exit(struct task_struct *tsk, int > run_callbacks) > int i; > > /* > + * Detect that the task in question is ve->init_task; > + * if so, unbind all cgroups that want to be released under > + * this ve. > + */ > + if (!ve_is_super(tsk->task_ve) && tsk == tsk->task_ve->init_task) > + cgroup_unbind_roots_from_ve(tsk->task_ve); > + > + /* > * Unlink from the css_set task list if necessary. 
> * Optimistically check cg_list before taking > css_set_lock > @@ -5493,6 +5516,7 @@ static void cgroup_release_agent(struct work_struct > *work) > raw_spin_lock(&release_list_lock); > while (!list_empty(&release_list)) { > char *argv[3], *envp[3]; > + struct ve_struct *ve = NULL; > int i, err; > const char *release_agent; > char *pathbuf = NULL, *agentbuf = NULL; > @@ -5507,14 +5531,38 @@ static void cgroup_release_agent(struct work_struct > *work) > goto continue_free; > if (__cgroup_path(cgrp, pathbuf, PAGE_SIZE, true) < 0) > goto continue_free; > + > + /* > + * Deduce under which VE we are going to spawn notifier > + * binary. Non-root VE has its VE-local root cgroup, > + * marked with VE_ROOT flag. It has non-NULL ve_owner > + * set during cgroup_mark_ve_root. > + * VE0 root cgroup does not need ve_owner field, because > + * its ve is ve0. Non-VE root cgroup does not have a parent. > + */ > root_cgrp = cgroup_get_local_root(cgrp); > + if (root_cgrp->parent == NULL) > + ve = &ve0; > + else > + ve = root_cgrp->ve_owner; > + if (!ve) > + goto continue_free; > + > + down_read(&ve->op_sem); > + if (ve->is_running) { > + up_read(&ve->op_sem); > + goto continue_free; > + } > + > rcu_read_lock(); > + > release_agent = cgroup_release_agent_path(root_cgrp); > if (release_agent) > agentbuf = kstrdup(release_agent, GFP_KERNEL); > rcu_read_unlock(); > if (!agentbuf) > goto continue_free; > + get_ve(ve); > > i = 0; > argv[i++] = agentbuf; > @@ -5530,12 +5578,20 @@ static void cgroup_release_agent(struct work_struct > *work) > /* Drop the lock while we invoke the usermode helper, > * since the exec could involve hitting disk and hence > * be a slow process */ > + > mutex_unlock(&cgroup_mutex); > +#ifdef CONFIG_VE > + err = call_usermodehelper_fns_ve(ve, argv[0], argv, > + envp, UMH_WAIT_EXEC, NULL, NULL, NULL);
It is a bad idea to invoke a userspace task while holding a kernel mutex: 1) the task may hang forever, and 2) this introduces untracked circular dependencies between kernel locks. > +#else > err = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); > +#endif > if (err < 0) > pr_warn_ratelimited("cgroup release_agent " > "%s %s failed: %d\n", > agentbuf, pathbuf, err); > + up_read(&ve->op_sem); > + put_ve(ve); > > mutex_lock(&cgroup_mutex); > continue_free: > _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel