On 15.05.2020 19:13, Andrey Ryabinin wrote:
> From: Tejun Heo <[email protected]>
> 
> There are three subsystem callbacks in css shutdown path -
> css_offline(), css_released() and css_free().  Except for
> css_released(), cgroup core didn't guarantee the order of invocation.
> css_offline() or css_free() could be called on a parent css before its
> children.  This behavior is unexpected and led to bugs in cpu and
> memory controller.
> 
> This patch updates offline path so that a parent css is never offlined
> before its children.  Each css keeps online_cnt which reaches zero iff
> itself and all its children are offline and offline_css() is invoked
> only after online_cnt reaches zero.
> 
> This fixes the memory controller bug and allows the fix for cpu
> controller.
> 
> Signed-off-by: Tejun Heo <[email protected]>
> Reported-and-tested-by: Christian Borntraeger <[email protected]>
> Reported-by: Brian Christiansen <[email protected]>
> Link: http://lkml.kernel.org/g/[email protected]
> Link: 
> http://lkml.kernel.org/g/cakb58ikdkzc8ret31wbkd99+hxnzjk4+fbmhkgs+nvrc9vj...@mail.gmail.com
> Cc: Heiko Carstens <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: [email protected]
> 
> https://jira.sw.ru/browse/PSBM-104029
> (cherry picked from commit aa226ff4a1ce79f229c6b7a4c0a14e17fececd01)
> [aryabinin: User refcounts instead of atomics]
> Signed-off-by: Andrey Ryabinin <[email protected]>

Reviewed-by: Kirill Tkhai <[email protected]>

> ---
>  include/linux/cgroup.h |  6 ++++
>  kernel/cgroup.c        | 66 +++++++++++++++++++++++++-----------------
>  2 files changed, 45 insertions(+), 27 deletions(-)
> 
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index bd48216e7ac2..1c1ee0c458e0 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -242,6 +242,12 @@ struct cgroup {
>        */
>       atomic_t count;
>  
> +     /*
> +      * Incremented by online self and children.  Used to guarantee that
> +      * parents are not offlined before their children.
> +      */
> +     refcount_t online_cnt;
> +
>       int id;                         /* ida allocated in-hierarchy ID */
>  
>       /*
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 50d4b3df0f0a..17ba1ab9c7b0 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -1422,6 +1422,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
>       INIT_LIST_HEAD(&root->subsys_list);
>       INIT_LIST_HEAD(&root->root_list);
>       INIT_LIST_HEAD(&root->allcg_list);
> +     refcount_set(&cgrp->online_cnt, 1);
>       root->number_of_cgroups = 1;
>       cgrp->root = root;
>       cgrp->name = &root_cgroup_name;
> @@ -4238,8 +4239,13 @@ static int online_css(struct cgroup_subsys *ss, struct 
> cgroup *cgrp)
>  
>       if (ss->css_online)
>               ret = ss->css_online(cgrp);
> -     if (!ret)
> +     if (!ret) {
>               cgrp->subsys[ss->subsys_id]->flags |= CSS_ONLINE;
> +
> +             refcount_inc(&cgrp->online_cnt);
> +             if (cgrp->parent)
> +                     refcount_inc(&cgrp->parent->online_cnt);
> +     }
>       return ret;
>  }
>  
> @@ -4509,9 +4515,11 @@ static void cgroup_css_killed(struct cgroup *cgrp)
>       if (!atomic_dec_and_test(&cgrp->css_kill_cnt))
>               return;
>  
> -     /* percpu ref's of all css's are killed, kick off the next step */
> -     INIT_WORK(&cgrp->destroy_work, cgroup_offline_fn);
> -     queue_work(cgroup_destroy_wq, &cgrp->destroy_work);
> +     if (refcount_dec_and_test(&cgrp->online_cnt)) {
> +             /* percpu ref's of all css's are killed, kick off the next step 
> */
> +             INIT_WORK(&cgrp->destroy_work, cgroup_offline_fn);
> +             queue_work(cgroup_destroy_wq, &cgrp->destroy_work);
> +     }
>  }
>  
>  static void css_ref_killed_fn(struct percpu_ref *ref)
> @@ -4649,37 +4657,41 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
>  static void cgroup_offline_fn(struct work_struct *work)
>  {
>       struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
> -     struct cgroup *parent = cgrp->parent;
> -     struct dentry *d = cgrp->dentry;
>       struct cgroup_subsys *ss;
>  
>       mutex_lock(&cgroup_mutex);
>  
> -     /*
> -      * css_tryget() is guaranteed to fail now.  Tell subsystems to
> -      * initate destruction.
> -      */
> -     for_each_subsys(cgrp->root, ss)
> -             offline_css(ss, cgrp);
> +     do {
> +             struct cgroup *parent = cgrp->parent;
> +             struct dentry *d = cgrp->dentry;
>  
> -     /*
> -      * Put the css refs from cgroup_destroy_locked().  Each css holds
> -      * an extra reference to the cgroup's dentry and cgroup removal
> -      * proceeds regardless of css refs.  On the last put of each css,
> -      * whenever that may be, the extra dentry ref is put so that dentry
> -      * destruction happens only after all css's are released.
> -      */
> -     for_each_subsys(cgrp->root, ss)
> -             css_put(cgrp->subsys[ss->subsys_id]);
> +             /*
> +              * css_tryget() is guaranteed to fail now.  Tell subsystems to
> +              * initate destruction.
> +              */
> +             for_each_subsys(cgrp->root, ss)
> +                     offline_css(ss, cgrp);
>  
> -     /* delete this cgroup from parent->children */
> -     list_del_rcu(&cgrp->sibling);
> -     list_del_init(&cgrp->allcg_node);
> +             /*
> +              * Put the css refs from cgroup_destroy_locked().  Each css 
> holds
> +              * an extra reference to the cgroup's dentry and cgroup removal
> +              * proceeds regardless of css refs.  On the last put of each 
> css,
> +              * whenever that may be, the extra dentry ref is put so that 
> dentry
> +              * destruction happens only after all css's are released.
> +              */
> +             for_each_subsys(cgrp->root, ss)
> +                     css_put(cgrp->subsys[ss->subsys_id]);
>  
> -     dput(d);
> +             /* delete this cgroup from parent->children */
> +             list_del_rcu(&cgrp->sibling);
> +             list_del_init(&cgrp->allcg_node);
> +
> +             dput(d);
>  
> -     set_bit(CGRP_RELEASABLE, &parent->flags);
> -     check_for_release(parent);
> +             set_bit(CGRP_RELEASABLE, &parent->flags);
> +             check_for_release(parent);
> +             cgrp = parent;
> +     } while (cgrp && refcount_dec_and_test(&cgrp->online_cnt));
>  
>       mutex_unlock(&cgroup_mutex);
>  }
> 

_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to