On 15.05.2020 19:13, Andrey Ryabinin wrote: > From: Tejun Heo <[email protected]> > > There are three subsystem callbacks in css shutdown path - > css_offline(), css_released() and css_free(). Except for > css_released(), cgroup core didn't guarantee the order of invocation. > css_offline() or css_free() could be called on a parent css before its > children. This behavior is unexpected and led to bugs in cpu and > memory controller. > > This patch updates offline path so that a parent css is never offlined > before its children. Each css keeps online_cnt which reaches zero iff > itself and all its children are offline and offline_css() is invoked > only after online_cnt reaches zero. > > This fixes the memory controller bug and allows the fix for cpu > controller. > > Signed-off-by: Tejun Heo <[email protected]> > Reported-and-tested-by: Christian Borntraeger <[email protected]> > Reported-by: Brian Christiansen <[email protected]> > Link: http://lkml.kernel.org/g/[email protected] > Link: > http://lkml.kernel.org/g/cakb58ikdkzc8ret31wbkd99+hxnzjk4+fbmhkgs+nvrc9vj...@mail.gmail.com > Cc: Heiko Carstens <[email protected]> > Cc: Peter Zijlstra <[email protected]> > Cc: [email protected] > > https://jira.sw.ru/browse/PSBM-104029 > (cherry picked from commit aa226ff4a1ce79f229c6b7a4c0a14e17fececd01) > [aryabinin: User refcounts instead of atomics] > Signed-off-by: Andrey Ryabinin <[email protected]>
Reviewed-by: Kirill Tkhai <[email protected]> > --- > include/linux/cgroup.h | 6 ++++ > kernel/cgroup.c | 66 +++++++++++++++++++++++++----------------- > 2 files changed, 45 insertions(+), 27 deletions(-) > > diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h > index bd48216e7ac2..1c1ee0c458e0 100644 > --- a/include/linux/cgroup.h > +++ b/include/linux/cgroup.h > @@ -242,6 +242,12 @@ struct cgroup { > */ > atomic_t count; > > + /* > + * Incremented by online self and children. Used to guarantee that > + * parents are not offlined before their children. > + */ > + refcount_t online_cnt; > + > int id; /* ida allocated in-hierarchy ID */ > > /* > diff --git a/kernel/cgroup.c b/kernel/cgroup.c > index 50d4b3df0f0a..17ba1ab9c7b0 100644 > --- a/kernel/cgroup.c > +++ b/kernel/cgroup.c > @@ -1422,6 +1422,7 @@ static void init_cgroup_root(struct cgroupfs_root *root) > INIT_LIST_HEAD(&root->subsys_list); > INIT_LIST_HEAD(&root->root_list); > INIT_LIST_HEAD(&root->allcg_list); > + refcount_set(&cgrp->online_cnt, 1); > root->number_of_cgroups = 1; > cgrp->root = root; > cgrp->name = &root_cgroup_name; > @@ -4238,8 +4239,13 @@ static int online_css(struct cgroup_subsys *ss, struct > cgroup *cgrp) > > if (ss->css_online) > ret = ss->css_online(cgrp); > - if (!ret) > + if (!ret) { > cgrp->subsys[ss->subsys_id]->flags |= CSS_ONLINE; > + > + refcount_inc(&cgrp->online_cnt); > + if (cgrp->parent) > + refcount_inc(&cgrp->parent->online_cnt); > + } > return ret; > } > > @@ -4509,9 +4515,11 @@ static void cgroup_css_killed(struct cgroup *cgrp) > if (!atomic_dec_and_test(&cgrp->css_kill_cnt)) > return; > > - /* percpu ref's of all css's are killed, kick off the next step */ > - INIT_WORK(&cgrp->destroy_work, cgroup_offline_fn); > - queue_work(cgroup_destroy_wq, &cgrp->destroy_work); > + if (refcount_dec_and_test(&cgrp->online_cnt)) { > + /* percpu ref's of all css's are killed, kick off the next step > */ > + INIT_WORK(&cgrp->destroy_work, cgroup_offline_fn); > + queue_work(cgroup_destroy_wq, &cgrp->destroy_work); > + } > } > > static void css_ref_killed_fn(struct percpu_ref *ref) > @@ -4649,37 +4657,41 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) > static void cgroup_offline_fn(struct work_struct *work) > { > struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work); > - struct cgroup *parent = cgrp->parent; > - struct dentry *d = cgrp->dentry; > struct cgroup_subsys *ss; > > mutex_lock(&cgroup_mutex); > > - /* > - * css_tryget() is guaranteed to fail now. Tell subsystems to > - * initate destruction. > - */ > - for_each_subsys(cgrp->root, ss) > - offline_css(ss, cgrp); > + do { > + struct cgroup *parent = cgrp->parent; > + struct dentry *d = cgrp->dentry; > > - /* > - * Put the css refs from cgroup_destroy_locked(). Each css holds > - * an extra reference to the cgroup's dentry and cgroup removal > - * proceeds regardless of css refs. On the last put of each css, > - * whenever that may be, the extra dentry ref is put so that dentry > - * destruction happens only after all css's are released. > - */ > - for_each_subsys(cgrp->root, ss) > - css_put(cgrp->subsys[ss->subsys_id]); > + /* > + * css_tryget() is guaranteed to fail now. Tell subsystems to > + * initate destruction. > + */ > + for_each_subsys(cgrp->root, ss) > + offline_css(ss, cgrp); > > - /* delete this cgroup from parent->children */ > - list_del_rcu(&cgrp->sibling); > - list_del_init(&cgrp->allcg_node); > + /* > + * Put the css refs from cgroup_destroy_locked(). Each css > holds > + * an extra reference to the cgroup's dentry and cgroup removal > + * proceeds regardless of css refs. On the last put of each > css, > + * whenever that may be, the extra dentry ref is put so that > dentry > + * destruction happens only after all css's are released. > + */ > + for_each_subsys(cgrp->root, ss) > + css_put(cgrp->subsys[ss->subsys_id]); > > - dput(d); > + /* delete this cgroup from parent->children */ > + list_del_rcu(&cgrp->sibling); > + list_del_init(&cgrp->allcg_node); > + > + dput(d); > > - set_bit(CGRP_RELEASABLE, &parent->flags); > - check_for_release(parent); > + set_bit(CGRP_RELEASABLE, &parent->flags); > + check_for_release(parent); > + cgrp = parent; > + } while (cgrp && refcount_dec_and_test(&cgrp->online_cnt)); > > mutex_unlock(&cgroup_mutex); > } > _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
