Each VE should execute release agent notifications within its own workqueue. This way we achieve more fine-grained control over release_agent work flushing at VE destruction.
Signed-off-by: Valeriy Vdovin <[email protected]> Reviewed-by: Kirill Tkhai <[email protected]> --- include/linux/cgroup.h | 1 + include/linux/ve.h | 10 +++++++++ kernel/cgroup.c | 55 +++++++++++++++++++++++--------------------------- kernel/ve/ve.c | 44 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+), 30 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9b9465c..5f1460d 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -671,6 +671,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen); int cgroup_path_ve(const struct cgroup *cgrp, char *buf, int buflen); int cgroup_task_count(const struct cgroup *cgrp); +void cgroup_release_agent(struct work_struct *work); #ifdef CONFIG_VE void cgroup_mark_ve_roots(struct ve_struct *ve); diff --git a/include/linux/ve.h b/include/linux/ve.h index 542fd08..4dbd216 100644 --- a/include/linux/ve.h +++ b/include/linux/ve.h @@ -126,7 +126,15 @@ struct ve_struct { #endif struct kmapset_key sysfs_perms_key; + /* + * cgroups, that want to notify about becoming + * empty, are linked to this release_list. + */ + struct list_head release_list; + struct raw_spinlock release_list_lock; + struct workqueue_struct *wq; + struct work_struct release_agent_work; /* * All tasks, that belong to this ve, live @@ -197,6 +205,8 @@ call_usermodehelper_ve(struct ve_struct *ve, char *path, char **argv, } void do_update_load_avg_ve(void); +void ve_add_to_release_list(struct cgroup *cgrp); +void ve_rm_from_release_list(struct cgroup *cgrp); extern struct ve_struct *get_ve(struct ve_struct *ve); extern void put_ve(struct ve_struct *ve); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 6ee11be..e379d83 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -271,10 +271,6 @@ static bool cgroup_lock_live_group(struct cgroup *cgrp) /* the list of cgroups eligible for automatic release. 
Protected by * release_list_lock */ -static LIST_HEAD(release_list); -static DEFINE_RAW_SPINLOCK(release_list_lock); -static void cgroup_release_agent(struct work_struct *work); -static DECLARE_WORK(release_agent_work, cgroup_release_agent); static void check_for_release(struct cgroup *cgrp); /* Link structure for associating css_set objects with cgroups */ @@ -4410,6 +4406,15 @@ void cgroup_unmark_ve_roots(struct ve_struct *ve) mutex_unlock(&cgroup_mutex); /* ve_owner == NULL will be visible */ synchronize_rcu(); + + /* + * Anyone already waiting in this wq to execute + * cgroup_release_agent doesn't know that ve_owner is NULL, + * but we can wait for all of them at flush_workqueue. + * After it is complete no other cgroup can seep through + * to this ve's workqueue, so it's safe to shutdown ve. + */ + flush_workqueue(ve->wq); } struct cgroup *cgroup_get_ve_root(struct cgroup *cgrp) @@ -4709,11 +4714,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) set_bit(CGRP_REMOVED, &cgrp->flags); - raw_spin_lock(&release_list_lock); - if (!list_empty(&cgrp->release_list)) - list_del_init(&cgrp->release_list); - raw_spin_unlock(&release_list_lock); - + ve_rm_from_release_list(cgrp); /* * Remove @cgrp directory. The removal puts the base ref but we * aren't quite done with @cgrp yet, so hold onto it. 
@@ -5501,17 +5502,7 @@ static void check_for_release(struct cgroup *cgrp) * already queued for a userspace notification, queue * it now */ - int need_schedule_work = 0; - - raw_spin_lock(&release_list_lock); - if (!cgroup_is_removed(cgrp) && - list_empty(&cgrp->release_list)) { - list_add(&cgrp->release_list, &release_list); - need_schedule_work = 1; - } - raw_spin_unlock(&release_list_lock); - if (need_schedule_work) - schedule_work(&release_agent_work); + ve_add_to_release_list(cgrp); } } @@ -5538,20 +5529,24 @@ static void check_for_release(struct cgroup *cgrp) * this routine has no use for the exit status of the release agent * task, so no sense holding our caller up for that. */ -static void cgroup_release_agent(struct work_struct *work) +void cgroup_release_agent(struct work_struct *work) { - BUG_ON(work != &release_agent_work); + struct ve_struct *ve; + ve = container_of(work, struct ve_struct, release_agent_work); mutex_lock(&cgroup_mutex); - raw_spin_lock(&release_list_lock); - while (!list_empty(&release_list)) { + raw_spin_lock(&ve->release_list_lock); + while (!list_empty(&ve->release_list)) { char *argv[3], *envp[3]; int i, err; char *pathbuf = NULL, *agentbuf = NULL; - struct cgroup *cgrp = list_entry(release_list.next, - struct cgroup, - release_list); + struct cgroup *cgrp; + + cgrp = list_entry(ve->release_list.next, + struct cgroup, + release_list); + list_del_init(&cgrp->release_list); - raw_spin_unlock(&release_list_lock); + raw_spin_unlock(&ve->release_list_lock); pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!pathbuf) goto continue_free; @@ -5586,9 +5581,9 @@ static void cgroup_release_agent(struct work_struct *work) continue_free: kfree(pathbuf); kfree(agentbuf); - raw_spin_lock(&release_list_lock); + raw_spin_lock(&ve->release_list_lock); } - raw_spin_unlock(&release_list_lock); + raw_spin_unlock(&ve->release_list_lock); mutex_unlock(&cgroup_mutex); } diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c index 711050c..9e6bb8b 100644 --- 
a/kernel/ve/ve.c +++ b/kernel/ve/ve.c @@ -87,6 +87,11 @@ struct ve_struct ve0 = { .netif_max_nr = INT_MAX, .arp_neigh_nr = ATOMIC_INIT(0), .nd_neigh_nr = ATOMIC_INIT(0), + .release_list_lock = __RAW_SPIN_LOCK_UNLOCKED( + ve0.release_list_lock), + .release_list = LIST_HEAD_INIT(ve0.release_list), + .release_agent_work = __WORK_INITIALIZER(ve0.release_agent_work, + cgroup_release_agent), }; EXPORT_SYMBOL(ve0); @@ -503,6 +508,41 @@ static void ve_workqueue_stop(struct ve_struct *ve) destroy_workqueue(ve->wq); } +void ve_add_to_release_list(struct cgroup *cgrp) +{ + struct ve_struct *ve; + int need_schedule_work = 0; + + rcu_read_lock(); + ve = cgroup_get_ve_owner(cgrp); + + raw_spin_lock(&ve->release_list_lock); + if (!cgroup_is_removed(cgrp) && + list_empty(&cgrp->release_list)) { + list_add(&cgrp->release_list, &ve->release_list); + need_schedule_work = 1; + } + raw_spin_unlock(&ve->release_list_lock); + + if (need_schedule_work) + queue_work(ve->wq, &ve->release_agent_work); + + rcu_read_unlock(); +} + +void ve_rm_from_release_list(struct cgroup *cgrp) +{ + struct ve_struct *ve; + rcu_read_lock(); + ve = cgroup_get_ve_owner(cgrp); + + raw_spin_lock(&ve->release_list_lock); + if (!list_empty(&cgrp->release_list)) + list_del_init(&cgrp->release_list); + raw_spin_unlock(&ve->release_list_lock); + rcu_read_unlock(); +} + /* under ve->op_sem write-lock */ static int ve_start_container(struct ve_struct *ve) { @@ -698,6 +738,9 @@ static struct cgroup_subsys_state *ve_create(struct cgroup *cg) if (!ve->ve_name) goto err_name; + INIT_WORK(&ve->release_agent_work, cgroup_release_agent); + raw_spin_lock_init(&ve->release_list_lock); + ve->_randomize_va_space = ve0._randomize_va_space; ve->features = VE_FEATURES_DEF; @@ -732,6 +775,7 @@ do_init: INIT_LIST_HEAD(&ve->devices); INIT_LIST_HEAD(&ve->ve_list); INIT_LIST_HEAD(&ve->devmnt_list); + INIT_LIST_HEAD(&ve->release_list); mutex_init(&ve->devmnt_mutex); #ifdef CONFIG_AIO -- 1.8.3.1 
_______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
