Each VE should execute release agent notifications within its own
workqueue. This way we achieve more fine-grained control over
release_agent work flushing at VE destruction.

Signed-off-by: Valeriy Vdovin <[email protected]>
Reviewed-by: Kirill Tkhai <[email protected]>
---
 include/linux/cgroup.h |  1 +
 include/linux/ve.h     | 10 +++++++++
 kernel/cgroup.c        | 55 +++++++++++++++++++++++---------------------------
 kernel/ve/ve.c         | 44 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 80 insertions(+), 30 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 9b9465c..5f1460d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -671,6 +671,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int 
buflen);
 int cgroup_path_ve(const struct cgroup *cgrp, char *buf, int buflen);
 
 int cgroup_task_count(const struct cgroup *cgrp);
+void cgroup_release_agent(struct work_struct *work);
 
 #ifdef CONFIG_VE
 void cgroup_mark_ve_roots(struct ve_struct *ve);
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 542fd08..4dbd216 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -126,7 +126,15 @@ struct ve_struct {
 #endif
        struct kmapset_key      sysfs_perms_key;
 
+       /*
+        * cgroups, that want to notify about becoming
+        * empty, are linked to this release_list.
+        */
+       struct list_head        release_list;
+       struct raw_spinlock     release_list_lock;
+
        struct workqueue_struct *wq;
+       struct work_struct      release_agent_work;
 
        /*
         * All tasks, that belong to this ve, live
@@ -197,6 +205,8 @@ call_usermodehelper_ve(struct ve_struct *ve, char *path, 
char **argv,
 }
 void do_update_load_avg_ve(void);
 
+void ve_add_to_release_list(struct cgroup *cgrp);
+void ve_rm_from_release_list(struct cgroup *cgrp);
 extern struct ve_struct *get_ve(struct ve_struct *ve);
 extern void put_ve(struct ve_struct *ve);
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 6ee11be..e379d83 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -271,10 +271,6 @@ static bool cgroup_lock_live_group(struct cgroup *cgrp)
 
 /* the list of cgroups eligible for automatic release. Protected by
  * release_list_lock */
-static LIST_HEAD(release_list);
-static DEFINE_RAW_SPINLOCK(release_list_lock);
-static void cgroup_release_agent(struct work_struct *work);
-static DECLARE_WORK(release_agent_work, cgroup_release_agent);
 static void check_for_release(struct cgroup *cgrp);
 
 /* Link structure for associating css_set objects with cgroups */
@@ -4410,6 +4406,15 @@ void cgroup_unmark_ve_roots(struct ve_struct *ve)
        mutex_unlock(&cgroup_mutex);
        /* ve_owner == NULL will be visible */
        synchronize_rcu();
+
+       /*
+        * Anyone already waiting in this wq to execute
+        * cgroup_release_agent doesn't know that ve_owner is NULL,
+        * but we can wait for all of them at flush_workqueue.
+        * After it is complete no other cgroup can seep through
+        * to this ve's workqueue, so it's safe to shutdown ve.
+        */
+       flush_workqueue(ve->wq);
 }
 
 struct cgroup *cgroup_get_ve_root(struct cgroup *cgrp)
@@ -4709,11 +4714,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 
        set_bit(CGRP_REMOVED, &cgrp->flags);
 
-       raw_spin_lock(&release_list_lock);
-       if (!list_empty(&cgrp->release_list))
-               list_del_init(&cgrp->release_list);
-       raw_spin_unlock(&release_list_lock);
-
+       ve_rm_from_release_list(cgrp);
        /*
         * Remove @cgrp directory.  The removal puts the base ref but we
         * aren't quite done with @cgrp yet, so hold onto it.
@@ -5501,17 +5502,7 @@ static void check_for_release(struct cgroup *cgrp)
                 * already queued for a userspace notification, queue
                 * it now
                 */
-               int need_schedule_work = 0;
-
-               raw_spin_lock(&release_list_lock);
-               if (!cgroup_is_removed(cgrp) &&
-                   list_empty(&cgrp->release_list)) {
-                       list_add(&cgrp->release_list, &release_list);
-                       need_schedule_work = 1;
-               }
-               raw_spin_unlock(&release_list_lock);
-               if (need_schedule_work)
-                       schedule_work(&release_agent_work);
+               ve_add_to_release_list(cgrp);
        }
 }
 
@@ -5538,20 +5529,24 @@ static void check_for_release(struct cgroup *cgrp)
  * this routine has no use for the exit status of the release agent
  * task, so no sense holding our caller up for that.
  */
-static void cgroup_release_agent(struct work_struct *work)
+void cgroup_release_agent(struct work_struct *work)
 {
-       BUG_ON(work != &release_agent_work);
+       struct ve_struct *ve;
+       ve = container_of(work, struct ve_struct, release_agent_work);
        mutex_lock(&cgroup_mutex);
-       raw_spin_lock(&release_list_lock);
-       while (!list_empty(&release_list)) {
+       raw_spin_lock(&ve->release_list_lock);
+       while (!list_empty(&ve->release_list)) {
                char *argv[3], *envp[3];
                int i, err;
                char *pathbuf = NULL, *agentbuf = NULL;
-               struct cgroup *cgrp = list_entry(release_list.next,
-                                                   struct cgroup,
-                                                   release_list);
+               struct cgroup *cgrp;
+
+               cgrp = list_entry(ve->release_list.next,
+                                 struct cgroup,
+                                 release_list);
+
                list_del_init(&cgrp->release_list);
-               raw_spin_unlock(&release_list_lock);
+               raw_spin_unlock(&ve->release_list_lock);
                pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
                if (!pathbuf)
                        goto continue_free;
@@ -5586,9 +5581,9 @@ static void cgroup_release_agent(struct work_struct *work)
  continue_free:
                kfree(pathbuf);
                kfree(agentbuf);
-               raw_spin_lock(&release_list_lock);
+               raw_spin_lock(&ve->release_list_lock);
        }
-       raw_spin_unlock(&release_list_lock);
+       raw_spin_unlock(&ve->release_list_lock);
        mutex_unlock(&cgroup_mutex);
 }
 
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 711050c..9e6bb8b 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -87,6 +87,11 @@ struct ve_struct ve0 = {
        .netif_max_nr           = INT_MAX,
        .arp_neigh_nr           = ATOMIC_INIT(0),
        .nd_neigh_nr            = ATOMIC_INIT(0),
+       .release_list_lock      = __RAW_SPIN_LOCK_UNLOCKED(
+                                       ve0.release_list_lock),
+       .release_list           = LIST_HEAD_INIT(ve0.release_list),
+       .release_agent_work     = __WORK_INITIALIZER(ve0.release_agent_work,
+                                       cgroup_release_agent),
 };
 EXPORT_SYMBOL(ve0);
 
@@ -503,6 +508,41 @@ static void ve_workqueue_stop(struct ve_struct *ve)
        destroy_workqueue(ve->wq);
 }
 
+void ve_add_to_release_list(struct cgroup *cgrp)
+{
+       struct ve_struct *ve;
+       int need_schedule_work = 0;
+
+       rcu_read_lock();
+       ve = cgroup_get_ve_owner(cgrp);
+
+       raw_spin_lock(&ve->release_list_lock);
+       if (!cgroup_is_removed(cgrp) &&
+           list_empty(&cgrp->release_list)) {
+               list_add(&cgrp->release_list, &ve->release_list);
+               need_schedule_work = 1;
+       }
+       raw_spin_unlock(&ve->release_list_lock);
+
+       if (need_schedule_work)
+               queue_work(ve->wq, &ve->release_agent_work);
+
+       rcu_read_unlock();
+}
+
+void ve_rm_from_release_list(struct cgroup *cgrp)
+{
+       struct ve_struct *ve;
+       rcu_read_lock();
+       ve = cgroup_get_ve_owner(cgrp);
+
+       raw_spin_lock(&ve->release_list_lock);
+       if (!list_empty(&cgrp->release_list))
+               list_del_init(&cgrp->release_list);
+       raw_spin_unlock(&ve->release_list_lock);
+       rcu_read_unlock();
+}
+
 /* under ve->op_sem write-lock */
 static int ve_start_container(struct ve_struct *ve)
 {
@@ -698,6 +738,9 @@ static struct cgroup_subsys_state *ve_create(struct cgroup 
*cg)
        if (!ve->ve_name)
                goto err_name;
 
+       INIT_WORK(&ve->release_agent_work, cgroup_release_agent);
+       raw_spin_lock_init(&ve->release_list_lock);
+
        ve->_randomize_va_space = ve0._randomize_va_space;
 
        ve->features = VE_FEATURES_DEF;
@@ -732,6 +775,7 @@ do_init:
        INIT_LIST_HEAD(&ve->devices);
        INIT_LIST_HEAD(&ve->ve_list);
        INIT_LIST_HEAD(&ve->devmnt_list);
+       INIT_LIST_HEAD(&ve->release_list);
        mutex_init(&ve->devmnt_mutex);
 
 #ifdef CONFIG_AIO
-- 
1.8.3.1

_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to