This patch adds fork/exit callbacks to container subsystems, and
ensures that every registered subsystem receives one fork callback
for each task running in the system, and one exit callback for each
task that has exited since the subsystem was registered.
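
For illustration, a subsystem using the new callbacks might look roughly
like the (hypothetical) sketch below; the fork/exit signatures match the
additions to struct container_subsys in include/linux/container.h, and the
existing create/destroy/populate methods are still required for
registration but are omitted here:

	/* Hypothetical example subsystem, not part of this patch */
	static void example_fork(struct container_subsys *ss,
				 struct container *cont,
				 struct task_struct *task)
	{
		/* Runs under callback_mutex, once for every fork and once
		 * for each pre-existing task at registration time. */
	}

	static void example_exit(struct container_subsys *ss,
				 struct container *cont,
				 struct task_struct *task)
	{
		/* Runs under callback_mutex when a task exits. */
	}

	static struct container_subsys example_subsys = {
		.name = "example",
		.fork = example_fork,
		.exit = example_exit,
		/* .create, .destroy, .populate etc. omitted for brevity */
	};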

Since the fork/exit path is performance-sensitive, an RCU-protected
flag tells the fork/exit hooks whether they need to take the callback
mutex and scan the list of registered subsystems for fork/exit
handlers.
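
Condensed, the fast path added to container_fork()/container_exit()
below amounts to the following (a summary of the hook logic in this
patch, not additional code):

	rcu_read_lock();
	if (rcu_dereference(need_forkexit_callback)) {
		/* Slow path: some subsystem wants callbacks; drop the
		 * RCU read section and take callback_mutex before
		 * walking the subsys[] array. */
		rcu_read_unlock();
		mutex_lock(&callback_mutex);
		/* ... invoke ss->fork() or ss->exit() for each subsystem ... */
		mutex_unlock(&callback_mutex);
	} else {
		/* Fast path: no fork/exit handlers registered, so no
		 * mutex is taken. */
		rcu_read_unlock();
	}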

 Documentation/containers.txt |   13 ++++
 include/linux/container.h    |    4 +
 kernel/container.c           |  123 +++++++++++++++++++++++++++++++++++++------
 3 files changed, 125 insertions(+), 15 deletions(-)

Index: linux-2.6.18/include/linux/container.h
===================================================================
--- linux-2.6.18.orig/include/linux/container.h
+++ linux-2.6.18/include/linux/container.h
@@ -98,6 +98,10 @@ struct container_subsys {
                            struct container *cont,
                            struct container *old_cont,
                            struct task_struct *tsk);
+       void (*fork)(struct container_subsys *ss, struct container *cont,
+                    struct task_struct *task);
+       void (*exit)(struct container_subsys *ss, struct container *cont,
+                    struct task_struct *task);
        int (*populate)(struct container_subsys *ss,
                        struct container *cont);
 
Index: linux-2.6.18/kernel/container.c
===================================================================
--- linux-2.6.18.orig/kernel/container.c
+++ linux-2.6.18/kernel/container.c
@@ -72,6 +72,21 @@ int number_of_containers __read_mostly;
 static struct container_subsys *subsys[MAX_CONTAINER_SUBSYS];
 static int subsys_count = 0;
 
+/* This flag indicates whether tasks in the fork and exit paths should
+ * take callback_mutex and check for fork/exit handlers to call. This
+ * avoids us having to take locks in the fork/exit path if none of the
+ * subsystems need to be called.
+ *
+ * It is protected via RCU, with the invariant that a process in an
+ * rcu_read_lock() section will never see this as 0 if there are
+ * actually registered subsystems with a fork or exit
+ * handler. (Sometimes it may be 1 without there being any registered
+ * subsystems with such a handler, but such periods are safe and of
+ * short duration).
+ */
+
+static int need_forkexit_callback = 0;
+
 /* bits in struct container flags field */
 typedef enum {
        CONT_REMOVED,
@@ -475,7 +490,6 @@ static int update_flag(container_flagbit
        return 0;
 }
 
-
 /*
  * Attack task specified by pid in 'pidbuf' to container 'cont', possibly
  * writing the path of the old container in 'ppathbuf' if it needs to be
@@ -1122,8 +1136,9 @@ static long container_create(struct cont
 
  err_remove:
        for (s = subsys_count - 1; s >= 0; s--) {
-               if (subsys[s]->enabled)
-                       subsys[s]->destroy(ss, cont);
+               struct container_subsys *ss = subsys[s];
+               if (ss->enabled)
+                       ss->destroy(ss, cont);
        }
 
        mutex_lock(&callback_mutex);
@@ -1187,8 +1202,9 @@ static int container_rmdir(struct inode 
        number_of_containers--;
        mutex_unlock(&callback_mutex);
        for (s = 0; s < subsys_count; s++) {
-               if (subsys[s]->enabled)
-                       subsys[s]->destroy(ss, cont);
+               struct container_subsys *ss = subsys[s];
+               if (ss->enabled)
+                       ss->destroy(ss, cont);
        }
        if (list_empty(&parent->children))
                check_for_release(parent, &pathbuf);
@@ -1204,9 +1220,7 @@ static int container_rmdir(struct inode 
 
 int __init container_init_early(void)
 {
-       struct task_struct *tsk = current;
-
-       tsk->container = &top_container;
+       current->container = &top_container;
        return 0;
 }
 
@@ -1247,6 +1261,7 @@ out:
 int container_register_subsys(struct container_subsys *new_subsys) {
        int retval = 0;
        int i;
+
        mutex_lock(&manage_mutex);
        if (number_of_containers > 1) {
                retval = -EBUSY;
@@ -1269,15 +1284,45 @@ int container_register_subsys(struct con
                }
        }
 
-       subsys[subsys_count] = new_subsys;
-       new_subsys->subsys_id = subsys_count++;
-       retval = new_subsys->create(ss, &top_container);
+       new_subsys->subsys_id = subsys_count;
+       retval = new_subsys->create(new_subsys, &top_container);
        if (retval) {
                new_subsys->subsys_id = -1;
-               subsys_count--;
                goto out;
        }
 
+       mutex_lock(&callback_mutex);
+       /* If this is the first subsystem that requested a fork or
+        * exit callback, tell our fork/exit hooks that they need to
+        * grab callback_mutex on every invocation. If they are
+        * running concurrently with this code, they will either not
+        * see the change now and go straight on, or they will see it
+        * and grab callback_mutex, which will deschedule them. Either
+        * way once synchronize_rcu() returns we know that all current
+        * and future forks will make the callbacks. */
+       if (!need_forkexit_callback &&
+           (new_subsys->fork || new_subsys->exit)) {
+               need_forkexit_callback = 1;
+               synchronize_rcu();
+       }
+
+       /* If this subsystem requested that it be notified with fork
+        * events, we should send it one now for every process in the
+        * system */
+       if (new_subsys->fork) {
+               struct task_struct *g, *p;
+
+               read_lock(&tasklist_lock);
+               do_each_thread(g, p) {
+                       new_subsys->fork(new_subsys, &top_container, p);
+               } while_each_thread(g, p);
+               read_unlock(&tasklist_lock);
+       }
+
+       subsys[subsys_count] = new_subsys;
+       subsys_count++;
+       mutex_unlock(&callback_mutex);
+
        /* Set up the per-container "enabled" file */
        strcpy(new_subsys->enable_cft_filename, new_subsys->name);
        strcat(new_subsys->enable_cft_filename, "_enabled");
@@ -1287,9 +1333,10 @@ int container_register_subsys(struct con
        /* Only populate the top container if we've done
         * container_init() */
        if (container_mount && new_subsys->populate) {
-               new_subsys->populate(&top_container);
+               new_subsys->populate(new_subsys, &top_container);
                container_add_file(&top_container, &new_subsys->enable_cft);
        }
+
  out:
        mutex_unlock(&manage_mutex);
        return retval;
@@ -1315,10 +1362,35 @@ int container_register_subsys(struct con
 
 void container_fork(struct task_struct *child)
 {
+       struct container *cont;
+       int s, need_callback;
+
+       rcu_read_lock();
+       /* need_forkexit_callback will be true if we might need to do
+        * a callback */
+       need_callback = rcu_dereference(need_forkexit_callback);
+       if (need_callback) {
+               rcu_read_unlock();
+               mutex_lock(&callback_mutex);
+       }
        task_lock(current);
-       child->container = current->container;
-       atomic_inc(&child->container->count);
+       cont = current->container;
+       child->container = cont;
+       atomic_inc(&cont->count);
+       if (need_callback) {
+               for (s = 0; s < subsys_count; s++) {
+                       struct container_subsys *ss = subsys[s];
+                       if (ss->fork) {
+                               ss->fork(ss, cont, child);
+                       }
+               }
+       }
        task_unlock(current);
+       if (need_callback) {
+               mutex_unlock(&callback_mutex);
+       } else {
+               rcu_read_unlock();
+       }
 }
 
 /**
@@ -1383,9 +1455,30 @@ void container_fork(struct task_struct *
 void container_exit(struct task_struct *tsk)
 {
        struct container *cont;
+       int s, need_callback;
 
        cont = tsk->container;
-       tsk->container = &top_container;        /* the_top_container_hack - see above */
+
+       rcu_read_lock();
+       need_callback = rcu_dereference(need_forkexit_callback);
+       if (need_callback) {
+               rcu_read_unlock();
+               mutex_lock(&callback_mutex);
+               for (s = 0; s < subsys_count; s++) {
+                       struct container_subsys *ss = subsys[s];
+                       if (ss->exit) {
+                               ss->exit(ss, cont, tsk);
+                       }
+               }
+               mutex_unlock(&callback_mutex);
+       } else {
+               rcu_read_unlock();
+       }
+
+       if (cont != &top_container) {
+               /* the_top_container_hack - see above */
+               tsk->container = &top_container;
+       }
 
        if (notify_on_release(cont)) {
                char *pathbuf = NULL;
Index: linux-2.6.18/Documentation/containers.txt
===================================================================
--- linux-2.6.18.orig/Documentation/containers.txt
+++ linux-2.6.18/Documentation/containers.txt
@@ -353,6 +353,19 @@ LL=manage_mutex
 Called after the task has been attached to the container, to allow any
 post-attachment activity that requires memory allocations or blocking.
 
+void fork(struct container_subsys *ss, struct container *cont,
+         struct task_struct *task)
+LL=callback_mutex, maybe read_lock(tasklist_lock)
+
+Called when a task is forked into a container. Also called during
+registration for all existing tasks.
+
+void exit(struct container_subsys *ss, struct container *cont,
+         struct task_struct *task)
+LL=callback_mutex
+
+Called during task exit.
+
 int populate(struct container_subsys *ss, struct container *cont)
 LL=none
 
