Gitweb:     
http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=b4f48b6363c81ca743ef46943ef23fd72e60f679
Commit:     b4f48b6363c81ca743ef46943ef23fd72e60f679
Parent:     355e0c48b757b7fcc79ccb98fda8105ed37a1598
Author:     Paul Menage <[EMAIL PROTECTED]>
AuthorDate: Thu Oct 18 23:39:33 2007 -0700
Committer:  Linus Torvalds <[EMAIL PROTECTED]>
CommitDate: Fri Oct 19 11:53:36 2007 -0700

    Task Control Groups: add fork()/exit() hooks
    
    This adds the necessary hooks to the fork() and exit() paths to ensure
    that new children inherit their parent's cgroup assignments, and that
    exiting processes release reference counts on their cgroups.
    
    Signed-off-by: Paul Menage <[EMAIL PROTECTED]>
    Cc: Serge E. Hallyn <[EMAIL PROTECTED]>
    Cc: "Eric W. Biederman" <[EMAIL PROTECTED]>
    Cc: Dave Hansen <[EMAIL PROTECTED]>
    Cc: Balbir Singh <[EMAIL PROTECTED]>
    Cc: Paul Jackson <[EMAIL PROTECTED]>
    Cc: Kirill Korotaev <[EMAIL PROTECTED]>
    Cc: Herbert Poetzl <[EMAIL PROTECTED]>
    Cc: Srivatsa Vaddagiri <[EMAIL PROTECTED]>
    Cc: Cedric Le Goater <[EMAIL PROTECTED]>
    Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
    Signed-off-by: Linus Torvalds <[EMAIL PROTECTED]>
---
 include/linux/cgroup.h |    6 ++
 kernel/cgroup.c        |  121 ++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/exit.c          |    2 +
 kernel/fork.c          |   14 +++++-
 4 files changed, 141 insertions(+), 2 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e95143c..792ad74 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -25,6 +25,9 @@ extern int cgroup_init(void);
 extern void cgroup_init_smp(void);
 extern void cgroup_lock(void);
 extern void cgroup_unlock(void);
+extern void cgroup_fork(struct task_struct *p);
+extern void cgroup_fork_callbacks(struct task_struct *p);
+extern void cgroup_exit(struct task_struct *p, int run_callbacks);
 
 /* Per-subsystem/per-cgroup state maintained by the system. */
 struct cgroup_subsys_state {
@@ -223,6 +226,9 @@ int cgroup_path(const struct cgroup *cont, char *buf, int 
buflen);
 static inline int cgroup_init_early(void) { return 0; }
 static inline int cgroup_init(void) { return 0; }
 static inline void cgroup_init_smp(void) {}
+static inline void cgroup_fork(struct task_struct *p) {}
+static inline void cgroup_fork_callbacks(struct task_struct *p) {}
+static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
 
 static inline void cgroup_lock(void) {}
 static inline void cgroup_unlock(void) {}
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f4c4dce..7bb520a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -132,6 +132,33 @@ list_for_each_entry(_ss, &_root->subsys_list, sibling)
 #define for_each_root(_root) \
 list_for_each_entry(_root, &roots, root_list)
 
+/* Each task_struct has an embedded css_set, so the get/put
+ * operation simply takes a reference count on all the cgroups
+ * referenced by subsystems in this css_set. This can end up
+ * multiple-counting some cgroups, but that's OK - the ref-count is
+ * just a busy/not-busy indicator; ensuring that we only count each
+ * cgroup once would require taking a global lock to ensure that no
+ * subsystems moved between hierarchies while we were doing so.
+ *
+ * Possible TODO: decide at boot time based on the number of
+ * registered subsystems and the number of CPUs or NUMA nodes whether
+ * it's better for performance to ref-count every subsystem, or to
+ * take a global lock and only add one ref count to each hierarchy.
+ */
+static void get_css_set(struct css_set *cg)
+{
+       int i;
+       for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
+               atomic_inc(&cg->subsys[i]->cgroup->count);
+}
+
+static void put_css_set(struct css_set *cg)
+{
+       int i;
+       for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
+               atomic_dec(&cg->subsys[i]->cgroup->count);
+}
+
 /*
  * There is one global cgroup mutex. We also require taking
  * task_lock() when dereferencing a task's cgroup subsys pointers.
@@ -1587,3 +1614,97 @@ int __init cgroup_init(void)
 out:
        return err;
 }
+
+/**
+ * cgroup_fork - attach newly forked task to its parents cgroup.
+ * @tsk: pointer to task_struct of forking parent process.
+ *
+ * Description: A task inherits its parent's cgroup at fork().
+ *
+ * A pointer to the shared css_set was automatically copied in
+ * fork.c by dup_task_struct().  However, we ignore that copy, since
+ * it was not made under the protection of RCU or cgroup_mutex, so
+ * might no longer be a valid cgroup pointer.  attach_task() might
+ * have already changed current->cgroup, allowing the previously
+ * referenced cgroup to be removed and freed.
+ *
+ * At the point that cgroup_fork() is called, 'current' is the parent
+ * task, and the passed argument 'child' points to the child task.
+ */
+void cgroup_fork(struct task_struct *child)
+{
+       rcu_read_lock();
+       child->cgroups = rcu_dereference(current->cgroups);
+       get_css_set(&child->cgroups);
+       rcu_read_unlock();
+}
+
+/**
+ * cgroup_fork_callbacks - called on a new task very soon before
+ * adding it to the tasklist. No need to take any locks since no-one
+ * can be operating on this task
+ */
+void cgroup_fork_callbacks(struct task_struct *child)
+{
+       if (need_forkexit_callback) {
+               int i;
+               for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+                       struct cgroup_subsys *ss = subsys[i];
+                       if (ss->fork)
+                               ss->fork(ss, child);
+               }
+       }
+}
+
+/**
+ * cgroup_exit - detach cgroup from exiting task
+ * @tsk: pointer to task_struct of exiting process
+ *
+ * Description: Detach cgroup from @tsk and release it.
+ *
+ * Note that cgroups marked notify_on_release force every task in
+ * them to take the global cgroup_mutex mutex when exiting.
+ * This could impact scaling on very large systems.  Be reluctant to
+ * use notify_on_release cgroups where very high task exit scaling
+ * is required on large systems.
+ *
+ * the_top_cgroup_hack:
+ *
+ *    Set the exiting tasks cgroup to the root cgroup (top_cgroup).
+ *
+ *    We call cgroup_exit() while the task is still competent to
+ *    handle notify_on_release(), then leave the task attached to the
+ *    root cgroup in each hierarchy for the remainder of its exit.
+ *
+ *    To do this properly, we would increment the reference count on
+ *    top_cgroup, and near the very end of the kernel/exit.c do_exit()
+ *    code we would add a second cgroup function call, to drop that
+ *    reference.  This would just create an unnecessary hot spot on
+ *    the top_cgroup reference count, to no avail.
+ *
+ *    Normally, holding a reference to a cgroup without bumping its
+ *    count is unsafe.   The cgroup could go away, or someone could
+ *    attach us to a different cgroup, decrementing the count on
+ *    the first cgroup that we never incremented.  But in this case,
+ *    top_cgroup isn't going away, and either task has PF_EXITING set,
+ *    which wards off any attach_task() attempts, or task is a failed
+ *    fork, never visible to attach_task.
+ *
+ */
+void cgroup_exit(struct task_struct *tsk, int run_callbacks)
+{
+       int i;
+
+       if (run_callbacks && need_forkexit_callback) {
+               for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+                       struct cgroup_subsys *ss = subsys[i];
+                       if (ss->exit)
+                               ss->exit(ss, tsk);
+               }
+       }
+       /* Reassign the task to the init_css_set. */
+       task_lock(tsk);
+       put_css_set(&tsk->cgroups);
+       tsk->cgroups = init_task.cgroups;
+       task_unlock(tsk);
+}
diff --git a/kernel/exit.c b/kernel/exit.c
index 2c704c8..44ff614 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -32,6 +32,7 @@
 #include <linux/delayacct.h>
 #include <linux/freezer.h>
 #include <linux/cpuset.h>
+#include <linux/cgroup.h>
 #include <linux/syscalls.h>
 #include <linux/signal.h>
 #include <linux/posix-timers.h>
@@ -973,6 +974,7 @@ fastcall NORET_TYPE void do_exit(long code)
        check_stack_usage();
        exit_thread();
        cpuset_exit(tsk);
+       cgroup_exit(tsk, 1);
        exit_keys(tsk);
 
        if (group_dead && tsk->signal->leader)
diff --git a/kernel/fork.c b/kernel/fork.c
index 2ce28f1..e7c1814 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -30,6 +30,7 @@
 #include <linux/capability.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
+#include <linux/cgroup.h>
 #include <linux/security.h>
 #include <linux/swap.h>
 #include <linux/syscalls.h>
@@ -979,6 +980,7 @@ static struct task_struct *copy_process(unsigned long 
clone_flags,
 {
        int retval;
        struct task_struct *p = NULL;
+       int cgroup_callbacks_done = 0;
 
        if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
                return ERR_PTR(-EINVAL);
@@ -1088,12 +1090,13 @@ static struct task_struct *copy_process(unsigned long 
clone_flags,
        p->io_context = NULL;
        p->audit_context = NULL;
        cpuset_fork(p);
+       cgroup_fork(p);
 #ifdef CONFIG_NUMA
        p->mempolicy = mpol_copy(p->mempolicy);
        if (IS_ERR(p->mempolicy)) {
                retval = PTR_ERR(p->mempolicy);
                p->mempolicy = NULL;
-               goto bad_fork_cleanup_cpuset;
+               goto bad_fork_cleanup_cgroup;
        }
        mpol_fix_fork_child_flag(p);
 #endif
@@ -1204,6 +1207,12 @@ static struct task_struct *copy_process(unsigned long 
clone_flags,
        /* Perform scheduler related setup. Assign this task to a CPU. */
        sched_fork(p, clone_flags);
 
+       /* Now that the task is set up, run cgroup callbacks if
+        * necessary. We need to run them before the task is visible
+        * on the tasklist. */
+       cgroup_fork_callbacks(p);
+       cgroup_callbacks_done = 1;
+
        /* Need tasklist lock for parent etc handling! */
        write_lock_irq(&tasklist_lock);
 
@@ -1318,9 +1327,10 @@ bad_fork_cleanup_security:
 bad_fork_cleanup_policy:
 #ifdef CONFIG_NUMA
        mpol_free(p->mempolicy);
-bad_fork_cleanup_cpuset:
+bad_fork_cleanup_cgroup:
 #endif
        cpuset_exit(p);
+       cgroup_exit(p, cgroup_callbacks_done);
 bad_fork_cleanup_delays_binfmt:
        delayacct_tsk_free(p);
        if (p->binfmt)
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to