Gitweb:     http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=cf7b708c8d1d7a27736771bcf4c457b332b0f818
Commit:     cf7b708c8d1d7a27736771bcf4c457b332b0f818
Parent:     a6f5e06378970a2687332c2d54046245fcff1e7e
Author:     Pavel Emelyanov <[EMAIL PROTECTED]>
AuthorDate: Thu Oct 18 23:39:54 2007 -0700
Committer:  Linus Torvalds <[EMAIL PROTECTED]>
CommitDate: Fri Oct 19 11:53:37 2007 -0700

    Make access to task's nsproxy lighter
    
    When someone wants to deal with some other task's namespaces it has to lock
    the task and then get the desired namespace if it exists.  This is
    slow on read-only paths and may be impossible in some cases.
    
    E.g.  Oleg recently noticed a race between unshare() and the (sent for
    review in cgroups) pid namespaces - when the task notifies the parent it
    has to know the parent's namespace, but taking the task_lock() is
    impossible there - the code is under write locked tasklist lock.
    
    On the other hand switching the namespace on a task (daemonize) and releasing
    the namespace (after the last task exits) are rather rare operations and we
    can sacrifice their speed to solve the issues above.
    
    The access to other task namespaces is proposed to be performed
    like this:
    
         rcu_read_lock();
         nsproxy = task_nsproxy(tsk);
         if (nsproxy != NULL) {
                 / *
                   * work with the namespaces here
                   * e.g. get the reference on one of them
                   * /
         } / *
             * NULL task_nsproxy() means that this task is
             * almost dead (zombie)
             * /
         rcu_read_unlock();
    
    This patch has passed review by Eric and Oleg :) and has,
    of course, been tested.
    
    [EMAIL PROTECTED]: fix unshare()]
    [EMAIL PROTECTED]: Update get_net_ns_by_pid]
    Signed-off-by: Pavel Emelyanov <[EMAIL PROTECTED]>
    Signed-off-by: Eric W. Biederman <[EMAIL PROTECTED]>
    Cc: Oleg Nesterov <[EMAIL PROTECTED]>
    Cc: Paul E. McKenney <[EMAIL PROTECTED]>
    Cc: Serge Hallyn <[EMAIL PROTECTED]>
    Signed-off-by: Cedric Le Goater <[EMAIL PROTECTED]>
    Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
    Signed-off-by: Linus Torvalds <[EMAIL PROTECTED]>
---
 fs/proc/base.c          |   27 +++++++++++++++++----------
 include/linux/nsproxy.h |   43 ++++++++++++++++++++++++++++++++++---------
 kernel/exit.c           |    7 ++++---
 kernel/fork.c           |   11 +++++------
 kernel/nsproxy.c        |   40 +++++++++++++++++++++++++++-------------
 net/core/rtnetlink.c    |    8 ++++----
 6 files changed, 91 insertions(+), 45 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index fbff900..6afca09 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -350,18 +350,21 @@ struct proc_mounts {
 static int mounts_open(struct inode *inode, struct file *file)
 {
        struct task_struct *task = get_proc_task(inode);
+       struct nsproxy *nsp;
        struct mnt_namespace *ns = NULL;
        struct proc_mounts *p;
        int ret = -EINVAL;
 
        if (task) {
-               task_lock(task);
-               if (task->nsproxy) {
-                       ns = task->nsproxy->mnt_ns;
+               rcu_read_lock();
+               nsp = task_nsproxy(task);
+               if (nsp) {
+                       ns = nsp->mnt_ns;
                        if (ns)
                                get_mnt_ns(ns);
                }
-               task_unlock(task);
+               rcu_read_unlock();
+
                put_task_struct(task);
        }
 
@@ -424,16 +427,20 @@ static int mountstats_open(struct inode *inode, struct file *file)
 
        if (!ret) {
                struct seq_file *m = file->private_data;
+               struct nsproxy *nsp;
                struct mnt_namespace *mnt_ns = NULL;
                struct task_struct *task = get_proc_task(inode);
 
                if (task) {
-                       task_lock(task);
-                       if (task->nsproxy)
-                               mnt_ns = task->nsproxy->mnt_ns;
-                       if (mnt_ns)
-                               get_mnt_ns(mnt_ns);
-                       task_unlock(task);
+                       rcu_read_lock();
+                       nsp = task_nsproxy(task);
+                       if (nsp) {
+                               mnt_ns = nsp->mnt_ns;
+                               if (mnt_ns)
+                                       get_mnt_ns(mnt_ns);
+                       }
+                       rcu_read_unlock();
+
                        put_task_struct(task);
                }
 
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index f1eca68..0e66b57 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -32,8 +32,39 @@ struct nsproxy {
 };
 extern struct nsproxy init_nsproxy;
 
+/*
+ * the namespaces access rules are:
+ *
+ *  1. only current task is allowed to change tsk->nsproxy pointer or
+ *     any pointer on the nsproxy itself
+ *
+ *  2. when accessing (i.e. reading) current task's namespaces - no
+ *     precautions should be taken - just dereference the pointers
+ *
+ *  3. the access to other task namespaces is performed like this
+ *     rcu_read_lock();
+ *     nsproxy = task_nsproxy(tsk);
+ *     if (nsproxy != NULL) {
+ *             / *
+ *               * work with the namespaces here
+ *               * e.g. get the reference on one of them
+ *               * /
+ *     } / *
+ *         * NULL task_nsproxy() means that this task is
+ *         * almost dead (zombie)
+ *         * /
+ *     rcu_read_unlock();
+ *
+ */
+
+static inline struct nsproxy *task_nsproxy(struct task_struct *tsk)
+{
+       return rcu_dereference(tsk->nsproxy);
+}
+
 int copy_namespaces(unsigned long flags, struct task_struct *tsk);
-void get_task_namespaces(struct task_struct *tsk);
+void exit_task_namespaces(struct task_struct *tsk);
+void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
 void free_nsproxy(struct nsproxy *ns);
 int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
        struct fs_struct *);
@@ -45,15 +76,9 @@ static inline void put_nsproxy(struct nsproxy *ns)
        }
 }
 
-static inline void exit_task_namespaces(struct task_struct *p)
+static inline void get_nsproxy(struct nsproxy *ns)
 {
-       struct nsproxy *ns = p->nsproxy;
-       if (ns) {
-               task_lock(p);
-               p->nsproxy = NULL;
-               task_unlock(p);
-               put_nsproxy(ns);
-       }
+       atomic_inc(&ns->count);
 }
 
 #ifdef CONFIG_CGROUP_NS
diff --git a/kernel/exit.c b/kernel/exit.c
index d22aefa..db97641 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -400,9 +400,10 @@ void daemonize(const char *name, ...)
        current->fs = fs;
        atomic_inc(&fs->count);
 
-       exit_task_namespaces(current);
-       current->nsproxy = init_task.nsproxy;
-       get_task_namespaces(current);
+       if (current->nsproxy != init_task.nsproxy) {
+               get_nsproxy(init_task.nsproxy);
+               switch_task_namespaces(current, init_task.nsproxy);
+       }
 
        exit_files(current);
        current->files = init_task.files;
diff --git a/kernel/fork.c b/kernel/fork.c
index 2deaf48..d2f4a42 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1632,7 +1632,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
        struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
        struct files_struct *fd, *new_fd = NULL;
        struct sem_undo_list *new_ulist = NULL;
-       struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL;
+       struct nsproxy *new_nsproxy = NULL;
 
        check_unshare_flags(&unshare_flags);
 
@@ -1662,14 +1662,13 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
 
        if (new_fs ||  new_mm || new_fd || new_ulist || new_nsproxy) {
 
-               task_lock(current);
-
                if (new_nsproxy) {
-                       old_nsproxy = current->nsproxy;
-                       current->nsproxy = new_nsproxy;
-                       new_nsproxy = old_nsproxy;
+                       switch_task_namespaces(current, new_nsproxy);
+                       new_nsproxy = NULL;
                }
 
+               task_lock(current);
+
                if (new_fs) {
                        fs = current->fs;
                        current->fs = new_fs;
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index e981c61..c8ef7c2 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -26,19 +26,6 @@ static struct kmem_cache *nsproxy_cachep;
 
 struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
-static inline void get_nsproxy(struct nsproxy *ns)
-{
-       atomic_inc(&ns->count);
-}
-
-void get_task_namespaces(struct task_struct *tsk)
-{
-       struct nsproxy *ns = tsk->nsproxy;
-       if (ns) {
-               get_nsproxy(ns);
-       }
-}
-
 /*
  * creates a copy of "orig" with refcount 1.
  */
@@ -216,6 +203,33 @@ out:
        return err;
 }
 
+void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
+{
+       struct nsproxy *ns;
+
+       might_sleep();
+
+       ns = p->nsproxy;
+
+       rcu_assign_pointer(p->nsproxy, new);
+
+       if (ns && atomic_dec_and_test(&ns->count)) {
+               /*
+                * wait for others to get what they want from this nsproxy.
+                *
+                * cannot release this nsproxy via the call_rcu() since
+                * put_mnt_ns() will want to sleep
+                */
+               synchronize_rcu();
+               free_nsproxy(ns);
+       }
+}
+
+void exit_task_namespaces(struct task_struct *p)
+{
+       switch_task_namespaces(p, NULL);
+}
+
 static int __init nsproxy_cache_init(void)
 {
        nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1072d16..4a2640d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -744,10 +744,10 @@ static struct net *get_net_ns_by_pid(pid_t pid)
        rcu_read_lock();
        tsk = find_task_by_pid(pid);
        if (tsk) {
-               task_lock(tsk);
-               if (tsk->nsproxy)
-                       net = get_net(tsk->nsproxy->net_ns);
-               task_unlock(tsk);
+               struct nsproxy *nsproxy;
+               nsproxy = task_nsproxy(tsk);
+               if (nsproxy)
+                       net = get_net(nsproxy->net_ns);
        }
        rcu_read_unlock();
        return net;
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to