On 07/26, Oleg Nesterov wrote:
>
> On 07/26, Pavel Emelyanov wrote:
> >
> > Make task release its namespaces after it has reparented all his
> > children to child_reaper, but before it notifies its parent about
> > its death.
> > 
> > The reason to release namespaces after reparenting is that when task
> > exits it may send a signal to its parent (SIGCHLD), but if the parent
> > has already exited its namespaces there will be no way to decide what
> > pid to dever to him - parent can be from different namespace.
> >
> > The reason to release namespace before notifying the parent it that
> > when task sends a SIGCHLD to parent it can call wait() on this taks
> > and release it. But releasing the mnt namespace implies dropping
> > of all the mounts in the mnt namespace and NFS expects the task to
> > have valid sighand pointer.
> > 
> > Signed-off-by: Pavel Emelyanov <[EMAIL PROTECTED]>
> > 
> > ---
> > 
> > exit.c |    5 ++++-
> > 1 files changed, 4 insertions(+), 1 deletion(-)
> > 
> > diff -upr linux-2.6.23-rc1-mm1.orig/kernel/exit.c 
> > linux-2.6.23-rc1-mm1-7/kernel/exit.c
> > --- linux-2.6.23-rc1-mm1.orig/kernel/exit.c 2007-07-26 
> > 16:34:45.000000000 +0400
> > +++ linux-2.6.23-rc1-mm1-7/kernel/exit.c    2007-07-26 
> > 16:36:37.000000000 +0400
> > @@ -788,6 +804,10 @@ static void exit_notify(struct task_stru
> >     BUG_ON(!list_empty(&tsk->children));
> >     BUG_ON(!list_empty(&tsk->ptrace_children));
> > 
> > +   write_unlock_irq(&tasklist_lock);
> > +   exit_task_namespaces(tsk);
> > +   write_lock_irq(&tasklist_lock);
> 
> No.
> 
> We "cleared" our ->children/->ptrace_children lists. Now suppose that
> another thread dies, and its forget_original_parent() choose us as a
> new reaper before we re-take tasklist.

Perhaps, we can do something like the patch below. Roland, what do you
think?

We can check PF_EXITING instead of ->exit_state while choosing the new
parent. Note that tasklits_lock acts as a barrier, everyone who takes
tasklist after us (when forget_original_parent() drops it) must see
PF_EXITING.

Oleg.

--- t/kernel/exit.c~    2007-07-27 11:32:21.000000000 +0400
+++ t/kernel/exit.c     2007-07-27 11:59:09.000000000 +0400
@@ -686,11 +686,14 @@ reparent_thread(struct task_struct *p, s
  * the child reaper process (ie "init") in our pid
  * space.
  */
-static void
-forget_original_parent(struct task_struct *father, struct list_head 
*to_release)
+static void forget_original_parent(struct task_struct *father)
 {
        struct task_struct *p, *reaper = father;
-       struct list_head *_p, *_n;
+       struct list_head *ptrace_dead, *_p, *_n;
+
+       INIT_LIST_HEAD(&ptrace_dead);
+
+       write_lock_irq(&tasklist_lock);
 
        do {
                reaper = next_thread(reaper);
@@ -698,7 +701,7 @@ forget_original_parent(struct task_struc
                        reaper = child_reaper(father);
                        break;
                }
-       } while (reaper->exit_state);
+       } while (reaper->flags & PF_EXITING);
 
        /*
         * There are only two places where our children can be:
@@ -736,13 +739,25 @@ forget_original_parent(struct task_struc
                 * while it was being traced by us, to be able to see it in 
wait4.
                 */
                if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && 
p->exit_signal == -1))
-                       list_add(&p->ptrace_list, to_release);
+                       list_add(&p->ptrace_list, &ptrace_dead);
        }
+
        list_for_each_safe(_p, _n, &father->ptrace_children) {
                p = list_entry(_p, struct task_struct, ptrace_list);
                choose_new_parent(p, reaper);
                reparent_thread(p, father, 1);
        }
+
+       write_unlock_irq(&tasklist_lock);
+       BUG_ON(!list_empty(&tsk->children));
+       BUG_ON(!list_empty(&tsk->ptrace_children));
+
+       list_for_each_safe(_p, _n, &ptrace_dead) {
+               list_del_init(_p);
+               t = list_entry(_p, struct task_struct, ptrace_list);
+               release_task(t);
+       }
+
 }
 
 /*
@@ -753,7 +768,6 @@ static void exit_notify(struct task_stru
 {
        int state;
        struct task_struct *t;
-       struct list_head ptrace_dead, *_p, *_n;
        struct pid *pgrp;
 
        if (signal_pending(tsk) && !(tsk->signal->flags & SIGNAL_GROUP_EXIT)
@@ -776,8 +790,6 @@ static void exit_notify(struct task_stru
                read_unlock(&tasklist_lock);
        }
 
-       write_lock_irq(&tasklist_lock);
-
        /*
         * This does two things:
         *
@@ -786,12 +798,9 @@ static void exit_notify(struct task_stru
         *      as a result of our exiting, and if they have any stopped
         *      jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
         */
+       forget_original_parent(tsk);
 
-       INIT_LIST_HEAD(&ptrace_dead);
-       forget_original_parent(tsk, &ptrace_dead);
-       BUG_ON(!list_empty(&tsk->children));
-       BUG_ON(!list_empty(&tsk->ptrace_children));
-
+       write_lock_irq(&tasklist_lock);
        /*
         * Check to see if any process groups have become orphaned
         * as a result of our exiting, and if they have any stopped
@@ -801,9 +810,8 @@ static void exit_notify(struct task_stru
         * and we were the only connection outside, so our pgrp
         * is about to become orphaned.
         */
-        
        t = tsk->real_parent;
-       
+
        pgrp = task_pgrp(tsk);
        if ((task_pgrp(t) != pgrp) &&
            (task_session(t) == task_session(tsk)) &&
@@ -826,9 +834,8 @@ static void exit_notify(struct task_stru
         * If our self_exec id doesn't match our parent_exec_id then
         * we have changed execution domain as these two values started
         * the same after a fork.
-        *      
         */
-       
+
        if (tsk->exit_signal != SIGCHLD && tsk->exit_signal != -1 &&
            ( tsk->parent_exec_id != t->self_exec_id  ||
              tsk->self_exec_id != tsk->parent_exec_id)
@@ -856,12 +863,6 @@ static void exit_notify(struct task_stru
 
        write_unlock_irq(&tasklist_lock);
 
-       list_for_each_safe(_p, _n, &ptrace_dead) {
-               list_del_init(_p);
-               t = list_entry(_p, struct task_struct, ptrace_list);
-               release_task(t);
-       }
-
        /* If the process is dead, release it - nobody will wait for it */
        if (state == EXIT_DEAD)
                release_task(tsk);

_______________________________________________
Devel mailing list
[email protected]
https://openvz.org/mailman/listinfo/devel

Reply via email to