[EMAIL PROTECTED] writes:

> From: Sukadev Bhattiprolu <[EMAIL PROTECTED]>
> Subject: [RFC][PATCH 3/5] Use pid namespace from struct pid_nrs list
>
> Stop using task->nsproxy->pid_ns.  Use pid_namespace from pid->pid_nrs
> list instead.
>
> To simplify error handling, this patch moves processing of CLONE_NEWPID
> flag, currently in copy_namespaces()/copy_process(), to alloc_pid() which
> is where the process association with a pid namespace is established.
>
> i.e when cloning a new pid namespace, alloc_pid() allocates a new pid_nr
> for both the parent and child namespaces.


This patch seems to do a bit much, it is hard to follow what changes you
are making.

It probably makes sense to modify things so alloc_pid can do everything
it needs to.

It looks like we can safely move alloc_pid into copy_process and
just dig out the pid number and place it in nr if copy_process succeeds.

Which should allow the special case for setting the child reaper to go
away, because we can allocate the task_struct before allocating the struct
pid.

> Changelog:
>       - Move definition of set_pid_ns_child_reaper() into the previous
>         helper function patch.
>       - Minor changes to accomodate changes in underlying patches.
>       - Fix compile warnings about parent_pid_ns (Cedric Le Goater)
>       - [Badari Pulavarty's comments]: No need to allocate nsproxy when
>         only CLONE_NEWPID is set. And attach_pid_nr() only needs one
>         parameter.
>       - Add privilege check for clone(CLONE_NEWPID).
>
> Signed-off-by: Sukadev Bhattiprolu <[EMAIL PROTECTED]>
> Cc: Cedric Le Goater <[EMAIL PROTECTED]>
> Cc: Dave Hansen <[EMAIL PROTECTED]>
> Cc: Serge Hallyn <[EMAIL PROTECTED]>
> Cc: [EMAIL PROTECTED]
>
> ---
>  include/linux/pid.h           |    2 -
>  include/linux/pid_namespace.h |   11 ++-------
>  kernel/fork.c                 |   12 ++++++---
>  kernel/nsproxy.c              |   11 ---------
>  kernel/pid.c | 51 ++++++++++++++++++++++++++++++------------
>  5 files changed, 50 insertions(+), 37 deletions(-)
>
> Index: lx26-20-mm2b/include/linux/pid.h
> ===================================================================
> --- lx26-20-mm2b.orig/include/linux/pid.h 2007-03-09 19:00:11.000000000 -0800
> +++ lx26-20-mm2b/include/linux/pid.h  2007-03-09 19:01:09.000000000 -0800
> @@ -119,7 +119,7 @@ extern struct pid *find_ge_pid(int nr);
>  extern int attach_pid_nr(struct pid *pid, struct pid_nr *pid_nr);
>  extern void free_pid_nr(struct pid_nr *pid_nr);
>  extern struct pid_nr *alloc_pid_nr(struct pid_namespace *pid_ns);
> -extern struct pid *alloc_pid(void);
> +extern struct pid *alloc_pid(int clone_flags);
>  extern void FASTCALL(free_pid(struct pid *pid));
>  extern pid_t pid_nr(struct pid *pid);
>  
> Index: lx26-20-mm2b/kernel/fork.c
> ===================================================================
> --- lx26-20-mm2b.orig/kernel/fork.c   2007-03-09 19:00:42.000000000 -0800
> +++ lx26-20-mm2b/kernel/fork.c        2007-03-09 19:01:09.000000000 -0800
> @@ -1144,6 +1144,8 @@ static struct task_struct *copy_process(
>       if (clone_flags & CLONE_THREAD)
>               p->tgid = current->tgid;
>  
> +     if ((retval = priv_check_pid_ns(clone_flags)))
> +             goto bad_fork_cleanup_policy;
>       if ((retval = security_task_alloc(p)))
>               goto bad_fork_cleanup_policy;
>       if ((retval = audit_alloc(p)))
> @@ -1169,6 +1171,9 @@ static struct task_struct *copy_process(
>       if (retval)
>               goto bad_fork_cleanup_namespaces;
>  
> +     /* We are now ready to set child reaper if we cloned pid ns */
> +     set_pid_ns_child_reaper(clone_flags, pid, p);
> +
>       p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr :
> NULL;
>       /*
>        * Clear TID on mm_release()?
> @@ -1384,7 +1389,7 @@ long do_fork(unsigned long clone_flags,
>             int __user *child_tidptr)
>  {
>       struct task_struct *p;
> -     struct pid *pid = alloc_pid();
> +     struct pid *pid = alloc_pid(clone_flags);
>       long nr;
>  
>       if (!pid)
> @@ -1671,7 +1676,7 @@ asmlinkage long sys_unshare(unsigned lon
>       if ((err = unshare_pid_ns(unshare_flags, &new_pid_nr)))
>               goto bad_unshare_cleanup_ipc;
>  
> -     if (new_ns || new_uts || new_ipc || new_pid_nr) {
> +     if (new_ns || new_uts || new_ipc) {
>               old_nsproxy = current->nsproxy;
>               new_nsproxy = dup_nsproxy(old_nsproxy);
>               if (!new_nsproxy) {
> @@ -1730,8 +1735,7 @@ asmlinkage long sys_unshare(unsigned lon
>               }
>  
>               if (new_pid_nr) {
> -                     pid = task_pid_ns(current);
> -                     set_task_pid_ns(current, new_pid_nr->pid_ns);
> +                     attach_pid_nr(task_pid(current), new_pid_nr);
>                       new_pid_nr = NULL;
>               }
>  
> Index: lx26-20-mm2b/kernel/nsproxy.c
> ===================================================================
> --- lx26-20-mm2b.orig/kernel/nsproxy.c        2007-03-09 19:00:14.000000000 
> -0800
> +++ lx26-20-mm2b/kernel/nsproxy.c     2007-03-09 19:01:09.000000000 -0800
> @@ -67,8 +67,6 @@ struct nsproxy *dup_nsproxy(struct nspro
>                       get_uts_ns(ns->uts_ns);
>               if (ns->ipc_ns)
>                       get_ipc_ns(ns->ipc_ns);
> -             if (ns->pid_ns)
> -                     get_pid_ns(ns->pid_ns);
>       }
>  
>       return ns;
> @@ -90,7 +88,7 @@ int copy_nsproxy(int flags, struct task_
>  
>       get_nsproxy(old_ns);
>  
> -     ns_all = CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWPID;
> +     ns_all = CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC;
>  
>       if (!(flags & ns_all))
>               return 0;
> @@ -115,17 +113,10 @@ int copy_nsproxy(int flags, struct task_
>       if (err)
>               goto out_ipc;
>  
> -     err = copy_pid_ns(flags, tsk);
> -     if (err)
> -             goto out_pid;
> -
>  out:
>       put_nsproxy(old_ns);
>       return err;
>  
> -out_pid:
> -     if (new_ns->ipc_ns)
> -             put_ipc_ns(new_ns->ipc_ns);
>  out_ipc:
>       if (new_ns->uts_ns)
>               put_uts_ns(new_ns->uts_ns);
> Index: lx26-20-mm2b/kernel/pid.c
> ===================================================================
> --- lx26-20-mm2b.orig/kernel/pid.c    2007-03-09 19:00:42.000000000 -0800
> +++ lx26-20-mm2b/kernel/pid.c 2007-03-09 19:01:09.000000000 -0800
> @@ -221,8 +221,13 @@ fastcall void free_pid(struct pid *pid)
>       hlist_del_rcu(&pid->pid_chain);
>       spin_unlock_irqrestore(&pidmap_lock, flags);
>  
> -     hlist_for_each_entry(pid_nr, pos, &pid->pid_nrs, node)
> +     hlist_for_each_entry(pid_nr, pos, &pid->pid_nrs, node) {
>               free_pidmap(pid_nr->pid_ns, pid_nr->nr);
> +
> +             /* put the reference we got in kref_init() in clone_pid_ns() */
> +             if (pid_nr->nr == 1)
> +                     put_pid_ns(pid_nr->pid_ns);

Ok.  This seems to make sense, but why restrict this to only pid 1?
I'm almost certain this will be the case, but... this seems a like
a unwarranted special case at the moment.

Basically why is it safe to restrict this to pid == 1.  Is it possible
that we can race here?

> +     }
>       call_rcu(&pid->rcu, delayed_put_pid);
>  }
>  
> @@ -357,34 +362,48 @@ struct pid_namespace *pid_ns(struct pid 
>       return ns;
>  }
>  
> -struct pid *alloc_pid(void)
> +struct pid *alloc_pid(int flags)
>  {
>       struct pid *pid;
>       enum pid_type type;
> -     int nr = -1;
> -     struct pid_nr *pid_nr;
> +     struct pid_nr *pid_nr[2] = { NULL, NULL};
I would rather not see pid_nr special cased this way at all (a loop?)
but if we are going to I think two separate variables makes more
sense than this array.
> +     struct pid_namespace *new_pid_ns = NULL;
>  
>       pid = kmem_cache_alloc(pid_cachep, GFP_KERNEL);
>       if (!pid)
>               return NULL;
>  
> -     nr = alloc_pidmap(task_pid_ns(current));
> -     if (nr < 0)
> +     pid_nr[0] = alloc_pidmap_pid_nr(task_pid_ns(current));
> +     if (!pid_nr[0] < 0)
>               goto out_free_pid;
>  
> -     pid_nr = alloc_pid_nr(task_pid_ns(current));
> -     if (!pid_nr)
> -             goto out_free_pidmap;
> -
> +     if (flags & CLONE_NEWPID) {
> +             new_pid_ns = clone_pid_ns();
> +             if (!new_pid_ns)
> +                     goto out_free_pid_nr0;
> +             /*
> +              * For now, allocate a pid_nr only for the new pid namespace.
> +              * Eventually we should allocate a pid_nr for each ancestor
> +              * namespace. While this could cost us additional memory in
> +              * deeply nested containers, it would allow us to see/signal
> +              * all processes from init-pid-ns.
> +              */
> +             pid_nr[1] = alloc_pidmap_pid_nr(new_pid_ns);
> +             if (!pid_nr[1])
> +                     goto out_free_pid_ns;
> +     }
>       atomic_set(&pid->count, 1);
> -     pid->nr = pid_nr->nr = nr;      /* pid->nr to be removed soon */
> +     pid->nr = pid_nr[0]->nr;        /* pid->nr to be removed soon */
>       for (type = 0; type < PIDTYPE_MAX; ++type)
>               INIT_HLIST_HEAD(&pid->tasks[type]);
>  
>       spin_lock_init(&pid->lock);
>  
>       INIT_HLIST_HEAD(&pid->pid_nrs);
> -     hlist_add_head_rcu(&pid_nr->node, &pid->pid_nrs);
> +     hlist_add_head_rcu(&pid_nr[0]->node, &pid->pid_nrs);
> +
> +     if (pid_nr[1])
> +             hlist_add_head_rcu(&pid_nr[1]->node, &pid->pid_nrs);
>  
>       spin_lock_irq(&pidmap_lock);
>       hlist_add_head_rcu(&pid->pid_chain, &pid_hash[pid_hashfn(pid->nr)]);
> @@ -392,11 +411,15 @@ struct pid *alloc_pid(void)
>  
>       return pid;
>  
> -out_free_pidmap:
> -     free_pidmap(task_pid_ns(current), nr);
> +out_free_pid_ns:
> +     put_pid_ns(new_pid_ns);
> +
> +out_free_pid_nr0:
> +     free_pidmap_pid_nr(pid_nr[0]);
>  
>  out_free_pid:
>       kmem_cache_free(pid_cachep, pid);
> +
>       return NULL;
>  }
>  
> Index: lx26-20-mm2b/include/linux/pid_namespace.h
> ===================================================================
> --- lx26-20-mm2b.orig/include/linux/pid_namespace.h 2007-03-09
> 19:00:11.000000000 -0800
> +++ lx26-20-mm2b/include/linux/pid_namespace.h 2007-03-09 19:01:09.000000000
> -0800
> @@ -33,6 +33,7 @@ extern int unshare_pid_ns(unsigned long 
>                       struct pid_nr **new_pid_nr);
>  extern int copy_pid_ns(int flags, struct task_struct *tsk);
>  extern void free_pid_ns(struct kref *kref);
> +extern struct pid_namespace *pid_ns(struct pid * pid);
>  
>  static inline void put_pid_ns(struct pid_namespace *ns)
>  {
> @@ -41,18 +42,12 @@ static inline void put_pid_ns(struct pid
>  
>  static inline struct pid_namespace *task_pid_ns(struct task_struct *tsk)
>  {
> -        return tsk->nsproxy->pid_ns;
> -}
> -
> -static inline void set_task_pid_ns(struct task_struct *tsk,
> -                             struct pid_namespace * ns)
> -{
> -        tsk->nsproxy->pid_ns = ns
> +        return pid_ns(task_pid(tsk));
>  }
>  
>  static inline struct task_struct *child_reaper(struct task_struct *tsk)
>  {
> -     return init_pid_ns.child_reaper;
> +     return task_pid_ns(tsk)->child_reaper;
>  }
>  
>  #endif /* _LINUX_PID_NS_H */

Eric
_______________________________________________
Containers mailing list
[EMAIL PROTECTED]
https://lists.osdl.org/mailman/listinfo/containers

_______________________________________________
Devel mailing list
[email protected]
https://openvz.org/mailman/listinfo/devel

Reply via email to