> > If you get down to it, the thing is about delegating control over part
> > of namespace to somebody, without letting them control, see, etc. the
> > rest of it.  So I'd rather be very conservative about extra information
> > we allow to piggyback on that.  I don't know... perhaps with stable peer
> > group IDs it would be OK to show peer group ID by (our) vfsmount + peer
> > group ID of master + peer group ID of nearest dominating group that has
> > intersection with our namespace.  Then we don't leak information (AFAICS),
> > get full propagation information between our vfsmounts and cooperating
> > tasks in different namespaces can figure the things out as much as possible
> > without leaking 3rd-party information to either.
> 

Here's a patch against current -mm implementing this (with some
cleanups thrown in).  I've done some testing on it as well; it wasn't
entirely trivial to figure out a setup where propagation goes out of
the namespace first, then comes back in:

  mount --bind /mnt1 /mnt1
  mount --make-shared /mnt1
  mount --bind /mnt2 /mnt2
  mount --make-shared /mnt2
  newns
  mount --make-slave /mnt1

old ns:
  mount --make-slave /mnt2
  mount --bind /mnt1/tmp /mnt1/tmp

new ns:
  mount --make-shared /mnt1/tmp
  mount --bind /mnt1/tmp /mnt2/tmp

Voila.


Signed-off-by: Miklos Szeredi <[EMAIL PROTECTED]>
---

Index: linux/fs/pnode.c
===================================================================
--- linux.orig/fs/pnode.c       2008-02-22 15:27:23.000000000 +0100
+++ linux/fs/pnode.c    2008-02-22 15:27:26.000000000 +0100
@@ -9,8 +9,12 @@
 #include <linux/mnt_namespace.h>
 #include <linux/mount.h>
 #include <linux/fs.h>
+#include <linux/idr.h>
 #include "pnode.h"
 
+static DEFINE_SPINLOCK(mnt_pgid_lock);
+static DEFINE_IDA(mnt_pgid_ida);
+
 /* return the next shared peer mount of @p */
 static inline struct vfsmount *next_peer(struct vfsmount *p)
 {
@@ -27,36 +31,90 @@ static inline struct vfsmount *next_slav
        return list_entry(p->mnt_slave.next, struct vfsmount, mnt_slave);
 }
 
-static int __peer_group_id(struct vfsmount *mnt)
+static void __set_mnt_shared(struct vfsmount *mnt)
 {
-       struct vfsmount *m;
-       int id = mnt->mnt_id;
+       mnt->mnt_flags &= ~MNT_PNODE_MASK;
+       mnt->mnt_flags |= MNT_SHARED;
+}
+
+void set_mnt_shared(struct vfsmount *mnt)
+{
+       int res;
 
-       for (m = next_peer(mnt); m != mnt; m = next_peer(m))
-               id = min(id, m->mnt_id);
+ retry:
+       spin_lock(&mnt_pgid_lock);
+       if (IS_MNT_SHARED(mnt)) {
+               spin_unlock(&mnt_pgid_lock);
+               return;
+       }
 
-       return id;
+       res = ida_get_new(&mnt_pgid_ida, &mnt->mnt_pgid);
+       spin_unlock(&mnt_pgid_lock);
+       if (res == -EAGAIN) {
+               if (ida_pre_get(&mnt_pgid_ida, GFP_KERNEL))
+                       goto retry;
+       }
+       __set_mnt_shared(mnt);
+}
+
+void clear_mnt_shared(struct vfsmount *mnt)
+{
+       if (IS_MNT_SHARED(mnt)) {
+               mnt->mnt_flags &= ~MNT_SHARED;
+               mnt->mnt_pgid = -1;
+       }
+}
+
+void make_mnt_peer(struct vfsmount *old, struct vfsmount *mnt)
+{
+       mnt->mnt_pgid = old->mnt_pgid;
+       list_add(&mnt->mnt_share, &old->mnt_share);
+       __set_mnt_shared(mnt);
 }
 
-/* return the smallest ID within the peer group */
 int get_peer_group_id(struct vfsmount *mnt)
 {
+       return mnt->mnt_pgid;
+}
+
+int get_master_id(struct vfsmount *mnt)
+{
        int id;
 
        spin_lock(&vfsmount_lock);
-       id = __peer_group_id(mnt);
+       id = get_peer_group_id(mnt->mnt_master);
        spin_unlock(&vfsmount_lock);
 
        return id;
 }
 
-/* return the smallest ID within the master's peer group */
-int get_master_id(struct vfsmount *mnt)
+static struct vfsmount *get_peer_in_ns(struct vfsmount *mnt,
+                                      struct mnt_namespace *ns)
 {
-       int id;
+       struct vfsmount *m = mnt;
+
+       do {
+               if (m->mnt_ns == ns)
+                       return m;
+               m = next_peer(m);
+       } while (m != mnt);
+
+       return NULL;
+}
+
+int get_dominator_id_same_ns(struct vfsmount *mnt)
+{
+       int id = -1;
+       struct vfsmount *m;
 
        spin_lock(&vfsmount_lock);
-       id = __peer_group_id(mnt->mnt_master);
+       for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
+               struct vfsmount *d = get_peer_in_ns(m, mnt->mnt_ns);
+               if (d) {
+                       id = d->mnt_pgid;
+                       break;
+               }
+       }
        spin_unlock(&vfsmount_lock);
 
        return id;
@@ -80,7 +138,13 @@ static int do_make_slave(struct vfsmount
                if (peer_mnt == mnt)
                        peer_mnt = NULL;
        }
-       list_del_init(&mnt->mnt_share);
+       if (!list_empty(&mnt->mnt_share))
+               list_del_init(&mnt->mnt_share);
+       else if (IS_MNT_SHARED(mnt)) {
+               spin_lock(&mnt_pgid_lock);
+               ida_remove(&mnt_pgid_ida, mnt->mnt_pgid);
+               spin_unlock(&mnt_pgid_lock);
+       }
 
        if (peer_mnt)
                master = peer_mnt;
@@ -89,20 +153,18 @@ static int do_make_slave(struct vfsmount
                list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
                        slave_mnt->mnt_master = master;
                list_move(&mnt->mnt_slave, &master->mnt_slave_list);
-               list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
-               INIT_LIST_HEAD(&mnt->mnt_slave_list);
+               list_splice_init(&mnt->mnt_slave_list,
+                                master->mnt_slave_list.prev);
        } else {
-               struct list_head *p = &mnt->mnt_slave_list;
-               while (!list_empty(p)) {
-                        slave_mnt = list_first_entry(p,
+               while (!list_empty(&mnt->mnt_slave_list)) {
+                       slave_mnt = list_first_entry(&mnt->mnt_slave_list,
                                        struct vfsmount, mnt_slave);
                        list_del_init(&slave_mnt->mnt_slave);
                        slave_mnt->mnt_master = NULL;
                }
        }
        mnt->mnt_master = master;
-       CLEAR_MNT_SHARED(mnt);
-       INIT_LIST_HEAD(&mnt->mnt_slave_list);
+       clear_mnt_shared(mnt);
        return 0;
 }
 
Index: linux/fs/namespace.c
===================================================================
--- linux.orig/fs/namespace.c   2008-02-22 15:27:23.000000000 +0100
+++ linux/fs/namespace.c        2008-02-22 15:27:26.000000000 +0100
@@ -95,6 +95,7 @@ struct vfsmount *alloc_vfsmnt(const char
                        return NULL;
                }
 
+               mnt->mnt_pgid = -1;
                atomic_set(&mnt->mnt_count, 1);
                INIT_LIST_HEAD(&mnt->mnt_hash);
                INIT_LIST_HEAD(&mnt->mnt_child);
@@ -537,10 +538,12 @@ static struct vfsmount *clone_mnt(struct
                if (flag & CL_SLAVE) {
                        list_add(&mnt->mnt_slave, &old->mnt_slave_list);
                        mnt->mnt_master = old;
-                       CLEAR_MNT_SHARED(mnt);
+                       clear_mnt_shared(mnt);
                } else if (!(flag & CL_PRIVATE)) {
-                       if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
-                               list_add(&mnt->mnt_share, &old->mnt_share);
+                       if (flag & CL_PROPAGATION)
+                               set_mnt_shared(old);
+                       if (IS_MNT_SHARED(old))
+                               make_mnt_peer(old, mnt);
                        if (IS_MNT_SLAVE(old))
                                list_add(&mnt->mnt_slave, &old->mnt_slave);
                        mnt->mnt_master = old->mnt_master;
@@ -795,16 +798,24 @@ static int show_mountinfo(struct seq_fil
        show_sb_opts(m, sb);
        if (sb->s_op->show_options)
                err = sb->s_op->show_options(m, mnt);
-       if (IS_MNT_SHARED(mnt)) {
-               seq_printf(m, " shared:%i", get_peer_group_id(mnt));
-               if (IS_MNT_SLAVE(mnt))
-                       seq_printf(m, ",slave:%i", get_master_id(mnt));
-       } else if (IS_MNT_SLAVE(mnt)) {
-               seq_printf(m, " slave:%i", get_master_id(mnt));
+       seq_putc(m, ' ');
+       if (IS_MNT_SHARED(mnt) || IS_MNT_SLAVE(mnt)) {
+               if (IS_MNT_SHARED(mnt))
+                       seq_printf(m, "shared:%i", get_peer_group_id(mnt));
+               if (IS_MNT_SLAVE(mnt)) {
+                       int dominator_id = get_dominator_id_same_ns(mnt);
+
+                       if (IS_MNT_SHARED(mnt))
+                               seq_putc(m, ',');
+
+                       seq_printf(m, "slave:%i", get_master_id(mnt));
+                       if (dominator_id != -1)
+                               seq_printf(m, ":%i", dominator_id);
+               }
        } else if (IS_MNT_UNBINDABLE(mnt)) {
-               seq_printf(m, " unbindable");
+               seq_printf(m, "unbindable");
        } else {
-               seq_printf(m, " private");
+               seq_printf(m, "private");
        }
        seq_putc(m, '\n');
        return err;
Index: linux/fs/pnode.h
===================================================================
--- linux.orig/fs/pnode.h       2008-02-22 15:27:23.000000000 +0100
+++ linux/fs/pnode.h    2008-02-22 15:27:26.000000000 +0100
@@ -14,7 +14,6 @@
 #define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED)
 #define IS_MNT_SLAVE(mnt) (mnt->mnt_master)
 #define IS_MNT_NEW(mnt)  (!mnt->mnt_ns)
-#define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~MNT_SHARED)
 #define IS_MNT_UNBINDABLE(mnt) (mnt->mnt_flags & MNT_UNBINDABLE)
 
 #define CL_EXPIRE              0x01
@@ -24,12 +23,9 @@
 #define CL_PROPAGATION                 0x10
 #define CL_PRIVATE             0x20
 
-static inline void set_mnt_shared(struct vfsmount *mnt)
-{
-       mnt->mnt_flags &= ~MNT_PNODE_MASK;
-       mnt->mnt_flags |= MNT_SHARED;
-}
-
+void set_mnt_shared(struct vfsmount *);
+void clear_mnt_shared(struct vfsmount *);
+void make_mnt_peer(struct vfsmount *, struct vfsmount *);
 void change_mnt_propagation(struct vfsmount *, int);
 int propagate_mnt(struct vfsmount *, struct dentry *, struct vfsmount *,
                struct list_head *);
@@ -37,4 +33,5 @@ int propagate_umount(struct list_head *)
 int propagate_mount_busy(struct vfsmount *, int);
 int get_peer_group_id(struct vfsmount *);
 int get_master_id(struct vfsmount *);
+int get_dominator_id_same_ns(struct vfsmount *);
 #endif /* _LINUX_PNODE_H */
Index: linux/include/linux/mount.h
===================================================================
--- linux.orig/include/linux/mount.h    2008-02-22 15:27:23.000000000 +0100
+++ linux/include/linux/mount.h 2008-02-22 15:27:26.000000000 +0100
@@ -57,6 +57,7 @@ struct vfsmount {
        struct vfsmount *mnt_master;    /* slave is on master->mnt_slave_list */
        struct mnt_namespace *mnt_ns;   /* containing namespace */
        int mnt_id;                     /* mount identifier */
+       int mnt_pgid;                   /* peer group identifier */
        /*
         * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount
         * to let these frequently modified fields in a separate cache line
Index: linux/Documentation/filesystems/proc.txt
===================================================================
--- linux.orig/Documentation/filesystems/proc.txt       2008-02-22 15:27:23.000000000 +0100
+++ linux/Documentation/filesystems/proc.txt    2008-02-22 15:27:26.000000000 +0100
@@ -2367,21 +2367,20 @@ MNTOPTS: per mount options
 SBOPTS: per super block options
 PROPAGATION: propagation type
 
-propagation type: <propagation_flag>[:<mntid>][,...]
-       note: 'shared' flag is followed by the mntid of its peer mount
-             'slave' flag is followed by the mntid of its master mount
+propagation type: <propagation_flag>[:<peergrpid>[:<domgrpid>]][,...]
+       note: 'shared' flag is followed by the id of this mount's peer group
+             'slave' flag is followed by the peer group id of its master mount,
+                     optionally followed by the id of the closest dominant(*)
+                     peer group in the same namespace, if one exists.
              'private' flag stands by itself
              'unbindable' flag stands by itself
 
-The 'mntid' used in the propagation type is a canonical ID of the peer
-group (currently the smallest ID within the group is used for this
-purpose, but this should not be relied on).  Since mounts can be added
-or removed from the peer group, this ID only guaranteed to stay the
-same on a static propagation tree.
+(*) A dominant peer group is an ancestor of this mount in the
+propagation tree, in other words, this mount receives propagation from
+the dominant peer group, but not the other way round.
 
 For more information see:
 
   Documentation/filesystems/sharedsubtree.txt
 
-
 ------------------------------------------------------------------------------
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to