From: Sukadev Bhattiprolu <[EMAIL PROTECTED]>
Subject: [RFC][PATCH 8/8]: Enable multiple mounts of devpts

To support containers, allow multiple instances of devpts filesystem.

But to preserve backward compatibility, provide this support for
multiple-mounts under the new mount option, '-o newmnt'.

IOW, devpts must support both single-mount and multiple-mount semantics.
If the filesystem is mounted without the 'newmnt' option (as in current
start-up scripts) the new mount simply binds to the initial kernel mount
of devpts and thus current behavior is preserved.

If the 'newmnt' option is specified (by new container-startup scripts) a
new instance of the devpts fs is created and any ptys created in this
instance are independent of the ptys in other mounts of devpts.

(Hmm, would 'private-mount' be a better name, as in MAP_PRIVATE?)

Eg: A container startup script could do the following:

        $ ns_exec -cm /bin/bash
        $ umount /dev/pts
        $ mount -t devpts -o newmnt lxcpts /dev/pts
        $ sshd -p 6710

where 'ns_exec -cm /bin/bash' calls clone() with the CLONE_NEWNS flag
and execs /bin/bash in the child process.  A pty created by the sshd
is not visible in the original mount of /dev/pts.

USER-SPACE-IMPACT:

        The -onewmnt option is meant to minimize userspace impact. Following
        are known impacts.

        1. /dev/ptmx symlink to pts/ptmx. This is optional if only single-
           mount semantics are desired, but is required for multi-mount
           semantics.

        2. /dev/pts fs has a new entry (ptmx device node) that is created/
           destroyed automatically.

        TODO: Other impacts?

Implementation note:

        See comments in new get_sb_ref() function in fs/super.c
        (yes fs/super.c !) on why get_sb_single() cannot be
        directly used.


Changelog[v2]:
        Support both single-mount and multiple-mount semantics and
        provide '-onewmnt' option to select the semantics.

---
 fs/devpts/inode.c  |   43 +++++++++++++++++++++++++++++++++++++++++--
 fs/super.c         |   44 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h |    2 ++
 3 files changed, 87 insertions(+), 2 deletions(-)

Index: linux-2.6.26-rc8-mm1/fs/devpts/inode.c
===================================================================
--- linux-2.6.26-rc8-mm1.orig/fs/devpts/inode.c 2008-08-20 17:44:29.000000000 -0700
+++ linux-2.6.26-rc8-mm1/fs/devpts/inode.c      2008-08-20 17:50:42.000000000 -0700
@@ -41,10 +41,11 @@ struct pts_mount_opts {
        gid_t   gid;
        umode_t mode;
        umode_t ptmx_mode;
+       int newmnt;
 };
 
 enum {
-       Opt_uid, Opt_gid, Opt_mode, Opt_ptmx_mode,
+       Opt_uid, Opt_gid, Opt_mode, Opt_ptmx_mode, Opt_newmnt,
        Opt_err
 };
 
@@ -53,6 +54,7 @@ static match_table_t tokens = {
        {Opt_gid, "gid=%u"},
        {Opt_mode, "mode=%o"},
        {Opt_ptmx_mode, "ptmx_mode=%o"},
+       { Opt_newmnt, "newmnt" },
        {Opt_err, NULL}
 };
 
@@ -84,6 +86,7 @@ static int parse_mount_options(char *dat
        opts->gid     = 0;
        opts->mode    = DEVPTS_DEFAULT_MODE;
        opts->ptmx_mode = DEVPTS_DEFAULT_PTMX_MODE;
+       opts->newmnt = 0;
 
        while ((p = strsep(&data, ",")) != NULL) {
                substring_t args[MAX_OPT_ARGS];
@@ -117,6 +120,9 @@ static int parse_mount_options(char *dat
                                return -EINVAL;
                        opts->ptmx_mode = option & S_IALLUGO;
                        break;
+               case Opt_newmnt:
+                       opts->newmnt = 1;
+                       break;
                default:
                        printk(KERN_ERR "devpts: called with bogus options\n");
                        return -EINVAL;
@@ -145,6 +151,8 @@ static int devpts_show_options(struct se
                seq_printf(seq, ",gid=%u", opts->gid);
        seq_printf(seq, ",mode=%03o", opts->mode);
        seq_printf(seq, ",ptmx_mode=%03o", opts->ptmx_mode);
+       if (opts->newmnt)
+               seq_printf(seq, ",newmnt");
 
        return 0;
 }
@@ -256,12 +264,43 @@ int mknod_ptmx(struct super_block *sb)
        return 0;
 }
 
+static int mount_init_pts(struct file_system_type *fs_type, int flags,
+               void *data, struct vfsmount *mnt)
+{
+       int err;
+
+       if (!devpts_mnt) {
+               err = get_sb_single(fs_type, flags, data, devpts_fill_super,
+                       mnt);
+               if (!err)
+                       devpts_mnt = mnt;
+               return err;
+       }
+
+       /*
+        * Initial mount already exists: bind this mount to its super block.
+        */
+       return get_sb_ref(devpts_mnt->mnt_sb, flags, data, mnt);
+}
+
 static int devpts_get_sb(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
        int err;
+       struct pts_mount_opts opts;
+
+       if (parse_mount_options((char *)data, &opts))
+               return -EINVAL;
+
+       printk(KERN_ERR "devpts_get_sb(): newmnt option is %d\n", opts.newmnt);
+
+       if (opts.newmnt) {
+               err = get_sb_nodev(fs_type, flags, data, devpts_fill_super,
+                               mnt);
+       } else {
+               err = mount_init_pts(fs_type, flags, data, mnt);
+       }
 
-       err = get_sb_single(fs_type, flags, data, devpts_fill_super, mnt);
        if (err)
                return err;
 
Index: linux-2.6.26-rc8-mm1/fs/super.c
===================================================================
--- linux-2.6.26-rc8-mm1.orig/fs/super.c        2008-08-20 17:44:29.000000000 -0700
+++ linux-2.6.26-rc8-mm1/fs/super.c     2008-08-20 18:07:38.000000000 -0700
@@ -883,6 +883,50 @@ int get_sb_single(struct file_system_typ
 
 EXPORT_SYMBOL(get_sb_single);
 
+int get_sb_ref(struct super_block *sb, int flags, void *data,
+               struct vfsmount *mnt)
+{
+       int err;
+
+       /*
+        * UGLY:
+        *
+        * This is needed to support multiple mounts in devpts while
+        * preserving backward compatibility of the current 'single-mount'
+        * semantics.
+        *
+        * devpts cannot simply use get_sb_single(), bc get_sb_single() or
+        * more specifically, sget() finds the most recent mount of devpts.
+        * But that recent mount may not be the initial kernel mount (user
+        * may have mounted with the '-onewmnt' option since the initial
+        * mount and get_sb_single() would pick that super-block).
+        *
+        * Caller is responsible to ensure that 'sb' is valid and initialized.
+        * So armed with that fact, unroll essentials of get_sb_single()
+        * here.
+        */
+       spin_lock(&sb_lock);
+
+       if (!grab_super(sb)) {
+               /*
+                * Assumes grab_super() released sb_lock on failure; TODO confirm.
+                */
+               return -EAGAIN;
+       }
+
+       err = do_remount_sb(sb, flags, data, 0);
+       if (err) {
+               /*
+                * Drop the active reference taken by grab_super(), exactly as
+                * get_sb_single() does; the initial pts mount keeps its own.
+                */
+               up_write(&sb->s_umount);
+               deactivate_super(sb);
+               return err;
+       }
+       return simple_set_mnt(mnt, sb);
+}
+
 struct vfsmount *
 vfs_kern_mount(struct file_system_type *type, int flags, const char *name, 
void *data)
 {
Index: linux-2.6.26-rc8-mm1/include/linux/fs.h
===================================================================
--- linux-2.6.26-rc8-mm1.orig/include/linux/fs.h        2008-08-20 17:46:27.000000000 -0700
+++ linux-2.6.26-rc8-mm1/include/linux/fs.h     2008-08-20 17:47:04.000000000 -0700
@@ -1522,6 +1522,8 @@ extern int get_sb_nodev(struct file_syst
        int flags, void *data,
        int (*fill_super)(struct super_block *, void *, int),
        struct vfsmount *mnt);
+extern int get_sb_ref(struct super_block *sb, int flags, void *data,
+       struct vfsmount *mnt);
 void generic_shutdown_super(struct super_block *sb);
 void kill_block_super(struct super_block *sb);
 void kill_anon_super(struct super_block *sb);
_______________________________________________
Containers mailing list
[EMAIL PROTECTED]
https://lists.linux-foundation.org/mailman/listinfo/containers

_______________________________________________
Devel mailing list
[email protected]
https://openvz.org/mailman/listinfo/devel

Reply via email to