On Mon, Apr 04, 2016 at 08:29:17PM -0500, Eric W. Biederman wrote:

> +#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
> +static int devpts_path_ptmx(struct file *filp)
> +{
> +     struct pts_fs_info *fsi;
> +     struct path root, path;
> +     struct dentry *old;
> +     int err = -ENOENT;
> +     int ret;
> +
> +     /* Can the pts filesystem be found with a path walk? */
> +     path = filp->f_path;
> +     path_get(&path);
> +     get_fs_root(current->fs, &root);
> +     ret = path_parent(&root, &path);
> +     path_put(&root);
> +     if (ret != 1)
> +             goto fail;

That, I take it, is a lookup for .. and buggering off if it fails *or* if
we had been in caller's root or something that overmount it?  Not that the
latter had been possible - root is a directory and can be overmounted only
by another such, and we are called from ->open() of a device node.

> +     /* Remember the result of this permission check for later */
> +     ret = inode_permission(path.dentry->d_inode, MAY_EXEC);
> +     if (path_pts(&path))
> +             goto fail;

Egads, man - you've just introduced a special function for looking up
something named "pts" in a given directory!

The reason not to use kern_path() would be what, the fact that it doesn't
allow starting at given location?  So let's make a variant that would - and
rather than bothering with RCU, just go for something like (completely
untested)

/* on success overwrite *path with the result of walk; do _not_ drop the
   reference to old contents - let the caller arrange that */
int kern_path_relative(struct path *path, const char *s, int flags)
{
        int err;
        struct nameidata nd = {.path = *path};
        struct filename *name;

        if (!*s || *s == '/' || flags & (LOOKUP_ROOT | LOOKUP_RCU))
                return -EINVAL;

        name = getname_kernel(s);
        if (IS_ERR(name))
                return PTR_ERR(name);

        set_nameidata(&nd, AT_FDCWD, name);  

        nd.last_type = LAST_ROOT;
        nd.flags = flags | LOOKUP_REVAL | LOOKUP_JUMPED | LOOKUP_PARENT;
        nd.m_seq = read_seqbegin(&mount_lock);
        path_get(&nd.path);
        nd.inode = nd.path.dentry->d_inode;

        while (!(err = link_path_walk(s, &nd)) 
                && ((err = lookup_last(&nd)) > 0)) {
                s = trailing_symlink(&nd);
                if (IS_ERR(s)) {
                        err = PTR_ERR(s);
                        break;
                }
        }
        if (!err)
                err = complete_walk(&nd);

        if (!err && flags & LOOKUP_DIRECTORY)
                if (!d_can_lookup(nd.path.dentry))
                        err = -ENOTDIR;
        if (!err) {
                *path = nd.path;
                nd.path.mnt = NULL;
                nd.path.dentry = NULL;
        }
        terminate_walk(&nd);
        restore_nameidata();
        putname(name);
        return err;
}

and use it as

        path = filp->f_path;
        err = kern_path_relative(&path, "../pts", LOOKUP_DIRECTORY);
        if (err)
                return err;
        /* from here on we need to path_put() it */
        if (path.mnt->mnt_sb->s_magic != DEVPTS_SUPER_MAGIC)
                goto fail;
        /* must be its root; no other directories on that puppy */

> +     fsi = DEVPTS_SB(path.mnt->mnt_sb);
> +
> +     /* Get out if the path walk resulted in the default devpts instance */
> +     if (devpts_mnt->mnt_sb == path.mnt->mnt_sb)
> +             goto fail;
> +
> +     /* Don't allow bypassing the existing /dev/pts/ptmx permission check */

        err = inode_permission(path.dentry->d_inode, MAY_EXEC);
        if (err)
                goto fail;
        err = inode_permission(fsi->ptmx_dentry->d_inode,
                                       ACC_MODE(filp->f_flags));
        if (err)
                goto fail;

> +     /* Advance path to the ptmx dentry */
> +     old = path.dentry;
> +     path.dentry = dget(fsi->ptmx_dentry);
> +     dput(old);
> +
> +     /* Make it look like /dev/pts/ptmx was opened */
> +     err = update_file_path(filp, &path);
> +     if (err)
> +             goto fail;
> +
> +     return 0;
> +fail:
> +     path_put(&path);
> +     return err;
> +}
> +#else
> +static inline int devpts_path_ptmx(struct file *filp)
> +{
> +     return -ENOENT;
> +}
> +#endif
> +
> +struct inode *devpts_ptmx(struct inode *inode, struct file *filp)
> +{
> +     int err;
> +     if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC)
> +             return inode;
> +
> +     err = devpts_path_ptmx(filp);
> +     if (err == 0)
> +             return filp->f_inode;
> +     if (err != -ENOENT)
> +             return ERR_PTR(err);
> +
> +     return inode;
> +}

Umm...  I'm not sure it makes for good calling conventions - the caller can
do inode = file_inode(filp) just as well, so why not simply return 0 or -E...?
"return inode;" cases become simply return 0...

> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -1415,29 +1415,41 @@ static void follow_mount(struct path *path)
>       }
>  }
>  
> -static int follow_dotdot(struct nameidata *nd)
> +int path_parent(struct path *root, struct path *path)

Please, don't.

> +#ifdef CONFIG_UNIX98_PTYS
> +int path_pts(struct path *path)

Fuck, no.

> index 17cb6b1dab75..e1ed78fa474b 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -679,6 +679,24 @@ int open_check_o_direct(struct file *f)
>       return 0;
>  }
>  
> +int update_file_path(struct file *filp, struct path *path)
> +{
> +     /* Only valid during f_op->open, and even in open use very carefully */
> +     struct path old;
> +     struct inode *inode;
> +
> +     if (filp->f_mode & FMODE_WRITER)
> +             return -EINVAL;

That really needs to be commented.

> +     old = filp->f_path;
> +     inode = path->dentry->d_inode;
> +     filp->f_path = *path;
> +     filp->f_inode = inode;
> +     filp->f_mapping = inode->i_mapping;
> +     path_put(&old);
> +     return 0;
> +}

> +
>  static int do_dentry_open(struct file *f,
>                         struct inode *inode,
>                         int (*open)(struct inode *, struct file *),
> @@ -736,6 +754,7 @@ static int do_dentry_open(struct file *f,
>               error = open(inode, f);
>               if (error)
>                       goto cleanup_all;
> +             inode = f->f_inode;
>       }
>       if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
>               i_readcount_inc(inode);

BTW, have you looked through the callers of dentry_open()?  It can hit that
case as well...

Reply via email to