The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-1127.18.2.vz7.163.11 ------> commit 2bb98f0659738df2f07dd7dcb7f136d8a7164927 Author: Pavel Tikhomirov <ptikhomi...@virtuozzo.com> Date: Wed Aug 26 09:47:45 2020 +0300
ms/vfs: syscall: Add move_mount(2) to move mounts around Patchset description: These syscalls were added as a preparation step for the new mount API (fsopen, fsconfig, fsmount and fspick will be ported separately). We can use them to implement "cross-namespace bind-mounting" like this: fd = open_tree(AT_FDCWD, "/mnt", OPEN_TREE_CLONE); setns(nsfd, CLONE_NEWNS); move_mount(fd, "", AT_FDCWD, "/mnt2", MOVE_MOUNT_F_EMPTY_PATH); This will allow us to implement the feature of adding bind mounts to a running container instead of relying on unreliable external propagation. It is needed for VZ8, but does not apply cleanly there, so I will send it separately. https://jira.sw.ru/browse/PSBM-107263 Current patch description: From: David Howells <dhowe...@redhat.com> Add a move_mount() system call that will move a mount from one place to another and, in the next commit, allow to attach an unattached mount tree. The new system call looks like the following: int move_mount(int from_dfd, const char *from_path, int to_dfd, const char *to_path, unsigned int flags); Signed-off-by: David Howells <dhowe...@redhat.com> cc: linux-...@vger.kernel.org Signed-off-by: Al Viro <v...@zeniv.linux.org.uk> vfs: syscall: Add move_mount(2) to move mounts around (cherry-picked from commit 2db154b3ea8e14b04fee23e3fdfd5e9d17fbc6ae) uapi, x86: Fix the syscall numbering of the mount API syscalls [ver #2] (cherry-picked from commit 9c8ad7a2ff0bfe58f019ec0abc1fb965114dde7d) selinux: fix regression introduced by move_mount(2) syscall (cherry-picked from commit 98aa00345de54b8340dc2ddcd87f446d33387b5e) https://jira.sw.ru/browse/PSBM-107263 Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com> --- arch/x86/syscalls/syscall_32.tbl | 1 + arch/x86/syscalls/syscall_64.tbl | 1 + fs/namespace.c | 126 +++++++++++++++++++++++++++++---------- include/linux/security.h | 12 ++++ include/linux/syscalls.h | 3 + include/uapi/linux/fs.h | 11 ++++ security/capability.c | 6 ++ security/security.c | 5 ++ security/selinux/hooks.c | 9 +++ 9 files 
changed, 143 insertions(+), 31 deletions(-) diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index 7b32f7c..978f07c 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -372,6 +372,7 @@ 382 i386 pkey_free sys_pkey_free 428 i386 open_tree sys_open_tree +429 i386 move_mount sys_move_mount 510 i386 getluid sys_getluid 511 i386 setluid sys_setluid diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 6b3a1d1..3c86aba 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -337,6 +337,7 @@ 331 common pkey_free sys_pkey_free 428 common open_tree sys_open_tree +429 common move_mount sys_move_mount 500 64 getluid sys_getluid 501 64 setluid sys_setluid diff --git a/fs/namespace.c b/fs/namespace.c index 694e3d6..0820db1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2852,72 +2852,81 @@ out_unlock: return err; } -static int do_move_mount(struct path *path, const char *old_name) +static int do_move_mount(struct path *old_path, struct path *new_path) { - struct path old_path, parent_path; + struct path parent_path = {.mnt = NULL, .dentry = NULL}; struct mount *p; struct mount *old; struct mountpoint *mp; int err; - if (!old_name || !*old_name) - return -EINVAL; - err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); - if (err) - return err; - mp = lock_mount(path); - err = PTR_ERR(mp); + mp = lock_mount(new_path); if (IS_ERR(mp)) - goto out; + return PTR_ERR(mp); - old = real_mount(old_path.mnt); - p = real_mount(path->mnt); + old = real_mount(old_path->mnt); + p = real_mount(new_path->mnt); err = -EINVAL; if (!check_mnt(p) || !check_mnt(old)) - goto out1; + goto out; - if (old->mnt.mnt_flags & MNT_LOCKED) - goto out1; + if (!mnt_has_parent(old)) + goto out; - err = -EINVAL; - if (old_path.dentry != old_path.mnt->mnt_root) - goto out1; + if (old->mnt.mnt_flags & MNT_LOCKED) + goto out; - if (!mnt_has_parent(old)) - goto out1; + if 
(old_path->dentry != old_path->mnt->mnt_root) + goto out; - if (S_ISDIR(path->dentry->d_inode->i_mode) != - S_ISDIR(old_path.dentry->d_inode->i_mode)) - goto out1; + if (S_ISDIR(new_path->dentry->d_inode->i_mode) != + S_ISDIR(old_path->dentry->d_inode->i_mode)) + goto out; /* * Don't move a mount residing in a shared parent. */ if (IS_MNT_SHARED(old->mnt_parent)) - goto out1; + goto out; /* * Don't move a mount tree containing unbindable mounts to a destination * mount which is shared. */ if (IS_MNT_SHARED(p) && tree_contains_unbindable(old)) - goto out1; + goto out; err = -ELOOP; for (; mnt_has_parent(p); p = p->mnt_parent) if (p == old) - goto out1; + goto out; - err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path); + err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp, + &parent_path); if (err) - goto out1; + goto out; /* if the mount is moved, it should no longer be expire * automatically */ list_del_init(&old->mnt_expire); -out1: - unlock_mount(mp); out: + unlock_mount(mp); if (!err) path_put(&parent_path); + return err; +} + +static int do_move_mount_old(struct path *path, const char *old_name) +{ + struct path old_path; + int err; + + if (!old_name || !*old_name) + return -EINVAL; + + err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); + if (err) + return err; + + err = do_move_mount(&old_path, path); path_put(&old_path); return err; } @@ -3334,7 +3343,7 @@ long do_mount(const char *dev_name, const char __user *dir_name, else if (cmd & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) retval = do_change_type(&path, flags); else if (cmd & MS_MOVE) - retval = do_move_mount(&path, dev_name); + retval = do_move_mount_old(&path, dev_name); else if (cmd & MS_SET_GROUP) retval = do_set_group(&path, dev_name); else @@ -3568,6 +3577,61 @@ out_type: } /* + * Move a mount from one place to another. + * + * Note the flags value is a combination of MOVE_MOUNT_* flags. 
+ */ +SYSCALL_DEFINE5(move_mount, + int, from_dfd, const char *, from_pathname, + int, to_dfd, const char *, to_pathname, + unsigned int, flags) +{ + struct path from_path, to_path; + unsigned int lflags; + int ret = 0; + + if (!may_mount()) + return -EPERM; + + if (flags & ~MOVE_MOUNT__MASK) + return -EINVAL; + + /* If someone gives a pathname, they aren't permitted to move + * from an fd that requires unmount as we can't get at the flag + * to clear it afterwards. + */ + lflags = 0; + if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW; + if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT; + if (flags & MOVE_MOUNT_F_EMPTY_PATH) lflags |= LOOKUP_EMPTY; + + ret = user_path_at(from_dfd, from_pathname, lflags, &from_path); + if (ret < 0) + return ret; + + lflags = 0; + if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW; + if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT; + if (flags & MOVE_MOUNT_T_EMPTY_PATH) lflags |= LOOKUP_EMPTY; + + ret = user_path_at(to_dfd, to_pathname, lflags, &to_path); + if (ret < 0) + goto out_from; + + ret = security_move_mount(&from_path, &to_path); + if (ret < 0) + goto out_to; + + ret = do_move_mount(&from_path, &to_path); + +out_to: + path_put(&to_path); +out_from: + path_put(&from_path); + return ret; +} + +/* * Return true if path is reachable from root * * namespace_sem or mount_lock is held diff --git a/include/linux/security.h b/include/linux/security.h index 47aed52..2fe05c2 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -323,6 +323,10 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Parse a string of security data filling in the opts structure * @options string containing all mount options known by the LSM * @opts binary data structure usable by the LSM + * @move_mount: + * Check permission before a mount is moved. + * @from_path indicates the mount that is going to be moved. 
+ * @to_path indicates the mountpoint that will be mounted upon. * @dentry_init_security: * Compute a context for a dentry as the inode is not yet available * since NFSv4 has no label backed by an EA anyway. @@ -1559,6 +1563,7 @@ struct security_operations { unsigned long kern_flags, unsigned long *set_kern_flags); int (*sb_parse_opts_str) (char *options, struct security_mnt_opts *opts); + int (*move_mount)(const struct path *from_path, const struct path *to_path); int (*dentry_init_security) (struct dentry *dentry, int mode, struct qstr *name, void **ctx, u32 *ctxlen); @@ -1880,6 +1885,7 @@ int security_sb_clone_mnt_opts(const struct super_block *oldsb, unsigned long kern_flags, unsigned long *set_kern_flags); int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts); +int security_move_mount(const struct path *from_path, const struct path *to_path); int security_dentry_init_security(struct dentry *dentry, int mode, struct qstr *name, void **ctx, u32 *ctxlen); @@ -2209,6 +2215,12 @@ static inline int security_sb_parse_opts_str(char *options, struct security_mnt_ return 0; } +static inline int security_move_mount(const struct path *from_path, + const struct path *to_path) +{ + return 0; +} + static inline int security_inode_alloc(struct inode *inode) { return 0; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0e30297..dce905f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -892,5 +892,8 @@ asmlinkage long sys_membarrier(int cmd, int flags); asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags); asmlinkage long sys_open_tree(int dfd, const char __user *path, unsigned flags); +asmlinkage long sys_move_mount(int from_dfd, const char __user *from_path, + int to_dfd, const char __user *to_path, + unsigned int ms_flags); #endif diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 0fc42f1..8c9e6a2 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -131,6 
+131,17 @@ struct inodes_stat_t { #define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */ #define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ +/* + * move_mount() flags. + */ +#define MOVE_MOUNT_F_SYMLINKS 0x00000001 /* Follow symlinks on from path */ +#define MOVE_MOUNT_F_AUTOMOUNTS 0x00000002 /* Follow automounts on from path */ +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */ +#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */ +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#define MOVE_MOUNT__MASK 0x00000077 + /* the read-only stuff doesn't really belong here, but any other place is probably as bad and I don't want to create yet another include file. */ diff --git a/security/capability.c b/security/capability.c index aff9bb8..47cd798 100644 --- a/security/capability.c +++ b/security/capability.c @@ -114,6 +114,11 @@ static int cap_sb_parse_opts_str(char *options, struct security_mnt_opts *opts) return 0; } +static int cap_move_mount(const struct path *from_path, const struct path *to_path) +{ + return 0; +} + static int cap_dentry_init_security(struct dentry *dentry, int mode, struct qstr *name, void **ctx, u32 *ctxlen) @@ -1019,6 +1024,7 @@ void __init security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, sb_set_mnt_opts); set_to_cap_if_null(ops, sb_clone_mnt_opts); set_to_cap_if_null(ops, sb_parse_opts_str); + set_to_cap_if_null(ops, move_mount); set_to_cap_if_null(ops, dentry_init_security); set_to_cap_if_null(ops, dentry_create_files_as); set_to_cap_if_null(ops, inode_alloc_security); diff --git a/security/security.c b/security/security.c index 44aaadf..b6ba158e 100644 --- a/security/security.c +++ b/security/security.c @@ -345,6 +345,11 @@ int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts) } 
EXPORT_SYMBOL(security_sb_parse_opts_str); +int security_move_mount(const struct path *from_path, const struct path *to_path) +{ + return security_ops->move_mount(from_path, to_path); +} + int security_inode_alloc(struct inode *inode) { inode->i_security = NULL; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 84a6f1a..aa7bb4b 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2815,6 +2815,14 @@ static int selinux_mount(const char *dev_name, return path_has_perm(cred, path, FILE__MOUNTON); } +static int selinux_move_mount(const struct path *from_path, + const struct path *to_path) +{ + const struct cred *cred = current_cred(); + + return path_has_perm(cred, to_path, FILE__MOUNTON); +} + static int selinux_umount(struct vfsmount *mnt, int flags) { const struct cred *cred = current_cred(); @@ -6316,6 +6324,7 @@ static struct security_operations selinux_ops = { .sb_set_mnt_opts = selinux_set_mnt_opts, .sb_clone_mnt_opts = selinux_sb_clone_mnt_opts, .sb_parse_opts_str = selinux_parse_opts_str, + .move_mount = selinux_move_mount, .dentry_init_security = selinux_dentry_init_security, .dentry_create_files_as = selinux_dentry_create_files_as, _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel