Add upgrade restrictions to openat2(). Extend struct open_how to allow setting transitive restrictions on using file descriptors to open other files. A use case for this feature is to block services or containers from re-opening/upgrading an O_PATH file descriptor through e.g. /proc/<pid>/fd/<nr> as O_WRONLY.
The idea for this feature comes from the UAPI group kernel feature idea list [1]. [1] https://github.com/uapi-group/kernel-features?tab=readme-ov-file#upgrade-masks-in-openat2 Signed-off-by: Jori Koolstra <[email protected]> --- fs/file_table.c | 2 ++ fs/internal.h | 1 + fs/namei.c | 41 +++++++++++++++++++++++++++++++++--- fs/open.c | 9 ++++++++ fs/proc/base.c | 24 +++++++++++++++------ fs/proc/fd.c | 6 +++++- fs/proc/internal.h | 4 +++- include/linux/fcntl.h | 6 +++++- include/linux/fs.h | 1 + include/linux/namei.h | 15 ++++++++++++- include/uapi/linux/openat2.h | 6 ++++++ 11 files changed, 101 insertions(+), 14 deletions(-) diff --git a/fs/file_table.c b/fs/file_table.c index aaa5faaace1e..b98038009fd2 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -196,6 +196,8 @@ static int init_file(struct file *f, int flags, const struct cred *cred) f->f_wb_err = 0; f->f_sb_err = 0; + f->f_allowed_upgrades = VALID_UPGRADE_FLAGS; + /* * We're SLAB_TYPESAFE_BY_RCU so initialize f_ref last. While * fget-rcu pattern users need to be able to handle spurious diff --git a/fs/internal.h b/fs/internal.h index cbc384a1aa09..0a37bb208184 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -189,6 +189,7 @@ struct open_flags { int acc_mode; int intent; int lookup_flags; + unsigned int allowed_upgrades; }; extern struct file *do_file_open(int dfd, struct filename *pathname, const struct open_flags *op); diff --git a/fs/namei.c b/fs/namei.c index 58f715f7657e..c3d48709a73b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -743,6 +743,7 @@ struct nameidata { int dfd; vfsuid_t dir_vfsuid; umode_t dir_mode; + unsigned int allowed_upgrades; } __randomize_layout; #define ND_ROOT_PRESET 1 @@ -760,6 +761,7 @@ static void __set_nameidata(struct nameidata *p, int dfd, struct filename *name) p->path.mnt = NULL; p->path.dentry = NULL; p->total_link_count = old ? 
old->total_link_count : 0; + p->allowed_upgrades = VALID_UPGRADE_FLAGS; p->saved = old; current->nameidata = p; } @@ -1156,11 +1158,15 @@ static int nd_jump_root(struct nameidata *nd) return 0; } +const struct jump_how jump_how_unrestricted = { + .allowed_upgrades = VALID_UPGRADE_FLAGS +}; + /* * Helper to directly jump to a known parsed path from ->get_link, * caller must have taken a reference to path beforehand. */ -int nd_jump_link(const struct path *path) +int nd_jump_link_how(const struct path *path, const struct jump_how *how) { int error = -ELOOP; struct nameidata *nd = current->nameidata; @@ -1181,6 +1187,7 @@ int nd_jump_link(const struct path *path) nd->path = *path; nd->inode = nd->path.dentry->d_inode; nd->state |= ND_JUMPED; + nd->allowed_upgrades &= how->allowed_upgrades; return 0; err: @@ -2738,6 +2745,8 @@ static const char *path_init(struct nameidata *nd, unsigned flags) if (fd_empty(f)) return ERR_PTR(-EBADF); + nd->allowed_upgrades = fd_file(f)->f_allowed_upgrades; + if (flags & LOOKUP_LINKAT_EMPTY) { if (fd_file(f)->f_cred != current_cred() && !ns_capable(fd_file(f)->f_cred->user_ns, CAP_DAC_READ_SEARCH)) @@ -4266,6 +4275,28 @@ static int may_open(struct mnt_idmap *idmap, const struct path *path, return 0; } +static bool may_upgrade(const int flag, const unsigned int allowed_upgrades) +{ + int mode = flag & O_ACCMODE; + unsigned int allowed = allowed_upgrades & ~DENY_UPGRADES; + + if (mode != O_WRONLY && !(allowed & READ_UPGRADABLE)) + return false; + if (mode != O_RDONLY && !(allowed & WRITE_UPGRADABLE)) + return false; + return true; +} + +static int may_open_upgrade(struct mnt_idmap *idmap, const struct path *path, + int acc_mode, int flag, + const unsigned int allowed_upgrades) +{ + if (!may_upgrade(flag, allowed_upgrades)) + return -EACCES; + + return may_open(idmap, path, acc_mode, flag); +} + static int handle_truncate(struct mnt_idmap *idmap, struct file *filp) { const struct path *path = &filp->f_path; @@ -4666,7 +4697,8 @@ static int 
do_open(struct nameidata *nd, return error; do_truncate = true; } - error = may_open(idmap, &nd->path, acc_mode, open_flag); + error = may_open_upgrade(idmap, &nd->path, acc_mode, open_flag, + nd->allowed_upgrades); if (!error && !(file->f_mode & FMODE_OPENED)) error = vfs_open(&nd->path, file); if (!error) @@ -4831,8 +4863,11 @@ static struct file *path_openat(struct nameidata *nd, terminate_walk(nd); } if (likely(!error)) { - if (likely(file->f_mode & FMODE_OPENED)) + if (likely(file->f_mode & FMODE_OPENED)) { + file->f_allowed_upgrades = + op->allowed_upgrades & nd->allowed_upgrades; return file; + } WARN_ON(1); error = -EINVAL; } diff --git a/fs/open.c b/fs/open.c index e019ddecc73c..8b6ea5f90c6e 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1167,6 +1167,7 @@ inline struct open_how build_open_how(int flags, umode_t mode) struct open_how how = { .flags = ((unsigned int) flags) & VALID_OPEN_FLAGS, .mode = mode & S_IALLUGO, + .allowed_upgrades = VALID_UPGRADE_FLAGS }; /* O_PATH beats everything else. 
*/ @@ -1299,6 +1300,14 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op) } op->lookup_flags = lookup_flags; + + if (how->allowed_upgrades == 0) + op->allowed_upgrades = VALID_UPGRADE_FLAGS; + else if (how->allowed_upgrades & ~VALID_UPGRADE_FLAGS) + return -EINVAL; + else + op->allowed_upgrades = how->allowed_upgrades; + return 0; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 4c863d17dfb4..3f3a471bbb75 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -218,7 +218,8 @@ static int get_task_root(struct task_struct *task, struct path *root) return result; } -static int proc_cwd_link(struct dentry *dentry, struct path *path) +static int proc_cwd_link(struct dentry *dentry, struct path *path, + struct jump_how *jump_how) { struct task_struct *task = get_proc_task(d_inode(dentry)); int result = -ENOENT; @@ -227,6 +228,7 @@ static int proc_cwd_link(struct dentry *dentry, struct path *path) task_lock(task); if (task->fs) { get_fs_pwd(task->fs, path); + *jump_how = jump_how_unrestricted; result = 0; } task_unlock(task); @@ -235,7 +237,8 @@ static int proc_cwd_link(struct dentry *dentry, struct path *path) return result; } -static int proc_root_link(struct dentry *dentry, struct path *path) +static int proc_root_link(struct dentry *dentry, struct path *path, + struct jump_how *jump_how) { struct task_struct *task = get_proc_task(d_inode(dentry)); int result = -ENOENT; @@ -243,6 +246,7 @@ static int proc_root_link(struct dentry *dentry, struct path *path) if (task) { result = get_task_root(task, path); put_task_struct(task); + *jump_how = jump_how_unrestricted; } return result; } @@ -1777,7 +1781,8 @@ static const struct file_operations proc_pid_set_comm_operations = { .release = single_release, }; -static int proc_exe_link(struct dentry *dentry, struct path *exe_path) +static int proc_exe_link(struct dentry *dentry, struct path *exe_path, + struct jump_how *jump_how) { struct task_struct *task; struct file *exe_file; @@ -1789,6 +1794,7 
@@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path) put_task_struct(task); if (exe_file) { *exe_path = exe_file->f_path; + *jump_how = jump_how_unrestricted; path_get(&exe_file->f_path); fput(exe_file); return 0; @@ -1801,6 +1807,7 @@ static const char *proc_pid_get_link(struct dentry *dentry, struct delayed_call *done) { struct path path; + struct jump_how jump_how; int error = -EACCES; if (!dentry) @@ -1810,11 +1817,11 @@ static const char *proc_pid_get_link(struct dentry *dentry, if (!proc_fd_access_allowed(inode)) goto out; - error = PROC_I(inode)->op.proc_get_link(dentry, &path); + error = PROC_I(inode)->op.proc_get_link(dentry, &path, &jump_how); if (error) goto out; - error = nd_jump_link(&path); + error = nd_jump_link_how(&path, &jump_how); out: return ERR_PTR(error); } @@ -1848,12 +1855,13 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b int error = -EACCES; struct inode *inode = d_inode(dentry); struct path path; + struct jump_how jump_how; /* Are we allowed to snoop on the tasks file descriptors? 
*/ if (!proc_fd_access_allowed(inode)) goto out; - error = PROC_I(inode)->op.proc_get_link(dentry, &path); + error = PROC_I(inode)->op.proc_get_link(dentry, &path, &jump_how); if (error) goto out; @@ -2250,7 +2258,8 @@ static const struct dentry_operations tid_map_files_dentry_operations = { .d_delete = pid_delete_dentry, }; -static int map_files_get_link(struct dentry *dentry, struct path *path) +static int map_files_get_link(struct dentry *dentry, struct path *path, + struct jump_how *jump_how) { unsigned long vm_start, vm_end; struct vm_area_struct *vma; @@ -2279,6 +2288,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path) rc = -ENOENT; vma = find_exact_vma(mm, vm_start, vm_end); if (vma && vma->vm_file) { + *jump_how = jump_how_unrestricted; *path = *file_user_path(vma->vm_file); path_get(path); rc = 0; diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 9eeccff49b2a..344485e8cb6f 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -171,7 +171,8 @@ static const struct dentry_operations tid_fd_dentry_operations = { .d_delete = pid_delete_dentry, }; -static int proc_fd_link(struct dentry *dentry, struct path *path) +static int proc_fd_link(struct dentry *dentry, struct path *path, + struct jump_how *jump_how) { struct task_struct *task; int ret = -ENOENT; @@ -183,6 +184,9 @@ static int proc_fd_link(struct dentry *dentry, struct path *path) fd_file = fget_task(task, fd); if (fd_file) { + *jump_how = (struct jump_how) { + .allowed_upgrades = fd_file->f_allowed_upgrades + }; *path = fd_file->f_path; path_get(&fd_file->f_path); ret = 0; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index c1e8eb984da8..42f668059a30 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -14,6 +14,7 @@ #include <linux/sched/coredump.h> #include <linux/sched/task.h> #include <linux/mm.h> +#include <linux/namei.h> struct ctl_table_header; struct mempolicy; @@ -107,7 +108,8 @@ extern struct kmem_cache *proc_dir_entry_cache; void pde_free(struct proc_dir_entry 
*pde); union proc_op { - int (*proc_get_link)(struct dentry *, struct path *); + int (*proc_get_link)(struct dentry *, struct path *, + struct jump_how *); int (*proc_show)(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h index d1bb87ff70e3..6506c2c6eca5 100644 --- a/include/linux/fcntl.h +++ b/include/linux/fcntl.h @@ -15,6 +15,9 @@ /* upper 32-bit flags (openat2(2) only) */ \ OPENAT2_EMPTY_PATH) +#define VALID_UPGRADE_FLAGS \ + (DENY_UPGRADES | READ_UPGRADABLE | WRITE_UPGRADABLE) + /* List of all valid flags for the how->resolve argument: */ #define VALID_RESOLVE_FLAGS \ (RESOLVE_NO_XDEV | RESOLVE_NO_MAGICLINKS | RESOLVE_NO_SYMLINKS | \ @@ -22,7 +25,8 @@ /* List of all open_how "versions". */ #define OPEN_HOW_SIZE_VER0 24 /* sizeof first published struct */ -#define OPEN_HOW_SIZE_LATEST OPEN_HOW_SIZE_VER0 +#define OPEN_HOW_SIZE_VER1 32 /* added allowed_upgrades */ +#define OPEN_HOW_SIZE_LATEST OPEN_HOW_SIZE_VER1 #ifndef force_o_largefile #define force_o_largefile() (!IS_ENABLED(CONFIG_ARCH_32BIT_OFF_T)) diff --git a/include/linux/fs.h b/include/linux/fs.h index 8b3dd145b25e..697d2fc6322b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1296,6 +1296,7 @@ struct file { }; file_ref_t f_ref; /* --- cacheline 3 boundary (192 bytes) --- */ + unsigned int f_allowed_upgrades; } __randomize_layout __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ diff --git a/include/linux/namei.h b/include/linux/namei.h index 58600cf234bc..0c58ded7cd27 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -203,7 +203,20 @@ static inline umode_t __must_check mode_strip_umask(const struct inode *dir, umo return mode; } -extern int __must_check nd_jump_link(const struct path *path); +struct jump_how { + unsigned int allowed_upgrades; +}; + +extern const struct jump_how jump_how_unrestricted; +#define JUMP_HOW_UNRESTRICTED 
&jump_how_unrestricted + +extern int __must_check nd_jump_link_how(const struct path *path, + const struct jump_how *how); + +static inline int nd_jump_link(const struct path *path) +{ + return nd_jump_link_how(path, JUMP_HOW_UNRESTRICTED); +} static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) { diff --git a/include/uapi/linux/openat2.h b/include/uapi/linux/openat2.h index c34f32e6fa96..fc1147e6ce41 100644 --- a/include/uapi/linux/openat2.h +++ b/include/uapi/linux/openat2.h @@ -20,8 +20,14 @@ struct open_how { __u64 flags; __u64 mode; __u64 resolve; + __u64 allowed_upgrades; }; +/* how->allowed_upgrades flags for openat2(2). */ +#define DENY_UPGRADES 0x01 +#define READ_UPGRADABLE (0x02 | DENY_UPGRADES) +#define WRITE_UPGRADABLE (0x04 | DENY_UPGRADES) + /* how->resolve flags for openat2(2). */ #define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings (includes bind-mounts). */ -- 2.53.0

