Add spawn_template_create() and back each template with an anon-inode fd.

Creation records the per-template state that later spawns reuse: the opened executable file, the optional path string it was opened by (stored as passed in; this patch does not check that it is absolute), the creator's credentials, and the deny-write state.

Write access to the executable stays denied until the template fd is released, so cached state cannot race with writers.

This patch only creates and releases template fds; exec_flags and actions must still be zero. Spawning and ELF metadata caching are added separately.
Signed-off-by: Li Chen <[email protected]> --- MAINTAINERS | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 - fs/Makefile | 2 +- fs/spawn_template.c | 180 +++++++++++++++++++++++++ include/linux/syscalls.h | 3 + 5 files changed, 185 insertions(+), 2 deletions(-) create mode 100644 fs/spawn_template.c diff --git a/MAINTAINERS b/MAINTAINERS index d7b1191e33ca0..d5441812825c3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9732,6 +9732,7 @@ F: Documentation/userspace-api/ELF.rst F: fs/*binfmt_*.c F: fs/Kconfig.binfmt F: fs/exec.c +F: fs/spawn_template.c F: fs/tests/binfmt_*_kunit.c F: fs/tests/exec_kunit.c F: include/linux/binfmts.h diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 524155d655da1..d6c1667e8f3b8 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -396,7 +396,6 @@ 469 common file_setattr sys_file_setattr 470 common listns sys_listns 471 common rseq_slice_yield sys_rseq_slice_yield - # # Due to a historical design error, certain syscalls are numbered differently # in x32 as compared to native x86_64. These syscalls have numbers 512-547. 
diff --git a/fs/Makefile b/fs/Makefile index ae1b07f9c6a0c..796eb4ae143e5 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -8,7 +8,7 @@ obj-y := open.o read_write.o file_table.o super.o \ - char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ + char_dev.o stat.o exec.o spawn_template.o pipe.o namei.o fcntl.o \ ioctl.o readdir.o select.o dcache.o inode.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ diff --git a/fs/spawn_template.c b/fs/spawn_template.c new file mode 100644 index 0000000000000..280a1038cc45e --- /dev/null +++ b/fs/spawn_template.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/anon_inodes.h> +#include <linux/cred.h> +#include <linux/err.h> +#include <linux/fcntl.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/syscalls.h> +#include <linux/uaccess.h> +#include <uapi/linux/spawn_template.h> + +#include "internal.h" + +#define SPAWN_TEMPLATE_MAX_ACTIONS 256 + +struct spawn_template { + struct file *exec_file; + const struct cred *creator_cred; + char *filename; + bool deny_write; +}; + +static const struct file_operations spawn_template_fops; + +static bool spawn_template_file_exec_allowed(struct file *file) +{ + if (!S_ISREG(file_inode(file)->i_mode)) + return false; + if (path_noexec(&file->f_path)) + return false; + if (file_permission(file, MAY_EXEC)) + return false; + return can_mmap_file(file); +} + +static int spawn_template_release(struct inode *inode, struct file *file) +{ + struct spawn_template *tmpl = file->private_data; + + if (tmpl->deny_write) + exe_file_allow_write_access(tmpl->exec_file); + fput(tmpl->exec_file); + put_cred(tmpl->creator_cred); + kfree(tmpl->filename); + kfree(tmpl); + return 0; +} + +static const struct file_operations spawn_template_fops = { + .release = spawn_template_release, + .llseek = noop_llseek, +}; + +static int spawn_template_open_execfd(int execfd, struct file 
**file, + bool *deny_write) +{ + int ret; + + if (execfd < 0) + return -EINVAL; + + CLASS(fd, f)(execfd); + if (fd_empty(f)) + return -EBADF; + + if (!spawn_template_file_exec_allowed(fd_file(f))) + return -EACCES; + + ret = exe_file_deny_write_access(fd_file(f)); + if (ret) + return ret; + + *file = get_file(fd_file(f)); + *deny_write = true; + return 0; +} + +static int spawn_template_open_filename(u64 filename, struct file **file, + char **path, + bool *deny_write) +{ + char *kfilename __free(kfree) = NULL; + struct file *exec __free(fput) = NULL; + struct file *tmp_file; + char *tmp; + + if (!filename) + return -EINVAL; + + tmp = strndup_user(u64_to_user_ptr(filename), PATH_MAX); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + kfilename = tmp; + + tmp_file = open_exec(kfilename); + if (IS_ERR(tmp_file)) + return PTR_ERR(tmp_file); + exec = tmp_file; + if (!spawn_template_file_exec_allowed(exec)) { + exe_file_allow_write_access(exec); + return -EACCES; + } + + *file = no_free_ptr(exec); + *path = no_free_ptr(kfilename); + *deny_write = true; + return 0; +} + +SYSCALL_DEFINE2(spawn_template_create, + struct spawn_template_create_args __user *, uargs, + size_t, usize) +{ + struct spawn_template_create_args args; + struct spawn_template *tmpl; + int fd_flags = 0; + int ret; + + BUILD_BUG_ON(sizeof(struct spawn_template_create_args) != + SPAWN_TEMPLATE_CREATE_ARGS_SIZE_VER0); + + if (usize < SPAWN_TEMPLATE_CREATE_ARGS_SIZE_VER0) + return -EINVAL; + if (usize > PAGE_SIZE) + return -E2BIG; + + ret = copy_struct_from_user(&args, sizeof(args), uargs, usize); + if (ret) + return ret; + + if (args.flags & ~SPAWN_TEMPLATE_CREATE_CLOEXEC) + return -EINVAL; + if (args.exec_flags || args.reserved[0] || args.reserved[1] || + args.reserved[2] || args.reserved[3]) + return -EINVAL; + if (args.actions || args.actions_len) + return -EINVAL; + if ((args.execfd < 0 && !args.filename) || + (args.execfd >= 0 && args.filename)) + return -EINVAL; + + tmpl = kzalloc_obj(*tmpl, GFP_KERNEL); + 
if (!tmpl) + return -ENOMEM; + tmpl->creator_cred = get_current_cred(); + + if (args.filename) + ret = spawn_template_open_filename(args.filename, + &tmpl->exec_file, + &tmpl->filename, + &tmpl->deny_write); + else + ret = spawn_template_open_execfd(args.execfd, + &tmpl->exec_file, + &tmpl->deny_write); + if (ret) + goto out_free_tmpl; + + if (args.flags & SPAWN_TEMPLATE_CREATE_CLOEXEC) + fd_flags |= O_CLOEXEC; + + ret = anon_inode_getfd("spawn_template", &spawn_template_fops, tmpl, + fd_flags); + if (ret < 0) + goto out_put_exec; + + return ret; + +out_put_exec: + if (tmpl->deny_write) + exe_file_allow_write_access(tmpl->exec_file); + fput(tmpl->exec_file); +out_free_tmpl: + put_cred(tmpl->creator_cred); + kfree(tmpl->filename); + kfree(tmpl); + return ret; +} diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f3dfc3269188a..4b41950488bd6 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -67,6 +67,7 @@ struct rseq; union bpf_attr; struct io_uring_params; struct clone_args; +struct spawn_template_create_args; struct open_how; struct mount_attr; struct landlock_ruleset_attr; @@ -821,6 +822,8 @@ asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, #endif asmlinkage long sys_clone3(struct clone_args __user *uargs, size_t size); +asmlinkage long sys_spawn_template_create(struct spawn_template_create_args __user *uargs, + size_t size); asmlinkage long sys_execve(const char __user *filename, const char __user *const __user *argv, -- 2.52.0

