Add spawn_template_spawn() to start a child from a template fd. The child uses the template's pinned executable file, applies the per-spawn fd, cwd, and signal actions, closes all non-stdio fds by default, and then executes through the normal opened-file exec path. Return a pidfd for the child through the user pointer in args.pidfd so userspace can wait for or signal it without PID-reuse races. Keep fd inheritance opt-in with SPAWN_TEMPLATE_SPAWN_INHERIT_FDS. This patch only consumes the cached template state and does not add ELF metadata caching itself; executable identity and ELF metadata caching are added separately.
Signed-off-by: Li Chen <[email protected]> --- fs/spawn_template.c | 346 +++++++++++++++++++++++++++++++++++++++ include/linux/syscalls.h | 4 + 2 files changed, 350 insertions(+) diff --git a/fs/spawn_template.c b/fs/spawn_template.c index 280a1038cc45e..8c3711929cffb 100644 --- a/fs/spawn_template.c +++ b/fs/spawn_template.c @@ -1,14 +1,24 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/anon_inodes.h> +#include <linux/binfmts.h> +#include <linux/close_range.h> #include <linux/cred.h> #include <linux/err.h> #include <linux/fcntl.h> +#include <linux/fdtable.h> #include <linux/file.h> #include <linux/fs.h> +#include <linux/fs_struct.h> #include <linux/kernel.h> +#include <linux/namei.h> +#include <linux/sched/signal.h> +#include <linux/sched/task.h> +#include <linux/signal.h> #include <linux/slab.h> +#include <linux/string.h> #include <linux/syscalls.h> #include <linux/uaccess.h> +#include <uapi/linux/openat2.h> #include <uapi/linux/spawn_template.h> #include "internal.h" @@ -22,8 +32,262 @@ struct spawn_template { bool deny_write; }; +struct spawn_template_spawn_context { + struct spawn_template *tmpl; + struct spawn_template_spawn_args args; + struct spawn_template_action *actions; +}; + static const struct file_operations spawn_template_fops; +static int spawn_template_exit_status(int err) +{ /* Translate a negative errno into a small exit code: 127 for -ENOENT, 126 for -EACCES or -ENOEXEC, 1 for anything else. */ + switch (err) { + case -ENOENT: + return 127; + case -EACCES: + case -ENOEXEC: + return 126; + default: + return 1; + } +} + +static bool spawn_template_cred_matches(struct spawn_template *tmpl) +{ /* Compares cred pointers, not contents: true only when current_cred() is the very object stored in tmpl->creator_cred. */ + return current_cred() == tmpl->creator_cred; +} + +static int spawn_template_copy_signal_set(const struct spawn_template_action *action, + sigset_t *mask) +{ /* Reads a struct spawn_template_sigset from the user address in action->arg, then the sigset_t it points at into *mask. Returns -EINVAL for a zero arg or a sigsetsize other than sizeof(sigset_t), -EFAULT if either copy fails. SIGKILL and SIGSTOP are removed from the result. */ + struct spawn_template_sigset sigset; + + if (!action->arg) + return -EINVAL; + if (copy_from_user(&sigset, u64_to_user_ptr(action->arg), + sizeof(sigset))) + return -EFAULT; + if (sigset.sigsetsize != sizeof(sigset_t)) + return -EINVAL; + if (copy_from_user(mask, u64_to_user_ptr(sigset.sigset), sizeof(*mask))) 
+ return -EFAULT; + sigdelsetmask(mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); + + return 0; +} + +static int spawn_template_apply_open(const struct spawn_template_action *action) +{ /* Opens open.path relative to action->fd with the flags built from open.how and installs the result at action->newfd via replace_fd(); the O_CLOEXEC bit of open.how.flags is passed through as the flags argument. file is annotated __free(fput), so it is released when the function returns. */ + struct spawn_template_open open; + struct file *file __free(fput) = NULL; + struct file *tmp; + struct open_flags op; + int ret; + + if (action->fd < AT_FDCWD || action->newfd < 0 || action->flags || + !action->arg) + return -EINVAL; + + if (copy_from_user(&open, u64_to_user_ptr(action->arg), sizeof(open))) + return -EFAULT; + + ret = build_open_flags(&open.how, &op); + if (ret) + return ret; + + CLASS(filename_flags, name)(u64_to_user_ptr(open.path), op.lookup_flags); + tmp = do_file_open(action->fd, name, &op); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + file = tmp; + + return replace_fd(action->newfd, file, open.how.flags & O_CLOEXEC); +} + +static int spawn_template_apply_sigmask(const struct spawn_template_action *action) +{ /* Replaces the calling task's blocked-signal mask with the user-supplied set; fd, newfd and flags must all be zero. */ + sigset_t mask; + int ret; + + if (action->fd || action->newfd || action->flags) + return -EINVAL; + + ret = spawn_template_copy_signal_set(action, &mask); + if (ret) + return ret; + + set_current_blocked(&mask); + return 0; +} + +static int spawn_template_apply_sigdefault(const struct spawn_template_action *action) +{ /* Resets every signal in the user-supplied set to SIG_DFL, stopping at the first do_sigaction() error; fd, newfd and flags must all be zero. */ + sigset_t mask; + struct k_sigaction sa = {}; + int ret; + int sig; + + if (action->fd || action->newfd || action->flags) + return -EINVAL; + + ret = spawn_template_copy_signal_set(action, &mask); + if (ret) + return ret; + + sa.sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa.sa_mask); + + for (sig = 1; sig < _NSIG; sig++) { + if (!sigismember(&mask, sig)) + continue; + ret = do_sigaction(sig, &sa, NULL); + if (ret) + return ret; + } + + return 0; +} + +static int spawn_template_apply_action(const struct spawn_template_action *action) +{ /* Dispatches one action by action->type and returns the handler's result; unknown types yield -EINVAL. */ + switch (action->type) { + case SPAWN_TEMPLATE_ACTION_CLOSE: + return close_fd(action->fd); + case SPAWN_TEMPLATE_ACTION_DUP2: + if (action->fd == action->newfd) { /* same-fd DUP2: nothing is duplicated; only reject flags and check that the fd is open */ + if (action->flags) + return -EINVAL; + 
+ CLASS(fd, f)(action->fd); + + if (fd_empty(f)) + return -EBADF; + return 0; + } + return ksys_dup3(action->fd, action->newfd, action->flags); + case SPAWN_TEMPLATE_ACTION_FCHDIR: { + CLASS(fd, f)(action->fd); + int ret; + + if (fd_empty(f)) + return -EBADF; + if (!d_can_lookup(fd_file(f)->f_path.dentry)) + return -ENOTDIR; + + ret = file_permission(fd_file(f), MAY_EXEC | MAY_CHDIR); + if (!ret) + set_fs_pwd(current->fs, &fd_file(f)->f_path); + return ret; + } + case SPAWN_TEMPLATE_ACTION_OPEN: + return spawn_template_apply_open(action); + case SPAWN_TEMPLATE_ACTION_CLOSE_RANGE: + return do_close_range(action->fd, action->newfd, action->flags); + case SPAWN_TEMPLATE_ACTION_SIGMASK: + return spawn_template_apply_sigmask(action); + case SPAWN_TEMPLATE_ACTION_SIGDEFAULT: + return spawn_template_apply_sigdefault(action); + default: + return -EINVAL; + } +} + +static int spawn_template_copy_actions(struct spawn_template_action **out_actions, + u64 count, u64 uaddr) +{ /* Copies count actions from the user address uaddr into a kernel array and checks each entry's fields against its type. *out_actions is NULL when count is 0; on success it receives the array. Returns -E2BIG above SPAWN_TEMPLATE_MAX_ACTIONS, -EINVAL for a zero uaddr or a malformed entry, or the memdup_array_user() error. */ + struct spawn_template_action __user *uactions; + struct spawn_template_action *actions __free(kfree) = NULL; + struct spawn_template_action *tmp; + u64 i; + + *out_actions = NULL; + if (!count) + return 0; + if (count > SPAWN_TEMPLATE_MAX_ACTIONS) + return -E2BIG; + if (!uaddr) + return -EINVAL; + + uactions = u64_to_user_ptr(uaddr); + tmp = memdup_array_user(uactions, count, sizeof(*actions)); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + actions = tmp; + + for (i = 0; i < count; i++) { + switch (actions[i].type) { + case SPAWN_TEMPLATE_ACTION_CLOSE: + if (actions[i].fd < 0 || actions[i].flags || + actions[i].newfd || actions[i].arg) + return -EINVAL; + break; + case SPAWN_TEMPLATE_ACTION_DUP2: + if (actions[i].fd < 0 || actions[i].newfd < 0 || + (actions[i].flags & ~O_CLOEXEC) || actions[i].arg) + return -EINVAL; + break; + case SPAWN_TEMPLATE_ACTION_FCHDIR: + if (actions[i].fd < 0 || actions[i].flags || + actions[i].newfd || actions[i].arg) + return -EINVAL; + break; + case SPAWN_TEMPLATE_ACTION_OPEN: + 
+ if (actions[i].fd < AT_FDCWD || actions[i].newfd < 0 || + actions[i].flags || !actions[i].arg) + return -EINVAL; + break; + case SPAWN_TEMPLATE_ACTION_CLOSE_RANGE: + if (actions[i].fd < 0 || actions[i].newfd < 0 || + actions[i].fd > actions[i].newfd || + (actions[i].flags & + ~(CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC)) || + actions[i].arg) + return -EINVAL; + break; + case SPAWN_TEMPLATE_ACTION_SIGMASK: + case SPAWN_TEMPLATE_ACTION_SIGDEFAULT: + if (actions[i].fd || actions[i].newfd || + actions[i].flags || !actions[i].arg) + return -EINVAL; + break; + default: + return -EINVAL; + } + } + + *out_actions = no_free_ptr(actions); + return 0; +} + +static int spawn_template_child(void *data) +{ /* Entry point of the cloned child: apply the per-spawn actions in order, close fds 3 and up unless SPAWN_TEMPLATE_SPAWN_INHERIT_FDS is set, then exec the template's file. Any failure terminates the child with the exit code chosen by spawn_template_exit_status(); returns 0 once the exec has succeeded. */ + struct spawn_template_spawn_context *ctx = data; + struct spawn_template *tmpl = ctx->tmpl; + int ret; + u64 i; + + for (i = 0; i < ctx->args.actions_len; i++) { + ret = spawn_template_apply_action(&ctx->actions[i]); + if (ret < 0) + goto out_exec_error; + } + + if (!(ctx->args.flags & SPAWN_TEMPLATE_SPAWN_INHERIT_FDS)) { + ret = do_close_range(3, ~0U, 0); + if (ret < 0) + goto out_exec_error; + } + + ret = kernel_execveat_file(tmpl->exec_file, "", + u64_to_user_ptr(ctx->args.argv), + u64_to_user_ptr(ctx->args.envp), + AT_EMPTY_PATH); +out_exec_error: + if (ret < 0) /* do_exit() takes a wait-status word, so the exit code must sit in bits 8-15 (sys_exit() passes (code & 0xff) << 8); unshifted, 127/126/1 would be reported to the parent as a signal or stop status */ + do_exit(spawn_template_exit_status(ret) << 8); + return 0; +} + static bool spawn_template_file_exec_allowed(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) @@ -53,6 +317,18 @@ static const struct file_operations spawn_template_fops = { .llseek = noop_llseek, }; +static struct file *spawn_template_file_from_fd(int fd) +{ /* Returns a new reference (get_file) to the file behind fd when its f_op is spawn_template_fops; ERR_PTR(-EBADF) for an empty fd, ERR_PTR(-EINVAL) for any other kind of file. */ + CLASS(fd, f)(fd); + + if (fd_empty(f)) + return ERR_PTR(-EBADF); + if (fd_file(f)->f_op != &spawn_template_fops) + return ERR_PTR(-EINVAL); + + return get_file(fd_file(f)); +} + static int spawn_template_open_execfd(int execfd, struct file **file, bool *deny_write) { @@ -178,3 +454,73 @@ SYSCALL_DEFINE2(spawn_template_create, kfree(tmpl); return ret; } + 
+SYSCALL_DEFINE3(spawn_template_spawn, int, template_fd, + struct spawn_template_spawn_args __user *, uargs, + size_t, usize) +{ /* Validates usize and the template fd, requires the caller's creds to match the template's, copies and checks the args and the action array, then clones a child that runs spawn_template_child(). Returns the kernel_clone() result, or a negative errno from an earlier step. */ + struct spawn_template_spawn_context *ctx; + struct kernel_clone_args kargs; + struct file *template_file; + int ret; + + BUILD_BUG_ON(sizeof(struct spawn_template_spawn_args) != + SPAWN_TEMPLATE_SPAWN_ARGS_SIZE_VER0); + + if (usize < SPAWN_TEMPLATE_SPAWN_ARGS_SIZE_VER0) + return -EINVAL; + if (usize > PAGE_SIZE) + return -E2BIG; + + template_file = spawn_template_file_from_fd(template_fd); + if (IS_ERR(template_file)) + return PTR_ERR(template_file); + + if (!spawn_template_cred_matches(template_file->private_data)) { + ret = -EACCES; + goto out_put_template; + } + + ctx = kzalloc_obj(*ctx, GFP_KERNEL); + if (!ctx) { + ret = -ENOMEM; + goto out_put_template; + } + + ctx->tmpl = template_file->private_data; + + ret = copy_struct_from_user(&ctx->args, sizeof(ctx->args), uargs, + usize); + if (ret) + goto out_free_ctx; + + if ((ctx->args.flags & ~SPAWN_TEMPLATE_SPAWN_INHERIT_FDS) || + !ctx->args.pidfd || ctx->args.reserved[0] || + ctx->args.reserved[1] || ctx->args.reserved[2] || + ctx->args.reserved[3]) { + ret = -EINVAL; + goto out_free_ctx; + } + + ret = spawn_template_copy_actions(&ctx->actions, ctx->args.actions_len, + ctx->args.actions); + if (ret) + goto out_free_ctx; + + kargs = (struct kernel_clone_args) { + .flags = CLONE_VM | CLONE_VFORK | CLONE_PIDFD, + .pidfd = u64_to_user_ptr(ctx->args.pidfd), + .exit_signal = SIGCHLD, + .fn = spawn_template_child, + .fn_arg = ctx, + }; + + ret = kernel_clone(&kargs); /* NOTE(review): ctx, ctx->actions and the template file reference are released right after this returns, while the child reads all three; this appears to rely on CLONE_VFORK holding the parent until the child execs or exits -- confirm that wait cannot end early (e.g. on a fatal signal). */ + + kfree(ctx->actions); +out_free_ctx: + kfree(ctx); +out_put_template: + fput(template_file); + return ret; +} diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 4b41950488bd6..df7368edf6778 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -68,6 +68,7 @@ union bpf_attr; struct io_uring_params; struct clone_args; struct spawn_template_create_args; +struct spawn_template_spawn_args; 
struct open_how; struct mount_attr; struct landlock_ruleset_attr; @@ -824,6 +825,9 @@ asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, asmlinkage long sys_clone3(struct clone_args __user *uargs, size_t size); asmlinkage long sys_spawn_template_create(struct spawn_template_create_args __user *uargs, size_t size); +asmlinkage long sys_spawn_template_spawn(int template_fd, + struct spawn_template_spawn_args __user *uargs, + size_t size); asmlinkage long sys_execve(const char __user *filename, const char __user *const __user *argv, -- 2.52.0

