In seccomp_attach_filter when syncing the filter between all of the threads in a thread group if any thread is performing an exec fail to attach the filter immediately.
AKA "seccomp(SECCOMP_SET_MODE_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC...)" will now fail if one of the threads is executing exec. Prior to this change when one of the threads was performing an exec seccomp_attach_filter would block on cred_guard_mutex until the thread performing the exec called de_thread and killed all of the other threads. With the result that seccomp would not set the new filter. So this winds up being a very small change in semantics that causes the filter to not be set if the exec fails. Given that doing anything in any thread in parallel to an exec is racy with the threads being killed I will be surprised if anything in userspace will care about this semantic change. To allow in_execve to be read atomically with the seccomp state of the threads in the thread group wrap the setting and clearing of current->in_execve with the siglock from struct sighand_struct. The code in seccomp_attach_filter has been taking the cred_guard_mutex to ensure that the state of seccomp does not change during or after the calculation of the new credentials during exec. With seccomp_can_sync_threads updated to test in_execve and fail immediately taking cred_guard_mutex is no longer necessary. Signed-off-by: "Eric W. Biederman" <[email protected]> --- I think in general this is the right thing to do. The big advantage besides less and simpler code is that after this the only places using cred_guard_mutex are places where ptrace attach should not happen right now. Which makes cred_guard_mutex somewhat easier to reason about. fs/exec.c | 16 +++++++++++++--- kernel/seccomp.c | 29 ++++++++--------------------- 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index a91003e28eaa..9d9eabbed9a9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1885,6 +1885,16 @@ static int exec_binprm(struct linux_binprm *bprm) return 0; } +static void set_in_execve(bool in_execve) +{ + struct task_struct *me = current; + spinlock_t *lock = &me->sighand->siglock; + + spin_lock_irq(lock); + me->in_execve = in_execve; + spin_unlock_irq(lock); +} + /* * sys_execve() executes a new program. */ @@ -1904,7 +1914,7 @@ static int bprm_execve(struct linux_binprm *bprm, goto out_files; check_unsafe_exec(bprm); - current->in_execve = 1; + set_in_execve(true); file = do_open_execat(fd, filename, flags); retval = PTR_ERR(file); @@ -1934,7 +1944,7 @@ static int bprm_execve(struct linux_binprm *bprm, /* execve succeeded */ current->fs->in_exec = 0; - current->in_execve = 0; + set_in_execve(false); rseq_execve(current); acct_update_integrals(current); task_numa_free(current, false); @@ -1954,7 +1964,7 @@ static int bprm_execve(struct linux_binprm *bprm, out_unmark: current->fs->in_exec = 0; - current->in_execve = 0; + set_in_execve(false); out_files: if (displaced) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 3ee59ce0a323..2e78520ff2fd 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -379,7 +379,7 @@ static int is_ancestor(struct seccomp_filter *parent, /** * seccomp_can_sync_threads: checks if all threads can be synchronized * - * Expects sighand and cred_guard_mutex locks to be held. + * Expects sighand siglock to be held. * * Returns 0 on success, -ve on error, or the pid of a thread which was * either not in the correct seccomp mode or did not have an ancestral @@ -389,7 +389,6 @@ static inline pid_t seccomp_can_sync_threads(void) { struct task_struct *thread, *caller; - BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex)); assert_spin_locked(¤t->sighand->siglock); /* Validate all threads being eligible for synchronization. */ @@ -401,10 +400,11 @@ static inline pid_t seccomp_can_sync_threads(void) if (thread == caller) continue; - if (thread->seccomp.mode == SECCOMP_MODE_DISABLED || - (thread->seccomp.mode == SECCOMP_MODE_FILTER && - is_ancestor(thread->seccomp.filter, - caller->seccomp.filter))) + if (!thread->in_execve && + (thread->seccomp.mode == SECCOMP_MODE_DISABLED || + (thread->seccomp.mode == SECCOMP_MODE_FILTER && + is_ancestor(thread->seccomp.filter, + caller->seccomp.filter)))) continue; /* Return the first thread that cannot be synchronized. */ @@ -474,16 +474,14 @@ void seccomp_filter_release(struct task_struct *tsk) /** * seccomp_sync_threads: sets all threads to use current's filter * - * Expects sighand and cred_guard_mutex locks to be held, and for - * seccomp_can_sync_threads() to have returned success already - * without dropping the locks. + * Expects sighand siglock to be held, and for seccomp_can_sync_threads() + * to have returned success already without dropping the locks. * */ static inline void seccomp_sync_threads(unsigned long flags) { struct task_struct *thread, *caller; - BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex)); assert_spin_locked(¤t->sighand->siglock); /* Synchronize all threads. */ @@ -1551,14 +1549,6 @@ static long seccomp_set_mode_filter(unsigned int flags, } } - /* - * Make sure we cannot change seccomp or nnp state via TSYNC - * while another thread is in the middle of calling exec. - */ - if (flags & SECCOMP_FILTER_FLAG_TSYNC && - mutex_lock_killable(¤t->signal->cred_guard_mutex)) - goto out_put_fd; - spin_lock_irq(¤t->sighand->siglock); if (!seccomp_may_assign_mode(seccomp_mode)) @@ -1573,9 +1563,6 @@ static long seccomp_set_mode_filter(unsigned int flags, seccomp_assign_mode(current, seccomp_mode, flags); out: spin_unlock_irq(¤t->sighand->siglock); - if (flags & SECCOMP_FILTER_FLAG_TSYNC) - mutex_unlock(¤t->signal->cred_guard_mutex); -out_put_fd: if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) { if (ret) { listener_f->private_data = NULL; -- 2.20.1

