__ptrace_may_access() checks can happen on target tasks that are in the
middle of do_exit(), past exit_mm(). At that point, the ->mm pointer has
been set to NULL, and the task's reference to the mm_struct has been
dropped with mmput().

Unfortunately, the mm_struct contains the dumpability and the user_ns in
which the task last went through execve(), and we need those for
__ptrace_may_access(). Currently, that problem is handled by failing open:
If the ->mm is gone, we assume that the task was dumpable. In some edge
cases, this could potentially expose access to things like
/proc/$pid/fd/$fd of originally non-dumpable processes.
(exit_files() comes after exit_mm(), so the file descriptor table is still
there when we've gone through exit_mm().)

One way to fix this would be to move mm->user_ns and the dumpability state
over into the task_struct. However, that gets quite ugly if we want to
preserve existing semantics because e.g. PR_SET_DUMPABLE and commit_creds()
would then have to scan through all tasks sharing the mm_struct and keep
them in sync manually - that'd be a bit error-prone and overcomplicated.

(Moving these things into the signal_struct is not an option because that
is kept across executions, and pre-execve co-threads will share the
signal_struct that is also used by the task that has gone through
execve().)

I believe that this patch may be the least bad option to fix this - keep
the mm_struct (but not process memory) around with an mmgrab() reference
from exit_mm() until the task goes away completely.

Note that this moves free_task() down in order to make mmdrop_async()
available without a forward declaration.

Cc: sta...@vger.kernel.org
Fixes: bfedb589252c ("mm: Add a user_ns owner to mm_struct and fix ptrace permission checks")
Signed-off-by: Jann Horn <ja...@google.com>
---
 include/linux/sched.h |  8 +++++++
 kernel/exit.c         |  2 ++
 kernel/fork.c         | 54 ++++++++++++++++++++++---------------------
 kernel/ptrace.c       | 10 ++++++++
 4 files changed, 48 insertions(+), 26 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index afe01e232935..55bec6ff5626 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -747,6 +747,14 @@ struct task_struct {
 
        struct mm_struct                *mm;
        struct mm_struct                *active_mm;
+       /*
+        * When we exit and ->mm (the reference pinning ->mm's address space)
+        * goes away, we stash a reference to the mm_struct itself (counted via
+        * exit_mm->mm_count) in this member.
+        * This allows us to continue using the mm_struct for security checks
+        * and such even after the task has started exiting.
+        */
+       struct mm_struct                *exit_mm;
 
        /* Per-thread vma caching: */
        struct vmacache                 vmacache;
diff --git a/kernel/exit.c b/kernel/exit.c
index 733e80f334e7..97253ef33486 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -476,6 +476,8 @@ static void exit_mm(void)
        /* more a memory barrier than a real lock */
        task_lock(current);
        current->mm = NULL;
+       mmgrab(mm); /* for current->exit_mm */
+       current->exit_mm = mm;
        mmap_read_unlock(mm);
        enter_lazy_tlb(mm, current);
        task_unlock(current);
diff --git a/kernel/fork.c b/kernel/fork.c
index da8d360fb032..4942428a217c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -438,32 +438,6 @@ void put_task_stack(struct task_struct *tsk)
 }
 #endif
 
-void free_task(struct task_struct *tsk)
-{
-       scs_release(tsk);
-
-#ifndef CONFIG_THREAD_INFO_IN_TASK
-       /*
-        * The task is finally done with both the stack and thread_info,
-        * so free both.
-        */
-       release_task_stack(tsk);
-#else
-       /*
-        * If the task had a separate stack allocation, it should be gone
-        * by now.
-        */
-       WARN_ON_ONCE(refcount_read(&tsk->stack_refcount) != 0);
-#endif
-       rt_mutex_debug_task_free(tsk);
-       ftrace_graph_exit_task(tsk);
-       arch_release_task_struct(tsk);
-       if (tsk->flags & PF_KTHREAD)
-               free_kthread_struct(tsk);
-       free_task_struct(tsk);
-}
-EXPORT_SYMBOL(free_task);
-
 #ifdef CONFIG_MMU
 static __latent_entropy int dup_mmap(struct mm_struct *mm,
                                        struct mm_struct *oldmm)
@@ -722,6 +696,34 @@ static inline void put_signal_struct(struct signal_struct *sig)
                free_signal_struct(sig);
 }
 
+void free_task(struct task_struct *tsk)
+{
+       scs_release(tsk);
+
+#ifndef CONFIG_THREAD_INFO_IN_TASK
+       /*
+        * The task is finally done with both the stack and thread_info,
+        * so free both.
+        */
+       release_task_stack(tsk);
+#else
+       /*
+        * If the task had a separate stack allocation, it should be gone
+        * by now.
+        */
+       WARN_ON_ONCE(refcount_read(&tsk->stack_refcount) != 0);
+#endif
+       rt_mutex_debug_task_free(tsk);
+       ftrace_graph_exit_task(tsk);
+       arch_release_task_struct(tsk);
+       if (tsk->flags & PF_KTHREAD)
+               free_kthread_struct(tsk);
+       if (tsk->exit_mm)
+               mmdrop_async(tsk->exit_mm);
+       free_task_struct(tsk);
+}
+EXPORT_SYMBOL(free_task);
+
 void __put_task_struct(struct task_struct *tsk)
 {
        WARN_ON(!tsk->exit_state);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 43d6179508d6..0aedc6cf5bdc 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -342,7 +342,17 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
         * Pairs with a write barrier in commit_creds().
         */
        smp_rmb();
+       /*
+        * Look up the target task's mm_struct. If it fails because the task is
+        * exiting and has gone through exit_mm(), we can instead use ->exit_mm
+        * as long as we only use members that are preserved by an mmgrab()
+        * reference.
+        * The only case in which both ->mm and ->exit_mm can be NULL should be
+        * kernel threads.
+        */
        mm = task->mm;
+       if (!mm)
+               mm = task->exit_mm;
        if (mm &&
            ((get_dumpable(mm) != SUID_DUMP_USER) &&
             !ptrace_has_cap(cred, mm->user_ns, mode)))
-- 
2.29.0.rc1.297.gfa9743e501-goog

Reply via email to