Migration testing in i915 allocates a new userspace mapping in
the current task's address space and uses it without
registering a real user of that address space in mm_struct.
On single NUMA node setups PCI probe executes in the same
context as userspace process calling the test (i915_selftest
from IGT), but when multiple nodes are available, the PCI code
puts probe into a kernel workqueue. This switches execution to
a kworker, which does not have its own address space in
userspace and must borrow such memory from another process, so
"current->active_mm" is unknown at the start of the test.

It was observed that mm->mm_users would occasionally be 0
or drop to 0 during the test because of the short delay between
scheduling and executing the work in the forked process. The
userspace mappings were then reaped, leading to failures when
reading from userland memory.

Prevent this by adding a module parameter that carries the PID
of a trusted userspace task, so that its mm struct may be
borrowed if needed.

Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14204
Signed-off-by: Krzysztof Karas <[email protected]>
---
v7 (Andi):
 * Add missing mm reference release on error path.

v8:
 * Keep reference to mm open for the duration of test for
 readability. (Sebastian)
 * Be paranoid and explicit about keeping the mm reference,
 so we are **really** sure about userspace mappings not
 disappearing.

v9:
 * Drop "Fixes" tag. (Andi)
 * Revert to using a separate function for mm acquisition. (Andi)
 * Keep kthread_use/unuse and mmget/mmput calls symmetric. (Janusz)

 drivers/gpu/drm/i915/i915_selftest.h          |  1 +
 .../gpu/drm/i915/selftests/i915_selftest.c    | 68 ++++++++++++++++++-
 2 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h
index 72922028f4ba..e29ca298e7eb 100644
--- a/drivers/gpu/drm/i915/i915_selftest.h
+++ b/drivers/gpu/drm/i915/i915_selftest.h
@@ -35,6 +35,7 @@ struct i915_selftest {
        unsigned long timeout_jiffies;
        unsigned int timeout_ms;
        unsigned int random_seed;
+       unsigned int userspace_pid;
        char *filter;
        int mock;
        int live;
diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c
index 8460f0a70d04..1e8494bab14b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_selftest.c
+++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c
@@ -181,11 +181,48 @@ __wait_gsc_huc_load_completed(struct drm_i915_private *i915)
                pr_warn(DRIVER_NAME "Timed out waiting for huc load via GSC!\n");
 }
 
+static struct mm_struct *
+get_mm(int u_pid_nr)
+{
+       struct pid *u_pid = find_get_pid(u_pid_nr);
+       struct task_struct *task = NULL;
+       struct mm_struct *mm = NULL;
+
+       if (!u_pid) {
+               pr_warn("Could not find PID: %d\n", u_pid_nr);
+               return NULL;
+       }
+
+       task = get_pid_task(u_pid, PIDTYPE_PID);
+       put_pid(u_pid);
+       if (!task) {
+               pr_warn("Could not find task for PID: %d\n", u_pid_nr);
+               return NULL;
+       }
+
+       if (task->flags & PF_KTHREAD) {
+               pr_warn("Task not in userspace: %d\n", u_pid_nr);
+               put_task_struct(task);
+               return NULL;
+       }
+
+       mm = get_task_mm(task);
+       put_task_struct(task);
+       if (!mm) {
+               pr_warn("Could not find address space of task with PID: %d\n", u_pid_nr);
+               return NULL;
+       }
+
+       return mm;
+}
+
 static int __run_selftests(const char *name,
                           struct selftest *st,
                           unsigned int count,
                           void *data)
 {
+       int u_pid_nr = i915_selftest.userspace_pid;
+       struct mm_struct *mm = NULL;
        int err = 0;
 
        while (!i915_selftest.random_seed)
@@ -201,14 +238,36 @@ static int __run_selftests(const char *name,
        pr_info(DRIVER_NAME ": Performing %s selftests with st_random_seed=0x%x st_timeout=%u\n",
                name, i915_selftest.random_seed, i915_selftest.timeout_ms);
 
+       /*
+        * If we are running in a kthread on a multi NUMA system and the user passed
+        * a valid PID of a userspace task, then we may borrow its address space
+        * to prepare a safe environment for the mmap selftests.
+        */
+       if (!current->mm) {
+               mm = get_mm(u_pid_nr);
+               if (mm) {
+                       kthread_use_mm(mm);
+                       if (unlikely(!current->mm)) {
+                               mmput(mm);
+                               mm = NULL;
+                               pr_warn("Could not set mm as current->mm\n");
+                       }
+               }
+       }
+
        /* Tests are listed in order in i915_*_selftests.h */
        for (; count--; st++) {
                if (!st->enabled)
                        continue;
 
                cond_resched();
-               if (signal_pending(current))
+               if (signal_pending(current)) {
+                       if (mm) {
+                               kthread_unuse_mm(mm);
+                               mmput_async(mm);
+                       }
                        return -EINTR;
+               }
 
                pr_info(DRIVER_NAME ": Running %s\n", st->name);
                if (data)
@@ -226,6 +285,11 @@ static int __run_selftests(const char *name,
                 st->name, err))
                err = -1;
 
+       if (mm) {
+               kthread_unuse_mm(mm);
+               mmput_async(mm);
+       }
+
        return err;
 }
 
@@ -507,6 +571,8 @@ void igt_hexdump(const void *buf, size_t len)
 module_param_named(st_random_seed, i915_selftest.random_seed, uint, 0400);
 module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400);
 module_param_named(st_filter, i915_selftest.filter, charp, 0400);
+module_param_named(st_userspace_pid, i915_selftest.userspace_pid, uint, 0400);
+MODULE_PARM_DESC(st_userspace_pid, "For usage in tests that map userspace memory and require address space with controllable lifetime.");
 
 module_param_named_unsafe(mock_selftests, i915_selftest.mock, int, 0400);
 MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then leave dummy module)");
-- 
2.34.1

Reply via email to