Migration testing in i915 assumes the current task's address space when allocating a new userspace mapping, and uses it without registering a real user of that address space in mm_struct. On single NUMA node setups the PCI probe executes in the same context as the userspace process calling the test (i915_selftest from IGT), but when multiple nodes are available, the PCI code puts the probe into a kernel workqueue. This switches execution to a kworker, which does not have its own userspace address space and must borrow one from another process, so "current->active_mm" is unknown at the start of the test.
It was observed that mm->mm_users would occasionally be 0 or drop to 0 during the test due to a short delay between scheduling and executing work in a forked process, which reaped userspace mappings, further leading to failures upon reading from userland memory. Prevent this by adding a module parameter that carries the PID of a trusted task, so that its mm struct may be used if needed. Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14204 Signed-off-by: Krzysztof Karas <[email protected]> --- v8: * Keep reference to mm open for the duration of the test for readability. (Sebastian) * Be paranoid and explicit about keeping the mm reference, so we are **really** sure about userspace mappings not disappearing. v9: * Drop "Fixes" tag. (Andi) * Revert to using a separate function for mm acquisition. (Andi) * Keep kthread_use/unuse and mmget/mmput calls symmetric. (Janusz) v10: * Initialize PID variable to a negative value and check for user provided value. (Janusz) drivers/gpu/drm/i915/i915_selftest.h | 1 + .../gpu/drm/i915/selftests/i915_selftest.c | 75 ++++++++++++++++++- 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index 72922028f4ba..e29ca298e7eb 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -35,6 +35,7 @@ struct i915_selftest { unsigned long timeout_jiffies; unsigned int timeout_ms; unsigned int random_seed; + unsigned int userspace_pid; char *filter; int mock; int live; diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index 8460f0a70d04..036328072e38 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -181,13 +181,57 @@ __wait_gsc_huc_load_completed(struct drm_i915_private *i915) pr_warn(DRIVER_NAME "Timed out waiting for huc load via GSC!\n"); } +static struct mm_struct * +get_mm(int u_pid_nr) +{ + struct task_struct *task 
= NULL; + struct mm_struct *mm = NULL; + struct pid *u_pid = NULL; + + if (u_pid_nr < 1) + return NULL; + + u_pid = find_get_pid(u_pid_nr); + if (!u_pid) { + pr_warn("Could not find PID: %d\n", u_pid_nr); + return NULL; + } + + task = get_pid_task(u_pid, PIDTYPE_PID); + put_pid(u_pid); + if (!task) { + pr_warn("Could not find task for PID: %d\n", u_pid_nr); + return NULL; + } + + if (task->flags & PF_KTHREAD) { + pr_warn("Task not in userspace: %d\n", u_pid_nr); + put_task_struct(task); + return NULL; + } + + mm = get_task_mm(task); + put_task_struct(task); + if (!mm) { + pr_warn("Could not find address space of task with PID: %d\n", u_pid_nr); + return NULL; + } + + return mm; +} + static int __run_selftests(const char *name, struct selftest *st, unsigned int count, void *data) { + struct mm_struct *mm = NULL; + int u_pid_nr = -1; int err = 0; + if (i915_selftest.userspace_pid) + u_pid_nr = i915_selftest.userspace_pid; + while (!i915_selftest.random_seed) i915_selftest.random_seed = get_random_u32(); @@ -201,14 +245,36 @@ static int __run_selftests(const char *name, pr_info(DRIVER_NAME ": Performing %s selftests with st_random_seed=0x%x st_timeout=%u\n", name, i915_selftest.random_seed, i915_selftest.timeout_ms); + /* + * If we are running in a kthread on a multi NUMA system and the user passed + * a valid PID of a userspace task, then we may borrow its address space + * to prepare a safe environment for the mmap selftests. 
+ */ + if (!current->mm && u_pid_nr > 0) { + mm = get_mm(u_pid_nr); + if (mm) { + kthread_use_mm(mm); + if (unlikely(!current->mm)) { + mmput(mm); + mm = NULL; + pr_warn("Could not set mm as current->mm\n"); + } + } + } + /* Tests are listed in order in i915_*_selftests.h */ for (; count--; st++) { if (!st->enabled) continue; cond_resched(); - if (signal_pending(current)) + if (signal_pending(current)) { + if (mm) { + kthread_unuse_mm(mm); + mmput_async(mm); + } return -EINTR; + } pr_info(DRIVER_NAME ": Running %s\n", st->name); if (data) @@ -226,6 +292,11 @@ static int __run_selftests(const char *name, st->name, err)) err = -1; + if (mm) { + kthread_unuse_mm(mm); + mmput_async(mm); + } + return err; } @@ -507,6 +578,8 @@ void igt_hexdump(const void *buf, size_t len) module_param_named(st_random_seed, i915_selftest.random_seed, uint, 0400); module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400); module_param_named(st_filter, i915_selftest.filter, charp, 0400); +module_param_named(st_userspace_pid, i915_selftest.userspace_pid, uint, 0400); +MODULE_PARM_DESC(st_userspace_pid, "For usage in tests that map userspace memory and require address space with controllable lifetime."); module_param_named_unsafe(mock_selftests, i915_selftest.mock, int, 0400); MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then leave dummy module)"); -- 2.34.1
