Hi Krzysztof,

On Thu, 2026-05-07 at 14:24 +0000, Krzysztof Karas wrote:
> Migration testing in i915 assumes current task's address space
> to allocate new userspace mapping and uses it without
> registering real user for that address space in mm_struct.
> On single NUMA node setups PCI probe executes in the same
> context as userspace process calling the test (i915_selftest
> from IGT), but when multiple nodes are available, the PCI code
> puts probe into a kernel workqueue. This switches execution to
> a kworker, which does not have its own address space in
> userspace and must borrow such memory from another process, so
> "current->active_mm" is unknown at the start of the test.
> 
> It was observed that mm->mm_users would occasionally be 0
> or drop to 0 during the test due to short delay between
> scheduling and executing work in forked process, which reaped
> userspace mappings, further leading to failures upon reading
> from userland memory.
> 
> Prevent this by adding a PID parameter to a trusted task, so its
> mm struct may be used if needed.
> 
> Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14204
> Signed-off-by: Krzysztof Karas <[email protected]>
> ---
> v7 (Andi):
>  * Add missing mm reference release on error path.
> 
> v8:
>  * Keep reference to mm open for the duration of test for
>  readability. (Sebastian)
>  * Be paranoid and explicit about keeping the mm reference,
>  so we are **really** sure about userspace mappings not
>  disappearing.
> 
> v9:
>  * Drop "Fixes" tag. (Andi)
>  * Revert to using a separate function for mm acquisition. (Andi)
>  * Keep kthread_use/unuse and mmget/mmput calls symmetric. (Janusz)
> 
>  drivers/gpu/drm/i915/i915_selftest.h          |  1 +
>  .../gpu/drm/i915/selftests/i915_selftest.c    | 68 ++++++++++++++++++-
>  2 files changed, 68 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h
> index 72922028f4ba..e29ca298e7eb 100644
> --- a/drivers/gpu/drm/i915/i915_selftest.h
> +++ b/drivers/gpu/drm/i915/i915_selftest.h
> @@ -35,6 +35,7 @@ struct i915_selftest {
>       unsigned long timeout_jiffies;
>       unsigned int timeout_ms;
>       unsigned int random_seed;
> +     unsigned int userspace_pid;
>       char *filter;
>       int mock;
>       int live;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c
> index 8460f0a70d04..1e8494bab14b 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c
> @@ -181,11 +181,48 @@ __wait_gsc_huc_load_completed(struct drm_i915_private *i915)
>               pr_warn(DRIVER_NAME "Timed out waiting for huc load via GSC!\n");
>  }
>  
> +static struct mm_struct *
> +get_mm(int u_pid_nr)
> +{
> +     struct pid *u_pid = find_get_pid(u_pid_nr);

What happens here if the st_userspace_pid module parameter is not provided?

> +     struct task_struct *task = NULL;
> +     struct mm_struct *mm = NULL;
> +
> +     if (!u_pid) {
> +             pr_warn("Could not find PID: %d\n", u_pid_nr);
> +             return NULL;
> +     }
> +
> +     task = get_pid_task(u_pid, PIDTYPE_PID);
> +     put_pid(u_pid);
> +     if (!task) {
> +             pr_warn("Could not find task for PID: %d\n", u_pid_nr);
> +             return NULL;
> +     }
> +
> +     if (task->flags & PF_KTHREAD) {
> +             pr_warn("Task not in userspace: %d\n", u_pid_nr);
> +             put_task_struct(task);
> +             return NULL;
> +     }
> +
> +     mm = get_task_mm(task);
> +     put_task_struct(task);
> +     if (!mm) {
> +             pr_warn("Could not find address space of task with PID: %d\n", u_pid_nr);
> +             return NULL;
> +     }
> +
> +     return mm;
> +}
> +
>  static int __run_selftests(const char *name,
>                          struct selftest *st,
>                          unsigned int count,
>                          void *data)
>  {
> +     int u_pid_nr = i915_selftest.userspace_pid;
> +     struct mm_struct *mm = NULL;
>       int err = 0;
>  
>       while (!i915_selftest.random_seed)
> @@ -201,14 +238,36 @@ static int __run_selftests(const char *name,
>       pr_info(DRIVER_NAME ": Performing %s selftests with st_random_seed=0x%x st_timeout=%u\n",
>               name, i915_selftest.random_seed, i915_selftest.timeout_ms);
>  
> +     /*
> +      * If we are running in a kthread on a multi NUMA system and the user passed
> +      * a valid PID of a userspace task, then we may borrow its address space
> +      * to prepare a safe environment for the mmap selftests.
> +      */
> +     if (!current->mm) {

I think this condition should also check for a valid u_pid_nr.  To avoid 
ambiguity, maybe the i915_selftest.userspace_pid attribute should be 
initialized to a negative value by default (when not overridden by the 
corresponding module parameter).  There is no point in emitting any 
warnings from here if the module parameter is not provided, I believe.

Other than that, LGTM.

Thanks,
Janusz

> +             mm = get_mm(u_pid_nr);
> +             if (mm) {
> +                     kthread_use_mm(mm);
> +                     if (unlikely(!current->mm)) {
> +                             mmput(mm);
> +                             mm = NULL;
> +                             pr_warn("Could not set mm as current->mm\n");
> +                     }
> +             }
> +     }
> +
>       /* Tests are listed in order in i915_*_selftests.h */
>       for (; count--; st++) {
>               if (!st->enabled)
>                       continue;
>  
>               cond_resched();
> -             if (signal_pending(current))
> +             if (signal_pending(current)) {
> +                     if (mm) {
> +                             kthread_unuse_mm(mm);
> +                             mmput_async(mm);
> +                     }
>                       return -EINTR;
> +             }
>  
>               pr_info(DRIVER_NAME ": Running %s\n", st->name);
>               if (data)
> @@ -226,6 +285,11 @@ static int __run_selftests(const char *name,
>                st->name, err))
>               err = -1;
>  
> +     if (mm) {
> +             kthread_unuse_mm(mm);
> +             mmput_async(mm);
> +     }
> +
>       return err;
>  }
>  
> @@ -507,6 +571,8 @@ void igt_hexdump(const void *buf, size_t len)
>  module_param_named(st_random_seed, i915_selftest.random_seed, uint, 0400);
>  module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400);
>  module_param_named(st_filter, i915_selftest.filter, charp, 0400);
> +module_param_named(st_userspace_pid, i915_selftest.userspace_pid, uint, 0400);
> +MODULE_PARM_DESC(st_userspace_pid, "For usage in tests that map userspace memory and require address space with controllable lifetime.");
>  
>  module_param_named_unsafe(mock_selftests, i915_selftest.mock, int, 0400);
>  MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then leave dummy module)");

Reply via email to