Migration testing in i915 uses current task's address space to
allocate new userspace mapping, without registering real user
for that address space in mm_struct. Since PCI probe executes in
another process, "current" is unknown at the time the selftest
run [1].

It was observed that mm->mm_users would occasionally be 0
or drop to 0 during the test, which reaped userspace mappings,
further leading to failures upon reading from userland memory.

Prevent this by waiting for usable address space and
artificially increasing its mm_users counter for the duration
of the test.

[1]:
IGT makes use of selftest on module load mechanism in i915:
 1) IGT recognizes arguments and passes them to libkmod while in
  userspace;
 2) libkmod forms and executes a syscall simulating a
  modprobe command, thus moving to kernel context;
 3) PCI code puts local_pci_probe() call onto a workqueue via
  work_on_cpu() macro in pci_call_probe(). Below backtrace shows
  call order between syscall and work_on_cpu() call:

Call Trace:
 <TASK>
 dump_stack_lvl+0xc1/0xf0
 dump_stack+0x10/0x20
 pci_device_probe+0x205/0x280
 really_probe+0xf1/0x410
 __driver_probe_device+0x8c/0x190
 driver_probe_device+0x24/0xd0
 __driver_attach+0x10f/0x240
 ? __pfx___driver_attach+0x10/0x10
 bus_for_each_dev+0x7f/0xe0
 driver_attach+0x1e/0x30
 bus_add_driver+0x163/0x2a0
 driver_register+0x5e/0x130
 __pci_register_driver+0x80/0xa0
 i915_pci_register_driver+0x23/0x30 [i915]
 i915_init+0x37/0x120 [i915]
 ? __pfx_i915_init+0x10/0x10 [i915]
 do_one_initcall+0x5e/0x3a0
 do_init_module+0x97/0x2b0
 load_module+0x2d89/0x2e90
 ? kernel_read_file+0x2b1/0x320
 init_module_from_file+0xf4/0x120
 ? init_module_from_file+0xf4/0x120
 idempotent_init_module+0x117/0x330
 __x64_sys_finit_module+0x73/0xf0
 x64_sys_call+0x1d68/0x26b0
 do_syscall_64+0x93/0x1470
 ? do_syscall_64+0x1e4/0x1470
 ? ksys_lseek+0x4f/0xd0
 ? do_syscall_64+0x1e4/0x1470
 ? clear_bhb_loop+0x50/0xa0
 ? clear_bhb_loop+0x50/0xa0
 entry_SYSCALL_64_after_hwframe+0x76/0x7e

note that pci_device_probe() calls __pci_device_probe(), which
then calls pci_call_probe() and both are static functions;
 4) at last, the actual probe is called from kworker and the
 selftests execute:
Call Trace:
 <TASK>
 dump_stack_lvl+0xc1/0xf0
 dump_stack+0x10/0x20
 igt_mmap_migrate+0x302/0x7e0 [i915]
 __i915_subtests+0xb8/0x250 [i915]
 ? __pfx___i915_live_teardown+0x10/0x10 [i915]
 ? __pfx___i915_live_setup+0x10/0x10 [i915]
 i915_gem_mman_live_selftests+0x103/0x140 [i915]
 __run_selftests+0xc5/0x220 [i915]
 i915_live_selftests+0xaa/0x130 [i915]
 i915_pci_probe+0xee/0x1d0 [i915]
 local_pci_probe+0x47/0xb0
 work_for_cpu_fn+0x1a/0x30
 process_one_work+0x22e/0x6b0
 worker_thread+0x1e8/0x3d0
 ? __pfx_worker_thread+0x10/0x10
 kthread+0x11f/0x250
 ? __pfx_kthread+0x10/0x10
 ret_from_fork+0x344/0x3a0
 ? __pfx_kthread+0x10/0x10
 ret_from_fork_asm+0x1a/0x30

This operation of putting selftests into a forked process
creates a short delay in which another userspace process may be
handled by the scheduler, so IGT process is not the one from
which kernel borrows the address space.

Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14204
Fixes: 34b1c1c71d37 ("i915/selftest/igt_mmap: let mmap tests run in kthread")
Signed-off-by: Krzysztof Karas <[email protected]>
---
This has previously been a regular patch, but after many
deliberations and discussions with i915 team members we decided
that this should be made into a RFC, as this may still be an
incomplete solution.

This change is best effort to increase reliability of the
selftest. There will still be instances, where there is no
suitable address space available, because we rely on operating
system to be in a certain state, which we probably should not
force by ourselves. In this case, it would be sufficient to have
this test pass most of the time and silently skip if it cannot
execute safely.

 .../drm/i915/gem/selftests/i915_gem_mman.c    | 44 ++++++++++++++++---
 1 file changed, 39 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 9d454d0b46f2..ccb00cd5e750 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -38,6 +38,8 @@ struct tile {
        unsigned int swizzle;
 };
 
+bool skip_vma = true;
+
 static u64 swizzle_bit(unsigned int bit, u64 offset)
 {
        return (offset & BIT_ULL(bit)) >> (bit - 6);
@@ -903,6 +905,9 @@ static int __igt_mmap(struct drm_i915_private *i915,
        int err, i;
        u64 offset;
 
+       if (skip_vma)
+               return 0;
+
        if (!can_mmap(obj, type))
                return 0;
 
@@ -1165,6 +1170,9 @@ static int __igt_mmap_migrate(struct intel_memory_region 
**placements,
        u64 offset;
        int err;
 
+       if (skip_vma)
+               return 0;
+
        obj = __i915_gem_object_create_user(i915, PAGE_SIZE,
                                            placements,
                                            n_placements);
@@ -1847,7 +1855,6 @@ static int igt_mmap_revoke(void *arg)
 int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
 {
        int ret;
-       bool unuse_mm = false;
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_partial_tiling),
                SUBTEST(igt_smoke_tiling),
@@ -1860,14 +1867,41 @@ int i915_gem_mman_live_selftests(struct 
drm_i915_private *i915)
        };
 
        if (!current->mm) {
-               kthread_use_mm(current->active_mm);
-               unuse_mm = true;
+               int timeout = 10;
+               /**
+                * We want to use current->active_mm, which corresponds to the
+                * address space of a userspace process that was last handled by
+                * scheduler. It is possible that this memory is in the middle
+                * of cleanup indicated by mm_users == 0, in which case kernel
+                * waits until the process is finished to release its mm_struct.
+                * Borrowing that memory at that point is unsafe, because it may
+                * be freed at any point and taking additional references to it
+                * will not change the cleanup behavior.
+                * Wait for a bit in hopes that another process is taken by
+                * scheduler and has reliable memory for us to map into.
+                */
+               while (timeout--) {
+                       if (mmget_not_zero(current->active_mm)) {
+                               kthread_use_mm(current->active_mm);
+                               skip_vma = false;
+                               break;
+                       }
+
+                       msleep(1000);
+               }
        }
 
        ret = i915_live_subtests(tests, i915);
 
-       if (unuse_mm)
-               kthread_unuse_mm(current->active_mm);
+       if (!skip_vma) {
+               /**
+                * The scheduler was working while the test executed,
+                * so current->active_mm might have changed. Use the old
+                * reference to the address space stored in current->mm.
+                */
+               mmput_async(current->mm);
+               kthread_unuse_mm(current->mm);
+       }
 
        return ret;
 }
-- 
2.34.1


-- 
Best Regards,
Krzysztof

Reply via email to