Hi Krzysztof, On 2025-06-11 at 13:33:19 GMT, Krzysztof Karas wrote: > igt_mmap_migrate() tests migration with various parameters. > In one of the cases, where FILL and UNFAULTABLE flags are set, > during first stages of this test a mock file is opened in > igt_mmap_offset(), which results in allocating some data in the > GPU memory. Then, also in igt_mmap_offset(), the file is closed > (fput) and the cleanup of that data is scheduled. Right after > that, the test calls igt_fill_mappable() to fill all available > GPU memory. At this point, three scenarios are possible > (N = max size of GPU memory for this test in MiB): > 1) the data cleanup does not fire until the whole test is over, > so the memory is fully occupied by 1 MiB with that data and > N - 1 MiB added by igt_fill_mappable(), > 2) the data cleanup fires before igt_fill_mappable() completes, > so the whole memory is populated with N MiB by > igt_fill_mappable(), > 3) the data cleanup is performed right after fill is done, > so only N - 1 MiB are in the GPU memory, preventing the test > from properly faulting - we'd expect no space left, but an > object was able to fit in the remaining 1 MiB. > > Amend the problem by keeping the mock file open throughout the > duration of this test and calling fput() from the test itself. > > Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13929 > Signed-off-by: Krzysztof Karas <krzysztof.ka...@intel.com> > --- > On DG2 platforms the memory for data allocated as a result of > opening a mock file remains occupied until the test is done > (scenario 1), but on ATSM cards the data is freed earlier > (scenarios 2 and 3), which leads to sporadic failures. > > During testing I observed that the max memory was equal > to either 4096 or 2048 and igt_fill_mappable() tries to allocate > as many 1k objects as possible before halving allocation size. > > .../drm/i915/gem/selftests/i915_gem_mman.c | 11 ++++- > drivers/gpu/drm/i915/selftests/igt_mmap.c | 47 +++++++++++++------ > drivers/gpu/drm/i915/selftests/igt_mmap.h | 8 ++++ > 3 files changed, 50 insertions(+), 16 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c > b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c > index 9c3f17e51885..216d1d5ec2f5 100644 > --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c > +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c > @@ -1176,6 +1176,7 @@ static int __igt_mmap_migrate(struct > intel_memory_region **placements, > struct drm_i915_private *i915 = placements[0]->i915; > struct drm_i915_gem_object *obj; > struct i915_request *rq = NULL; > + struct file *mock_file; > unsigned long addr; > LIST_HEAD(objects); > u64 offset; > @@ -1194,14 +1195,19 @@ static int __igt_mmap_migrate(struct > intel_memory_region **placements, > if (err) > goto out_put; > > + /* Pretend to open("/dev/dri/card0") */ > + mock_file = mock_drm_getfile(i915->drm.primary, O_RDWR); > + if (IS_ERR(mock_file)) > + return PTR_ERR(mock_file); > + > /* > * This will eventually create a GEM context, due to opening dummy drm > * file, which needs a tiny amount of mappable device memory for the top > * level paging structures(and perhaps scratch), so make sure we > * allocate early, to avoid tears. > */ > - addr = igt_mmap_offset(i915, offset, obj->base.size, > - PROT_WRITE, MAP_SHARED); > + addr = igt_mmap_offset_with_file(i915, offset, obj->base.size, > + PROT_WRITE, MAP_SHARED, mock_file); > if (IS_ERR_VALUE(addr)) { > err = addr; > goto out_put; > @@ -1299,6 +1305,7 @@ static int __igt_mmap_migrate(struct > intel_memory_region **placements, > } > > out_put: > + fput(mock_file); > i915_gem_object_put(obj); > igt_close_objects(i915, &objects); > return err; > diff --git a/drivers/gpu/drm/i915/selftests/igt_mmap.c > b/drivers/gpu/drm/i915/selftests/igt_mmap.c > index e920a461bd36..ebe2c1933f03 100644 > --- a/drivers/gpu/drm/i915/selftests/igt_mmap.c > +++ b/drivers/gpu/drm/i915/selftests/igt_mmap.c > @@ -9,17 +9,22 @@ > #include "i915_drv.h" > #include "igt_mmap.h" > > -unsigned long igt_mmap_offset(struct drm_i915_private *i915, > - u64 offset, > - unsigned long size, > - unsigned long prot, > - unsigned long flags) > +unsigned long igt_mmap_offset_with_file(struct drm_i915_private *i915, > + u64 offset, > + unsigned long size, > + unsigned long prot, > + unsigned long flags, > + struct file *file) > { > struct drm_vma_offset_node *node; > - struct file *file; > unsigned long addr; > int err; > > + if (!file) { > + pr_info("file cannot be NULL\n"); > + return -EINVAL; > + } > + > /* no need to refcount, we own this object */ > drm_vma_offset_lock_lookup(i915->drm.vma_offset_manager); > node = drm_vma_offset_exact_lookup_locked(i915->drm.vma_offset_manager, > @@ -31,22 +36,36 @@ unsigned long igt_mmap_offset(struct drm_i915_private > *i915, > return -ENOENT; > } > > - /* Pretend to open("/dev/dri/card0") */ > - file = mock_drm_getfile(i915->drm.primary, O_RDWR); > - if (IS_ERR(file)) > - return PTR_ERR(file); > - > err = drm_vma_node_allow(node, file->private_data); > if (err) { > - addr = err; > - goto out_file; > + fput(file); > + return err; > }
This introduces a double fput if drm_vma_node_allow() fails. > > addr = vm_mmap(file, 0, drm_vma_node_size(node) << PAGE_SHIFT, > prot, flags, drm_vma_node_offset_addr(node)); > > drm_vma_node_revoke(node, file->private_data); > -out_file: > + > + return addr; > +} > + > +unsigned long igt_mmap_offset(struct drm_i915_private *i915, > + u64 offset, > + unsigned long size, > + unsigned long prot, > + unsigned long flags) > +{ > + struct file *file; > + unsigned long addr; > + > + /* Pretend to open("/dev/dri/card0") */ > + file = mock_drm_getfile(i915->drm.primary, O_RDWR); > + if (IS_ERR(file)) > + return PTR_ERR(file); > + > + addr = igt_mmap_offset_with_file(i915, offset, size, prot, flags, file); > fput(file); > + (the second fput being here) Thanks Krzysztof