On Tue, Nov 11, 2025 at 05:44:03PM +0100, Thomas Hellström wrote: > Use device file descriptors and regions to represent pagemaps on > foreign or local devices. > > The underlying files are type-checked at madvise time, and > references are kept on the drm_pagemap as long as there are > madvises pointing to it. > > Extend the madvise preferred_location UAPI to support the region > instance to identify the foreign placement. > > v2: > - Improve UAPI documentation. (Matt Brost) > - Sanitize preferred_mem_loc.region_instance madvise. (Matt Brost) > - Clarify madvise drm_pagemap vs xe_pagemap refcounting. (Matt Brost) > - Don't allow a foreign drm_pagemap madvise without a fast > interconnect. > > Signed-off-by: Thomas Hellström <[email protected]> > --- > drivers/gpu/drm/xe/xe_device.c | 14 +++++ > drivers/gpu/drm/xe/xe_device.h | 2 + > drivers/gpu/drm/xe/xe_svm.c | 78 +++++++++++++++++++++++++++ > drivers/gpu/drm/xe/xe_svm.h | 7 +++ > drivers/gpu/drm/xe/xe_vm_madvise.c | 86 ++++++++++++++++++++++++++---- > include/uapi/drm/xe_drm.h | 18 +++++-- > 6 files changed, 191 insertions(+), 14 deletions(-) > > diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c > index ff598d0c68d7..2465c7a9a63e 100644 > --- a/drivers/gpu/drm/xe/xe_device.c > +++ b/drivers/gpu/drm/xe/xe_device.c > @@ -373,6 +373,20 @@ static const struct file_operations xe_driver_fops = { > .fop_flags = FOP_UNSIGNED_OFFSET, > }; > > +/** > + * xe_is_xe_file() - Is the file an xe device file? > + * @file: The file. > + * > + * Checks whether the file is opened against > + * an xe device. > + * > + * Return: %true if an xe file, %false if not. > + */ > +bool xe_is_xe_file(const struct file *file) > +{ > + return file->f_op == &xe_driver_fops; > +} > + > static struct drm_driver driver = { > /* Don't use MTRRs here; the Xserver or userspace app should > * deal with them for Intel hardware. 
> diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h > index 32cc6323b7f6..475e2245c955 100644 > --- a/drivers/gpu/drm/xe/xe_device.h > +++ b/drivers/gpu/drm/xe/xe_device.h > @@ -195,6 +195,8 @@ void xe_file_put(struct xe_file *xef); > > int xe_is_injection_active(void); > > +bool xe_is_xe_file(const struct file *file); > + > /* > * Occasionally it is seen that the G2H worker starts running after a delay > of more than > * a second even after being queued and activated by the Linux workqueue > subsystem. This > diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c > index 006de141dfa7..c0b17b548a00 100644 > --- a/drivers/gpu/drm/xe/xe_svm.c > +++ b/drivers/gpu/drm/xe/xe_svm.c > @@ -1788,6 +1788,78 @@ int xe_pagemap_cache_create(struct xe_tile *tile) > return 0; > } > > +static struct drm_pagemap *xe_devmem_open(struct xe_device *xe, u32 > region_instance) > +{ > + u32 tile_id = region_instance - 1; > + struct xe_pagemap *xpagemap; > + struct drm_pagemap *dpagemap; > + struct xe_vram_region *vr; > + > + if (tile_id >= xe->info.tile_count) > + return ERR_PTR(-ENOENT); > + > + if (!((BIT(tile_id) << 1) & xe->info.mem_region_mask)) > + return ERR_PTR(-ENOENT); > + > + vr = xe_tile_to_vr(&xe->tiles[tile_id]); > + xpagemap = xe_pagemap_find_or_create(xe, vr->dpagemap_cache, vr); > + if (IS_ERR(xpagemap)) > + return ERR_CAST(xpagemap); > + > + /* Below is for clarity only. The reference counter is the same. */ > + dpagemap = drm_pagemap_get(&xpagemap->dpagemap); > + xe_pagemap_put(xpagemap);
I think this is overkill, maybe just a comment above xe_pagemap_find_or_create indicating this function returns xpagemap with a ref. Either way, LGTM: Reviewed-by: Matthew Brost <[email protected]> > + > + return dpagemap; > +} > + > +/** > + * xe_drm_pagemap_from_fd() - Return a drm_pagemap pointer from a > + * (file_descriptor, region_instance) pair. > + * @fd: An fd opened against an xe device. > + * @region_instance: The region instance representing the device memory > + * on the opened xe device. > + * > + * Opens a struct drm_pagemap pointer on the > + * indicated device and region_instance. > + * > + * Return: A reference-counted struct drm_pagemap pointer on success, > + * negative error pointer on failure. > + */ > +struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance) > +{ > + struct drm_pagemap *dpagemap; > + struct file *file; > + struct drm_file *fpriv; > + struct drm_device *drm; > + int idx; > + > + if (fd <= 0) > + return ERR_PTR(-EINVAL); > + > + file = fget(fd); > + if (!file) > + return ERR_PTR(-ENOENT); > + > + if (!xe_is_xe_file(file)) { > + dpagemap = ERR_PTR(-ENOENT); > + goto out; > + } > + > + fpriv = file->private_data; > + drm = fpriv->minor->dev; > + if (!drm_dev_enter(drm, &idx)) { > + dpagemap = ERR_PTR(-ENODEV); > + goto out; > + } > + > + dpagemap = xe_devmem_open(to_xe_device(drm), region_instance); > + drm_dev_exit(idx); > +out: > + fput(file); > + return dpagemap; > +} > + > #else > > int xe_pagemap_shrinker_create(struct xe_device *xe) > @@ -1811,6 +1883,12 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct > xe_vma *vma, struct xe_tile *t > { > return NULL; > } > + > +struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance) > +{ > + return ERR_PTR(-ENOENT); > +} > + > #endif > > /** > diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h > index a0ec173c6bf0..60eae01a4220 100644 > --- a/drivers/gpu/drm/xe/xe_svm.h > +++ b/drivers/gpu/drm/xe/xe_svm.h > @@ -187,6 +187,8 @@ int 
xe_pagemap_shrinker_create(struct xe_device *xe); > > int xe_pagemap_cache_create(struct xe_tile *tile); > > +struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance); > + > #else > #include <linux/interval_tree.h> > #include "xe_vm.h" > @@ -378,6 +380,11 @@ static inline int xe_pagemap_cache_create(struct xe_tile > *tile) > return 0; > } > > +static inline struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 > region_instance) > +{ > + return ERR_PTR(-ENOENT); > +} > + > #define xe_svm_range_has_dma_mapping(...) false > #endif /* CONFIG_DRM_XE_GPUSVM */ > > diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c > b/drivers/gpu/drm/xe/xe_vm_madvise.c > index d6f47c8e146d..add9a6ca2390 100644 > --- a/drivers/gpu/drm/xe/xe_vm_madvise.c > +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c > @@ -22,6 +22,19 @@ struct xe_vmas_in_madvise_range { > bool has_svm_userptr_vmas; > }; > > +/** > + * struct xe_madvise_details - Argument to madvise_funcs > + * @dpagemap: Reference-counted pointer to a struct drm_pagemap. > + * > + * The madvise IOCTL handler may, in addition to the user-space > + * args, have additional info to pass into the madvise_func that > + * handles the madvise type. Use a struct_xe_madvise_details > + * for that and extend the struct as necessary. > + */ > +struct xe_madvise_details { > + struct drm_pagemap *dpagemap; > +}; > + > static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range > *madvise_range) > { > u64 addr = madvise_range->addr; > @@ -74,7 +87,8 @@ static int get_vmas(struct xe_vm *vm, struct > xe_vmas_in_madvise_range *madvise_r > > static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm, > struct xe_vma **vmas, int num_vmas, > - struct drm_xe_madvise *op) > + struct drm_xe_madvise *op, > + struct xe_madvise_details *details) > { > int i; > > @@ -96,14 +110,18 @@ static void madvise_preferred_mem_loc(struct xe_device > *xe, struct xe_vm *vm, > * is of no use and can be ignored. 
> */ > loc->migration_policy = > op->preferred_mem_loc.migration_policy; > + drm_pagemap_put(loc->dpagemap); > loc->dpagemap = NULL; > + if (details->dpagemap) > + loc->dpagemap = > drm_pagemap_get(details->dpagemap); > } > } > } > > static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm, > struct xe_vma **vmas, int num_vmas, > - struct drm_xe_madvise *op) > + struct drm_xe_madvise *op, > + struct xe_madvise_details *details) > { > struct xe_bo *bo; > int i; > @@ -144,7 +162,8 @@ static void madvise_atomic(struct xe_device *xe, struct > xe_vm *vm, > > static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm, > struct xe_vma **vmas, int num_vmas, > - struct drm_xe_madvise *op) > + struct drm_xe_madvise *op, > + struct xe_madvise_details *details) > { > int i; > > @@ -162,7 +181,8 @@ static void madvise_pat_index(struct xe_device *xe, > struct xe_vm *vm, > > typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm, > struct xe_vma **vmas, int num_vmas, > - struct drm_xe_madvise *op); > + struct drm_xe_madvise *op, > + struct xe_madvise_details *details); > > static const madvise_func madvise_funcs[] = { > [DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc, > @@ -246,11 +266,12 @@ static bool madvise_args_are_sane(struct xe_device *xe, > const struct drm_xe_madv > if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM)) > return false; > > - if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy > > - DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES)) > + if (XE_IOCTL_DBG(xe, fd <= DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE > && > + args->preferred_mem_loc.region_instance != 0)) > return false; > > - if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad)) > + if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy > > + DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES)) > return false; > > if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved)) > @@ -296,6 +317,41 @@ static bool madvise_args_are_sane(struct xe_device *xe, > const struct 
drm_xe_madv > return true; > } > > +static int xe_madvise_details_init(struct xe_vm *vm, const struct > drm_xe_madvise *args, > + struct xe_madvise_details *details) > +{ > + struct xe_device *xe = vm->xe; > + > + memset(details, 0, sizeof(*details)); > + > + if (args->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC) { > + int fd = args->preferred_mem_loc.devmem_fd; > + struct drm_pagemap *dpagemap; > + > + if (fd <= 0) > + return 0; > + > + dpagemap = > xe_drm_pagemap_from_fd(args->preferred_mem_loc.devmem_fd, > + > args->preferred_mem_loc.region_instance); > + if (XE_IOCTL_DBG(xe, IS_ERR(dpagemap))) > + return PTR_ERR(dpagemap); > + > + /* Don't allow a foreign placement without a fast interconnect! > */ > + if (XE_IOCTL_DBG(xe, dpagemap->pagemap->owner != > vm->svm.peer.owner)) { > + drm_pagemap_put(dpagemap); > + return -ENOLINK; > + } > + details->dpagemap = dpagemap; > + } > + > + return 0; > +} > + > +static void xe_madvise_details_fini(struct xe_madvise_details *details) > +{ > + drm_pagemap_put(details->dpagemap); > +} > + > static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas, > int num_vmas, u32 atomic_val) > { > @@ -349,6 +405,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void > *data, struct drm_file *fil > struct drm_xe_madvise *args = data; > struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start, > .range = args->range, > }; > + struct xe_madvise_details details; > struct xe_vm *vm; > struct drm_exec exec; > int err, attr_type; > @@ -373,13 +430,17 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void > *data, struct drm_file *fil > goto unlock_vm; > } > > - err = xe_vm_alloc_madvise_vma(vm, args->start, args->range); > + err = xe_madvise_details_init(vm, args, &details); > if (err) > goto unlock_vm; > > + err = xe_vm_alloc_madvise_vma(vm, args->start, args->range); > + if (err) > + goto madv_fini; > + > err = get_vmas(vm, &madvise_range); > if (err || !madvise_range.num_vmas) > - goto unlock_vm; > + goto 
madv_fini; > > if (madvise_range.has_bo_vmas) { > if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) { > @@ -387,7 +448,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void > *data, struct drm_file *fil > madvise_range.num_vmas, > args->atomic.val)) { > err = -EINVAL; > - goto unlock_vm; > + goto madv_fini; > } > } > > @@ -413,7 +474,8 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void > *data, struct drm_file *fil > } > > attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs)); > - madvise_funcs[attr_type](xe, vm, madvise_range.vmas, > madvise_range.num_vmas, args); > + madvise_funcs[attr_type](xe, vm, madvise_range.vmas, > madvise_range.num_vmas, args, > + &details); > > err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + > args->range); > > @@ -425,6 +487,8 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void > *data, struct drm_file *fil > drm_exec_fini(&exec); > kfree(madvise_range.vmas); > madvise_range.vmas = NULL; > +madv_fini: > + xe_madvise_details_fini(&details); > unlock_vm: > up_write(&vm->lock); > put_vm: > diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h > index 47853659a705..34c69bcea203 100644 > --- a/include/uapi/drm/xe_drm.h > +++ b/include/uapi/drm/xe_drm.h > @@ -2071,7 +2071,13 @@ struct drm_xe_madvise { > struct { > #define DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE 0 > #define DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM -1 > - /** @preferred_mem_loc.devmem_fd: fd for preferred loc > */ > + /** > + * @preferred_mem_loc.devmem_fd: > + * Device file-descriptor of the device where the > + * preferred memory is located, or one of the > + * above special values. Please also see > + * @preferred_mem_loc.region_instance below. 
> + */ > __u32 devmem_fd; > > #define DRM_XE_MIGRATE_ALL_PAGES 0 > @@ -2079,8 +2085,14 @@ struct drm_xe_madvise { > /** @preferred_mem_loc.migration_policy: Page migration > policy */ > __u16 migration_policy; > > - /** @preferred_mem_loc.pad : MBZ */ > - __u16 pad; > + /** > + * @preferred_mem_loc.region_instance : Region instance. > + * MBZ if @devmem_fd <= > &DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE. > + * Otherwise should point to the desired device > + * VRAM instance of the device indicated by > + * @preferred_mem_loc.devmem_fd. > + */ > + __u16 region_instance; > > /** @preferred_mem_loc.reserved : Reserved */ > __u64 reserved; > -- > 2.51.1 >
