Use device file descriptors and regions to represent pagemaps on
foreign or local devices.

The underlying files are type-checked at madvise time, and
references are kept on the drm_pagemap as long as there are
madvises pointing to it.

Extend the madvise preferred_location UAPI to support the region
instance to identify the foreign placement.

v2:
- Improve UAPI documentation. (Matt Brost)
- Sanitize preferred_mem_loc.region_instance madvise. (Matt Brost)
- Clarify madvise drm_pagemap vs xe_pagemap refcounting. (Matt Brost)
- Don't allow a foreign drm_pagemap madvise without a fast
  interconnect.

Signed-off-by: Thomas Hellström <[email protected]>
---
 drivers/gpu/drm/xe/xe_device.c     | 14 +++++
 drivers/gpu/drm/xe/xe_device.h     |  2 +
 drivers/gpu/drm/xe/xe_svm.c        | 78 +++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_svm.h        |  7 +++
 drivers/gpu/drm/xe/xe_vm_madvise.c | 86 ++++++++++++++++++++++++++----
 include/uapi/drm/xe_drm.h          | 18 +++++--
 6 files changed, 191 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index ff598d0c68d7..2465c7a9a63e 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -373,6 +373,20 @@ static const struct file_operations xe_driver_fops = {
        .fop_flags = FOP_UNSIGNED_OFFSET,
 };
 
+/**
+ * xe_is_xe_file() - Is the file an xe device file?
+ * @file: The file.
+ *
+ * Checks whether the file is opened against
+ * an xe device.
+ *
+ * Return: %true if an xe file, %false if not.
+ */
+bool xe_is_xe_file(const struct file *file)
+{
+       return file->f_op == &xe_driver_fops;
+}
+
 static struct drm_driver driver = {
        /* Don't use MTRRs here; the Xserver or userspace app should
         * deal with them for Intel hardware.
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 32cc6323b7f6..475e2245c955 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -195,6 +195,8 @@ void xe_file_put(struct xe_file *xef);
 
 int xe_is_injection_active(void);
 
+bool xe_is_xe_file(const struct file *file);
+
 /*
  * Occasionally it is seen that the G2H worker starts running after a delay of 
more than
  * a second even after being queued and activated by the Linux workqueue 
subsystem. This
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 006de141dfa7..c0b17b548a00 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -1788,6 +1788,78 @@ int xe_pagemap_cache_create(struct xe_tile *tile)
        return 0;
 }
 
+static struct drm_pagemap *xe_devmem_open(struct xe_device *xe, u32 
region_instance)
+{
+       u32 tile_id = region_instance - 1;
+       struct xe_pagemap *xpagemap;
+       struct drm_pagemap *dpagemap;
+       struct xe_vram_region *vr;
+
+       if (tile_id >= xe->info.tile_count)
+               return ERR_PTR(-ENOENT);
+
+       if (!((BIT(tile_id) << 1) & xe->info.mem_region_mask))
+               return ERR_PTR(-ENOENT);
+
+       vr = xe_tile_to_vr(&xe->tiles[tile_id]);
+       xpagemap = xe_pagemap_find_or_create(xe, vr->dpagemap_cache, vr);
+       if (IS_ERR(xpagemap))
+               return ERR_CAST(xpagemap);
+
+       /* Below is for clarity only. The reference counter is the same. */
+       dpagemap = drm_pagemap_get(&xpagemap->dpagemap);
+       xe_pagemap_put(xpagemap);
+
+       return dpagemap;
+}
+
+/**
+ * xe_drm_pagemap_from_fd() - Return a drm_pagemap pointer from a
+ * (file_descriptor, region_instance) pair.
+ * @fd: An fd opened against an xe device.
+ * @region_instance: The region instance representing the device memory
+ * on the opened xe device.
+ *
+ * Opens a struct drm_pagemap pointer on the
+ * indicated device and region_instance.
+ *
+ * Return: A reference-counted struct drm_pagemap pointer on success,
+ * negative error pointer on failure.
+ */
+struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance)
+{
+       struct drm_pagemap *dpagemap;
+       struct file *file;
+       struct drm_file *fpriv;
+       struct drm_device *drm;
+       int idx;
+
+       if (fd <= 0)
+               return ERR_PTR(-EINVAL);
+
+       file = fget(fd);
+       if (!file)
+               return ERR_PTR(-ENOENT);
+
+       if (!xe_is_xe_file(file)) {
+               dpagemap = ERR_PTR(-ENOENT);
+               goto out;
+       }
+
+       fpriv = file->private_data;
+       drm = fpriv->minor->dev;
+       if (!drm_dev_enter(drm, &idx)) {
+               dpagemap = ERR_PTR(-ENODEV);
+               goto out;
+       }
+
+       dpagemap = xe_devmem_open(to_xe_device(drm), region_instance);
+       drm_dev_exit(idx);
+out:
+       fput(file);
+       return dpagemap;
+}
+
 #else
 
 int xe_pagemap_shrinker_create(struct xe_device *xe)
@@ -1811,6 +1883,12 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma 
*vma, struct xe_tile *t
 {
        return NULL;
 }
+
+struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance)
+{
+       return ERR_PTR(-ENOENT);
+}
+
 #endif
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index a0ec173c6bf0..60eae01a4220 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -187,6 +187,8 @@ int xe_pagemap_shrinker_create(struct xe_device *xe);
 
 int xe_pagemap_cache_create(struct xe_tile *tile);
 
+struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance);
+
 #else
 #include <linux/interval_tree.h>
 #include "xe_vm.h"
@@ -378,6 +380,11 @@ static inline int xe_pagemap_cache_create(struct xe_tile 
*tile)
        return 0;
 }
 
+static inline struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 
region_instance)
+{
+       return ERR_PTR(-ENOENT);
+}
+
 #define xe_svm_range_has_dma_mapping(...) false
 #endif /* CONFIG_DRM_XE_GPUSVM */
 
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c 
b/drivers/gpu/drm/xe/xe_vm_madvise.c
index d6f47c8e146d..add9a6ca2390 100644
--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -22,6 +22,19 @@ struct xe_vmas_in_madvise_range {
        bool has_svm_userptr_vmas;
 };
 
+/**
+ * struct xe_madvise_details - Argument to madvise_funcs
+ * @dpagemap: Reference-counted pointer to a struct drm_pagemap.
+ *
+ * The madvise IOCTL handler may, in addition to the user-space
+ * args, have additional info to pass into the madvise_func that
+ * handles the madvise type. Use a struct xe_madvise_details
+ * for that and extend the struct as necessary.
+ */
+struct xe_madvise_details {
+       struct drm_pagemap *dpagemap;
+};
+
 static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range 
*madvise_range)
 {
        u64 addr = madvise_range->addr;
@@ -74,7 +87,8 @@ static int get_vmas(struct xe_vm *vm, struct 
xe_vmas_in_madvise_range *madvise_r
 
 static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
                                      struct xe_vma **vmas, int num_vmas,
-                                     struct drm_xe_madvise *op)
+                                     struct drm_xe_madvise *op,
+                                     struct xe_madvise_details *details)
 {
        int i;
 
@@ -96,14 +110,18 @@ static void madvise_preferred_mem_loc(struct xe_device 
*xe, struct xe_vm *vm,
                         * is of no use and can be ignored.
                         */
                        loc->migration_policy = 
op->preferred_mem_loc.migration_policy;
+                       drm_pagemap_put(loc->dpagemap);
                        loc->dpagemap = NULL;
+                       if (details->dpagemap)
+                               loc->dpagemap = 
drm_pagemap_get(details->dpagemap);
                }
        }
 }
 
 static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
                           struct xe_vma **vmas, int num_vmas,
-                          struct drm_xe_madvise *op)
+                          struct drm_xe_madvise *op,
+                          struct xe_madvise_details *details)
 {
        struct xe_bo *bo;
        int i;
@@ -144,7 +162,8 @@ static void madvise_atomic(struct xe_device *xe, struct 
xe_vm *vm,
 
 static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
                              struct xe_vma **vmas, int num_vmas,
-                             struct drm_xe_madvise *op)
+                             struct drm_xe_madvise *op,
+                             struct xe_madvise_details *details)
 {
        int i;
 
@@ -162,7 +181,8 @@ static void madvise_pat_index(struct xe_device *xe, struct 
xe_vm *vm,
 
 typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
                             struct xe_vma **vmas, int num_vmas,
-                            struct drm_xe_madvise *op);
+                            struct drm_xe_madvise *op,
+                            struct xe_madvise_details *details);
 
 static const madvise_func madvise_funcs[] = {
        [DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc,
@@ -246,11 +266,12 @@ static bool madvise_args_are_sane(struct xe_device *xe, 
const struct drm_xe_madv
                if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM))
                        return false;
 
-               if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy >
-                                    DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES))
+               if (XE_IOCTL_DBG(xe, fd <= DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE 
&&
+                                args->preferred_mem_loc.region_instance != 0))
                        return false;
 
-               if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad))
+               if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy >
+                                    DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES))
                        return false;
 
                if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved))
@@ -296,6 +317,41 @@ static bool madvise_args_are_sane(struct xe_device *xe, 
const struct drm_xe_madv
        return true;
 }
 
+static int xe_madvise_details_init(struct xe_vm *vm, const struct 
drm_xe_madvise *args,
+                                  struct xe_madvise_details *details)
+{
+       struct xe_device *xe = vm->xe;
+
+       memset(details, 0, sizeof(*details));
+
+       if (args->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC) {
+               int fd = args->preferred_mem_loc.devmem_fd;
+               struct drm_pagemap *dpagemap;
+
+               if (fd <= 0)
+                       return 0;
+
+               dpagemap = 
xe_drm_pagemap_from_fd(args->preferred_mem_loc.devmem_fd,
+                                                 
args->preferred_mem_loc.region_instance);
+               if (XE_IOCTL_DBG(xe, IS_ERR(dpagemap)))
+                       return PTR_ERR(dpagemap);
+
+               /* Don't allow a foreign placement without a fast interconnect! 
*/
+               if (XE_IOCTL_DBG(xe, dpagemap->pagemap->owner != 
vm->svm.peer.owner)) {
+                       drm_pagemap_put(dpagemap);
+                       return -ENOLINK;
+               }
+               details->dpagemap = dpagemap;
+       }
+
+       return 0;
+}
+
+static void xe_madvise_details_fini(struct xe_madvise_details *details)
+{
+       drm_pagemap_put(details->dpagemap);
+}
+
 static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
                                   int num_vmas, u32 atomic_val)
 {
@@ -349,6 +405,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, 
struct drm_file *fil
        struct drm_xe_madvise *args = data;
        struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start,
                                                         .range =  args->range, 
};
+       struct xe_madvise_details details;
        struct xe_vm *vm;
        struct drm_exec exec;
        int err, attr_type;
@@ -373,13 +430,17 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void 
*data, struct drm_file *fil
                goto unlock_vm;
        }
 
-       err = xe_vm_alloc_madvise_vma(vm, args->start, args->range);
+       err = xe_madvise_details_init(vm, args, &details);
        if (err)
                goto unlock_vm;
 
+       err = xe_vm_alloc_madvise_vma(vm, args->start, args->range);
+       if (err)
+               goto madv_fini;
+
        err = get_vmas(vm, &madvise_range);
        if (err || !madvise_range.num_vmas)
-               goto unlock_vm;
+               goto madv_fini;
 
        if (madvise_range.has_bo_vmas) {
                if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
@@ -387,7 +448,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, 
struct drm_file *fil
                                                    madvise_range.num_vmas,
                                                    args->atomic.val)) {
                                err = -EINVAL;
-                               goto unlock_vm;
+                               goto madv_fini;
                        }
                }
 
@@ -413,7 +474,8 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, 
struct drm_file *fil
        }
 
        attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs));
-       madvise_funcs[attr_type](xe, vm, madvise_range.vmas, 
madvise_range.num_vmas, args);
+       madvise_funcs[attr_type](xe, vm, madvise_range.vmas, 
madvise_range.num_vmas, args,
+                                &details);
 
        err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + 
args->range);
 
@@ -425,6 +487,8 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, 
struct drm_file *fil
                drm_exec_fini(&exec);
        kfree(madvise_range.vmas);
        madvise_range.vmas = NULL;
+madv_fini:
+       xe_madvise_details_fini(&details);
 unlock_vm:
        up_write(&vm->lock);
 put_vm:
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 47853659a705..34c69bcea203 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -2071,7 +2071,13 @@ struct drm_xe_madvise {
                struct {
 #define DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE    0
 #define DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM    -1
-                       /** @preferred_mem_loc.devmem_fd: fd for preferred loc 
*/
+                       /**
+                        * @preferred_mem_loc.devmem_fd:
+                        * Device file-descriptor of the device where the
+                        * preferred memory is located, or one of the
+                        * above special values. Please also see
+                        * @preferred_mem_loc.region_instance below.
+                        */
                        __u32 devmem_fd;
 
 #define DRM_XE_MIGRATE_ALL_PAGES               0
@@ -2079,8 +2085,14 @@ struct drm_xe_madvise {
                        /** @preferred_mem_loc.migration_policy: Page migration 
policy */
                        __u16 migration_policy;
 
-                       /** @preferred_mem_loc.pad : MBZ */
-                       __u16 pad;
+                       /**
+                        * @preferred_mem_loc.region_instance : Region instance.
+                        * MBZ if @devmem_fd <= 
%DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE.
+                        * Otherwise should point to the desired device
+                        * VRAM instance of the device indicated by
+                        * @preferred_mem_loc.devmem_fd.
+                        */
+                       __u16 region_instance;
 
                        /** @preferred_mem_loc.reserved : Reserved */
                        __u64 reserved;
-- 
2.51.1

Reply via email to