On 14/02/2019 14:57, Matthew Auld wrote:
From: Abdiel Janulgue <[email protected]>

CPU mmap implementation depending on the object's backing pages.

depends?

At the moment we introduce shmem and local-memory BAR fault handlers.
Note that the mmap type is done one at a time to circumvent the DRM
offset manager limitation. Note that we multiplex mmap_gtt and

Perhaps it is time to sort out the offset manager? I have a feeling that would make things much easier/cleaner for us.

And I at least find mmap_origin a confusing term. It is not the origin of a mapping, but the location of the object's backing store that matters, right?

mmap_offset through the same ioctl, and use the zero extending behaviour
of drm to differentiate between them, when we inspect the flags.

Signed-off-by: Abdiel Janulgue <[email protected]>
Signed-off-by: Matthew Auld <[email protected]>
Cc: Joonas Lahtinen <[email protected]>
---
  drivers/gpu/drm/i915/i915_drv.c        |  5 +-
  drivers/gpu/drm/i915/i915_drv.h        |  3 +
  drivers/gpu/drm/i915/i915_gem.c        | 94 ++++++++++++++++++++++----
  drivers/gpu/drm/i915/i915_gem_object.h | 10 +++
  include/uapi/drm/i915_drm.h            | 30 ++++++++
  5 files changed, 126 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index b1200d7ebd13..90785030a0dd 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -423,6 +423,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void 
*data,
        case I915_PARAM_HAS_EXEC_CAPTURE:
        case I915_PARAM_HAS_EXEC_BATCH_FIRST:
        case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
+       case I915_PARAM_MMAP_OFFSET_VERSION:
                /* For the time being all of these are always true;
                 * if some supported hardware does not have one of these
                 * features this value needs to be provided from
@@ -2936,7 +2937,7 @@ const struct dev_pm_ops i915_pm_ops = {
  static const struct vm_operations_struct i915_gem_vm_ops = {
        .fault = i915_gem_fault,
        .open = drm_gem_vm_open,
-       .close = drm_gem_vm_close,
+       .close = i915_gem_close,
  };
static const struct file_operations i915_driver_fops = {
@@ -2991,7 +2992,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
        DRM_IOCTL_DEF_DRV(I915_GEM_PREAD, i915_gem_pread_ioctl, 
DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(I915_GEM_PWRITE, i915_gem_pwrite_ioctl, 
DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(I915_GEM_MMAP, i915_gem_mmap_ioctl, DRM_RENDER_ALLOW),
-       DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, 
DRM_RENDER_ALLOW),
+       DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_OFFSET, i915_gem_mmap_gtt_ioctl, 
DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, 
DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, 
DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling_ioctl, 
DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 065953a9264f..c6ae157d0ede 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2770,6 +2770,8 @@ int i915_gem_mmap_ioctl(struct drm_device *dev, void 
*data,
                        struct drm_file *file_priv);
  int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
                        struct drm_file *file_priv);
+int i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
+                              struct drm_file *file_priv);
  int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
                              struct drm_file *file_priv);
  int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
@@ -3073,6 +3075,7 @@ void i915_gem_suspend_late(struct drm_i915_private 
*dev_priv);
  void i915_gem_resume(struct drm_i915_private *dev_priv);
  int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma);
  vm_fault_t i915_gem_fault(struct vm_fault *vmf);
+void i915_gem_close(struct vm_area_struct *vma);
  int i915_gem_object_wait(struct drm_i915_gem_object *obj,
                         unsigned int flags,
                         long timeout);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 48dbb57fbc6d..cc6c88ec749d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2123,11 +2123,12 @@ static void i915_gem_object_free_mmap_offset(struct 
drm_i915_gem_object *obj)
        drm_gem_free_mmap_offset(&obj->base);
  }
-int
-i915_gem_mmap_gtt(struct drm_file *file,
-                 struct drm_device *dev,
-                 u32 handle,
-                 u64 *offset)
+static int
+__assign_gem_object_mmap_data(struct drm_file *file,
+                             u32 handle,
+                             enum i915_cpu_mmap_origin_type mmap_type,
+                             u64 mmap_flags,
+                             u64 *offset)
  {
        struct drm_i915_gem_object *obj;
        int ret;
@@ -2136,14 +2137,35 @@ i915_gem_mmap_gtt(struct drm_file *file,
        if (!obj)
                return -ENOENT;
+ if (atomic_read(&obj->mmap_count) &&
+           obj->mmap_origin != mmap_type) {

What is the locking for mmap_count? Can it change state between the first and the second part of the conditional? If not, does it need to be atomic?

+               /* Re-map object with existing different map-type */
+               ret = -EINVAL;

Would -EBUSY be a better fit?

+               goto err;
+       }
+
        ret = i915_gem_object_create_mmap_offset(obj);

If mmap_count is greater than zero, and type/origin match, why is a new offset needed?

-       if (ret == 0)
+       if (ret == 0) {
+               obj->mmap_origin = mmap_type;

Right, so why not obj->mmap_type as well?

+               obj->mmap_flags = mmap_flags;
                *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
+       }
+ err:
        i915_gem_object_put(obj);
        return ret;
  }
+int
+i915_gem_mmap_gtt(struct drm_file *file,
+                 struct drm_device *dev,
+                 u32 handle,
+                 u64 *offset)
+{
+       return __assign_gem_object_mmap_data(file, handle, I915_MMAP_ORIGIN_GTT,
+                                            0, offset);
+}

Is there a caller for this function at this point?

+
  /**
   * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
   * @dev: DRM device
@@ -2163,9 +2185,45 @@ int
  i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
                        struct drm_file *file)
  {
-       struct drm_i915_gem_mmap_gtt *args = data;
+       struct drm_i915_gem_mmap_offset *args = data;
+       struct drm_i915_private *i915 = to_i915(dev);
+
+       if (args->flags & I915_MMAP_OFFSET_FLAGS)
+               return i915_gem_mmap_offset_ioctl(dev, data, file);
+
+       if (!HAS_MAPPABLE_APERTURE(i915)) {
+               DRM_ERROR("No aperture, cannot mmap via legacy GTT\n");

Maybe best to lose the DRM_ERROR since userspace can hammer on it and it is not really an error in the driver.

+               return -ENODEV;
+       }
+
+       return __assign_gem_object_mmap_data(file, args->handle,
+                                            I915_MMAP_ORIGIN_GTT,
+                                            0, &args->offset);
+}
+
+int i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
+                              struct drm_file *file)
+{
+       struct drm_i915_gem_mmap_offset *args = data;
- return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
+       if ((args->flags & (I915_MMAP_OFFSET_WC | I915_MMAP_OFFSET_WB)) &&
+           !boot_cpu_has(X86_FEATURE_PAT))
+               return -ENODEV;
+
+        return __assign_gem_object_mmap_data(file, args->handle,
+                                            I915_MMAP_ORIGIN_OFFSET,
+                                            args->flags,
+                                            &args->offset);
+}
+
+void i915_gem_close(struct vm_area_struct *vma)
+{
+       struct drm_gem_object *gem = vma->vm_private_data;
+       struct drm_i915_gem_object *obj = to_intel_bo(gem);
+
+       atomic_dec(&obj->mmap_count);
+
+       drm_gem_vm_close(vma);
  }
int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
@@ -2178,12 +2236,19 @@ int i915_gem_mmap(struct file *filp, struct 
vm_area_struct *vma)
                return ret;
obj = to_intel_bo(vma->vm_private_data);
-       if (obj->memory_region) {
-               if (obj->mmap_origin == I915_MMAP_ORIGIN_OFFSET) {
-                       vma->vm_flags &= ~VM_PFNMAP;
-                       vma->vm_flags |= VM_MIXEDMAP;
-               }
+       if (obj->mmap_origin == I915_MMAP_ORIGIN_OFFSET) {
+               vma->vm_flags &= ~VM_PFNMAP;
+               vma->vm_flags |= VM_MIXEDMAP;
+               if (obj->mmap_flags & I915_MMAP_OFFSET_WC)
+                       vma->vm_page_prot =
+                               
pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+               else if (obj->mmap_flags & I915_MMAP_OFFSET_WB)
+                       vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+               else if (obj->mmap_flags & I915_MMAP_OFFSET_UC)
+                       vma->vm_page_prot =
+                               
pgprot_noncached(vm_get_page_prot(vma->vm_flags));
        }
+       atomic_inc(&obj->mmap_count);
return ret;
  }
@@ -4228,7 +4293,8 @@ int i915_gem_vmf_fill_pages_cpu(struct 
drm_i915_gem_object *obj,
        vm_fault_t vmf_ret;
        pgoff_t pg_off = (vmf->address - area->vm_start) >> PAGE_SHIFT;
- if (HAS_MAPPABLE_APERTURE(dev_priv))
+       if (HAS_MAPPABLE_APERTURE(dev_priv) &&
+           obj->mmap_origin == I915_MMAP_ORIGIN_GTT)
                return __vmf_fill_pages_gtt(obj, vmf, page_offset);
page = i915_gem_object_get_page(obj, pg_off);
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h 
b/drivers/gpu/drm/i915/i915_gem_object.h
index 5c6bbe6f5e84..b37ffe2e17b6 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -86,6 +86,12 @@ struct drm_i915_gem_object_ops {
                              pgoff_t);
  };
+enum i915_cpu_mmap_origin_type {

i915_mmap_type ?

+       I915_MMAP_ORIGIN_NONE = 0,
+       I915_MMAP_ORIGIN_GTT,
+       I915_MMAP_ORIGIN_OFFSET,
+};
+
  struct drm_i915_gem_object {
        struct drm_gem_object base;
@@ -157,6 +163,10 @@ struct drm_i915_gem_object {
        unsigned int userfault_count;
        struct list_head userfault_link;
+ enum i915_cpu_mmap_origin_type mmap_origin;
+       atomic_t mmap_count;
+       u64 mmap_flags;

Does mmap_flags need to be stored in the object? Is it not only used when setting up the mmap?

+
        struct list_head batch_pool_link;
        I915_SELFTEST_DECLARE(struct list_head st_link);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 397810fa2d33..26d2274b5d2b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -319,6 +319,7 @@ typedef struct _drm_i915_sarea {
  #define DRM_I915_PERF_ADD_CONFIG      0x37
  #define DRM_I915_PERF_REMOVE_CONFIG   0x38
  #define DRM_I915_QUERY                        0x39
+#define DRM_I915_GEM_MMAP_OFFSET       DRM_I915_GEM_MMAP_GTT
#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
  #define DRM_IOCTL_I915_FLUSH          DRM_IO ( DRM_COMMAND_BASE + 
DRM_I915_FLUSH)
@@ -377,6 +378,7 @@ typedef struct _drm_i915_sarea {
  #define DRM_IOCTL_I915_PERF_ADD_CONFIG        DRM_IOW(DRM_COMMAND_BASE + 
DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
  #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG     DRM_IOW(DRM_COMMAND_BASE + 
DRM_I915_PERF_REMOVE_CONFIG, __u64)
  #define DRM_IOCTL_I915_QUERY                  DRM_IOWR(DRM_COMMAND_BASE + 
DRM_I915_QUERY, struct drm_i915_query)
+#define DRM_IOCTL_I915_GEM_MMAP_OFFSET         DRM_IOWR(DRM_COMMAND_BASE + 
DRM_I915_GEM_MMAP_OFFSET, struct drm_i915_gem_mmap_offset)
/* Allow drivers to submit batchbuffers directly to hardware, relying
   * on the security mechanisms provided by hardware.
@@ -559,6 +561,9 @@ typedef struct drm_i915_irq_wait {
   */
  #define I915_PARAM_MMAP_GTT_COHERENT  52
+/* Mmap offset ioctl */
+#define I915_PARAM_MMAP_OFFSET_VERSION 55
+
  typedef struct drm_i915_getparam {
        __s32 param;
        /*
@@ -731,6 +736,31 @@ struct drm_i915_gem_mmap_gtt {
        __u64 offset;
  };
+struct drm_i915_gem_mmap_offset {
+       /** Handle for the object being mapped. */
+       __u32 handle;
+       __u32 pad;
+       /**
+        * Fake offset to use for subsequent mmap call
+        *
+        * This is a fixed-size type for 32/64 compatibility.
+        */
+       __u64 offset;
+
+       /**
+        * Flags for extended behaviour.
+        *
+        * It is mandatory that either one of the _WC/_WB flags
+        * should be passed here.
+        */
+       __u64 flags;
+#define I915_MMAP_OFFSET_WC (1 << 0)
+#define I915_MMAP_OFFSET_WB (1 << 1)
+#define I915_MMAP_OFFSET_UC (1 << 2)

Add explicit GTT as well so userspace can use a single ioctl in all cases?

+#define I915_MMAP_OFFSET_FLAGS \
+       (I915_MMAP_OFFSET_WC | I915_MMAP_OFFSET_WB | I915_MMAP_OFFSET_UC)
+};
+
  struct drm_i915_gem_set_domain {
        /** Handle for the object */
        __u32 handle;

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to