By exporting the ability to map user addresses and insert PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to use normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses: zero-copy downloads to the GPU and efficient
readback make the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has widespread implications, from
faster rendering of client-side software rasterisers (chromium),
to mitigation of stalls due to readback (firefox), to faster pipelining
of texture data (such as pixel buffer objects in GL).
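
As an illustration, here is a minimal userspace sketch of driving the
new ioctl (hypothetical code, not part of this patch: fd, ptr and size
come from the caller, drmIoctl() is libdrm's ioctl wrapper, and error
handling is elided):

    #include <stdint.h>
    #include <string.h>
    #include <xf86drm.h>   /* drmIoctl() */
    #include <i915_drm.h>  /* DRM_IOCTL_I915_GEM_USERPTR */

    static uint32_t wrap_buffer(int fd, void *ptr, uint32_t size)
    {
            struct drm_i915_gem_userptr arg;

            /* ptr should cover whole pages for the GTT binding */
            memset(&arg, 0, sizeof(arg));
            arg.user_ptr = (uintptr_t)ptr;
            arg.user_size = size;
            arg.flags = 0; /* or I915_USERPTR_READ_ONLY for textures */

            if (drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
                    return 0; /* handles are nonzero, so 0 == failure */

            return arg.handle; /* a bo backed directly by ptr's pages */
    }

The returned handle can then be used like any other bo, which is what
enables the zero-copy download and readback paths described above.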

v2: Compile with CONFIG_MMU_NOTIFIER
v3: We are allowed to sleep while performing invalidate-range, which we
can utilise to drop our page references prior to the kernel manipulating
the vma (for either discard or cloning), and so protect normal users.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile           |    1 +
 drivers/gpu/drm/i915/i915_dma.c         |    1 +
 drivers/gpu/drm/i915/i915_drv.h         |   22 +++
 drivers/gpu/drm/i915/i915_gem.c         |   11 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c |  320 +++++++++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h             |   15 ++
 6 files changed, 366 insertions(+), 4 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 0f2c549..754d665 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -14,6 +14,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \
          i915_gem_gtt.o \
          i915_gem_stolen.o \
          i915_gem_tiling.o \
+         i915_gem_userptr.o \
          i915_sysfs.o \
          i915_trace_points.o \
          intel_display.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index e347a20..7c025ae 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1892,6 +1892,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
        DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED),
        DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
        DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
+       DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 930270c..c86ba1d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -40,6 +40,7 @@
 #include <linux/backlight.h>
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
+#include <linux/mmu_notifier.h>
 
 /* General customization:
  */
@@ -954,6 +955,7 @@ struct drm_i915_gem_object_ops {
         */
        int (*get_pages)(struct drm_i915_gem_object *);
        void (*put_pages)(struct drm_i915_gem_object *);
+       void (*release)(struct drm_i915_gem_object *);
 };
 
 struct drm_i915_gem_object {
@@ -1099,6 +1101,23 @@ struct drm_i915_gem_object {
        atomic_t pending_flip;
 };
 
+struct i915_gem_userptr_object {
+       struct drm_i915_gem_object gem;
+       uintptr_t user_ptr;
+       size_t user_size;
+       int read_only;
+
+       struct mm_struct *mm;
+#if defined(CONFIG_MMU_NOTIFIER)
+       struct mmu_notifier mn;
+#endif
+};
+
+union drm_i915_gem_objects {
+       struct drm_i915_gem_object base;
+       struct i915_gem_userptr_object userptr;
+};
+
 inline static bool i915_gem_object_is_prime(struct drm_i915_gem_object *obj)
 {
        return obj->base.import_attach != NULL;
@@ -1364,6 +1383,8 @@ int i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
                           struct drm_file *file_priv);
 int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
                           struct drm_file *file_priv);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+                          struct drm_file *file);
 int i915_gem_set_tiling(struct drm_device *dev, void *data,
                        struct drm_file *file_priv);
 int i915_gem_get_tiling(struct drm_device *dev, void *data,
@@ -1416,6 +1437,7 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
        BUG_ON(obj->pages_pin_count == 0);
        obj->pages_pin_count--;
 }
+int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0bb5f86..c4731e2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1802,7 +1802,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
        kfree(obj->pages);
 }
 
-static int
+int
 i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 {
        const struct drm_i915_gem_object_ops *ops = obj->ops;
@@ -2571,9 +2571,9 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
        /* Avoid an unnecessary call to unbind on rebind. */
        obj->map_and_fenceable = true;
 
+       obj->gtt_offset -= obj->gtt_space->start;
        drm_mm_put_block(obj->gtt_space);
        obj->gtt_space = NULL;
-       obj->gtt_offset = 0;
 
        return 0;
 }
@@ -3089,7 +3089,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
        list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list);
        list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
 
-       obj->gtt_offset = obj->gtt_space->start;
+       obj->gtt_offset += obj->gtt_space->start;
 
        fenceable =
                obj->gtt_space->size == fence_size &&
@@ -3882,6 +3882,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
        if (obj->base.import_attach)
                drm_prime_gem_destroy(&obj->base, NULL);
 
+       if (obj->ops->release)
+               obj->ops->release(obj);
+
        drm_gem_object_release(&obj->base);
        i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
@@ -4209,7 +4212,7 @@ i915_gem_load(struct drm_device *dev)
 
        dev_priv->slab =
                kmem_cache_create("i915_gem_object",
-                                 sizeof(struct drm_i915_gem_object), 0,
+                                 sizeof(union drm_i915_gem_objects), 0,
                                  SLAB_HWCACHE_ALIGN,
                                  NULL);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
new file mode 100644
index 0000000..5545181
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/mmu_notifier.h>
+#include <linux/swap.h>
+
+static struct i915_gem_userptr_object *to_userptr_object(struct drm_i915_gem_object *obj)
+{
+       return container_of(obj, struct i915_gem_userptr_object, gem);
+}
+
+#if defined(CONFIG_MMU_NOTIFIER)
+static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *mn,
+                                                      struct mm_struct *mm,
+                                                      unsigned long start,
+                                                      unsigned long end)
+{
+       struct i915_gem_userptr_object *vmap;
+       struct drm_device *dev;
+
+       /* XXX race between obj unref and mmu notifier? */
+       vmap = container_of(mn, struct i915_gem_userptr_object, mn);
+       BUG_ON(vmap->mm != mm);
+
+       dev = vmap->gem.base.dev;
+       mutex_lock(&dev->struct_mutex);
+       if (vmap->gem.gtt_space) {
+               struct drm_i915_private *dev_priv = dev->dev_private;
+               bool was_interruptible;
+               int ret;
+
+               was_interruptible = dev_priv->mm.interruptible;
+               dev_priv->mm.interruptible = false;
+
+               ret = i915_gem_object_unbind(&vmap->gem);
+               BUG_ON(ret && ret != -EIO);
+
+               dev_priv->mm.interruptible = was_interruptible;
+       }
+
+       BUG_ON(i915_gem_object_put_pages(&vmap->gem));
+       mutex_unlock(&dev->struct_mutex);
+}
+
+static void i915_gem_userptr_mn_release(struct mmu_notifier *mn,
+                                       struct mm_struct *mm)
+{
+       struct i915_gem_userptr_object *vmap;
+
+       vmap = container_of(mn, struct i915_gem_userptr_object, mn);
+       BUG_ON(vmap->mm != mm);
+       vmap->mm = NULL;
+
+       /* XXX Schedule an eventual unbind? E.g. hook into retire requests?
+        * However, locking will be complicated.
+        */
+}
+
+static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
+       .invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
+       .release = i915_gem_userptr_mn_release,
+};
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+       if (vmap->mm) {
+               mmu_notifier_unregister(&vmap->mn, vmap->mm);
+               BUG_ON(vmap->mm);
+       }
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+       vmap->mn.ops = &i915_gem_userptr_notifier;
+       return mmu_notifier_register(&vmap->mn, vmap->mm);
+}
+
+#else
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       return 0;
+}
+#endif
+
+static int
+i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+       struct i915_gem_userptr_object *vmap = to_userptr_object(obj);
+       int num_pages = obj->base.size >> PAGE_SHIFT;
+       struct sg_table *st;
+       struct scatterlist *sg;
+       struct page **pvec;
+       int n, pinned, ret;
+
+       if (vmap->mm == NULL)
+               return -EFAULT;
+
+       if (!access_ok(vmap->read_only ? VERIFY_READ : VERIFY_WRITE,
+                      (char __user *)vmap->user_ptr, vmap->user_size))
+               return -EFAULT;
+
+       /* If userspace should engineer that these pages are replaced in
+        * the vma between us binding them into the GTT and completion
+        * of rendering... Their loss. If they change the mapping of their
+        * pages they need to create a new bo to point to the new vma.
+        *
+        * However, that still leaves open the possibility of the vma
+        * being copied upon fork. Which falls under the same userspace
+        * synchronisation issue as a regular bo, except that this time
+        * the process may not be expecting that a particular piece of
+        * memory is tied to the GPU.
+        *
+        * Fortunately, we can hook into the mmu_notifier in order to
+        * discard the page references prior to anything nasty happening
+        * to the vma (discard or cloning) which should prevent the more
+        * egregious cases from causing harm.
+        */
+
+       pvec = kmalloc(num_pages*sizeof(struct page *),
+                      GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+       if (pvec == NULL) {
+               pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+               if (pvec == NULL)
+                       return -ENOMEM;
+       }
+
+       pinned = 0;
+       if (vmap->mm == current->mm)
+               pinned = __get_user_pages_fast(vmap->user_ptr, num_pages,
+                                              !vmap->read_only, pvec);
+       if (pinned < num_pages) {
+               struct mm_struct *mm = vmap->mm;
+               ret = 0;
+               mutex_unlock(&obj->base.dev->struct_mutex);
+               down_read(&mm->mmap_sem);
+               if (vmap->mm != NULL)
+                       ret = get_user_pages(current, mm,
+                                            vmap->user_ptr + (pinned << PAGE_SHIFT),
+                                            num_pages - pinned,
+                                            !vmap->read_only, 0,
+                                            pvec + pinned,
+                                            NULL);
+               up_read(&mm->mmap_sem);
+               mutex_lock(&obj->base.dev->struct_mutex);
+               if (ret > 0)
+                       pinned += ret;
+
+               if (obj->pages || pinned < num_pages) {
+                       ret = obj->pages ? 0 : -EFAULT;
+                       goto cleanup_pinned;
+               }
+       }
+
+       st = kmalloc(sizeof(*st), GFP_KERNEL);
+       if (st == NULL) {
+               ret = -ENOMEM;
+               goto cleanup_pinned;
+       }
+
+       if (sg_alloc_table(st, num_pages, GFP_KERNEL)) {
+               ret = -ENOMEM;
+               goto cleanup_st;
+       }
+
+       for_each_sg(st->sgl, sg, num_pages, n)
+               sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+       drm_free_large(pvec);
+
+       obj->pages = st;
+       return 0;
+
+cleanup_st:
+       kfree(st);
+cleanup_pinned:
+       release_pages(pvec, pinned, 0);
+       drm_free_large(pvec);
+       return ret;
+}
+
+static void
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
+{
+       struct scatterlist *sg;
+       int i;
+
+       if (obj->madv != I915_MADV_WILLNEED)
+               obj->dirty = 0;
+
+       for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+               struct page *page = sg_page(sg);
+
+               if (obj->dirty)
+                       set_page_dirty(page);
+
+               mark_page_accessed(page);
+               page_cache_release(page);
+       }
+       obj->dirty = 0;
+
+       sg_free_table(obj->pages);
+       kfree(obj->pages);
+}
+
+static void
+i915_gem_userptr_release(struct drm_i915_gem_object *obj)
+{
+       struct i915_gem_userptr_object *vmap = to_userptr_object(obj);
+
+       i915_gem_userptr_release__mmu_notifier(vmap);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
+       .get_pages = i915_gem_userptr_get_pages,
+       .put_pages = i915_gem_userptr_put_pages,
+       .release = i915_gem_userptr_release,
+};
+
+/**
+ * Creates a new mm object that wraps some user memory.
+ */
+int
i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_i915_gem_userptr *args = data;
+       struct i915_gem_userptr_object *obj;
+       loff_t first_data_page, last_data_page;
+       int num_pages;
+       int ret;
+       u32 handle;
+
+       if (args->flags & ~(I915_USERPTR_READ_ONLY))
+               return -EINVAL;
+
+       first_data_page = args->user_ptr / PAGE_SIZE;
+       last_data_page = (args->user_ptr + args->user_size - 1) / PAGE_SIZE;
+       num_pages = last_data_page - first_data_page + 1;
+       if (num_pages * PAGE_SIZE > dev_priv->mm.gtt_total)
+               return -E2BIG;
+
+       ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->user_ptr,
+                                          args->user_size);
+       if (ret)
+               return ret;
+
+       /* Allocate the new object */
+       obj = i915_gem_object_alloc(dev);
+       if (obj == NULL)
+               return -ENOMEM;
+
+       if (drm_gem_private_object_init(dev, &obj->gem.base,
+                                       num_pages * PAGE_SIZE)) {
+               i915_gem_object_free(&obj->gem);
+               return -ENOMEM;
+       }
+
+       i915_gem_object_init(&obj->gem, &i915_gem_userptr_ops);
+       obj->gem.cache_level = I915_CACHE_LLC_MLC;
+
+       obj->gem.gtt_offset = offset_in_page(args->user_ptr);
+       obj->user_ptr = args->user_ptr;
+       obj->user_size = args->user_size;
+       obj->read_only = args->flags & I915_USERPTR_READ_ONLY;
+
+       /* And keep a pointer to the current->mm for resolving the user pages
+        * at binding. This means that we need to hook into the mmu_notifier
+        * in order to detect if the mmu is destroyed.
+        */
+       obj->mm = current->mm;
+       ret = i915_gem_userptr_init__mmu_notifier(obj);
+       if (ret)
+               return ret;
+
+       ret = drm_gem_handle_create(file, &obj->gem.base, &handle);
+       /* drop reference from allocate - handle holds it now */
+       drm_gem_object_unreference(&obj->gem.base);
+       if (ret)
+               return ret;
+
+       args->handle = handle;
+       return 0;
+}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index b746a3c..067b98e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -198,6 +198,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_SET_CACHING       0x2f
 #define DRM_I915_GEM_GET_CACHING       0x30
 #define DRM_I915_REG_READ              0x31
+#define DRM_I915_GEM_USERPTR           0x32
 
 #define DRM_IOCTL_I915_INIT            DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH           DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -247,6 +248,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE      DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY     DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ                        DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
+#define DRM_IOCTL_I915_GEM_USERPTR                     DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -950,4 +952,17 @@ struct drm_i915_reg_read {
        __u64 offset;
        __u64 val; /* Return value */
 };
+
+struct drm_i915_gem_userptr {
+       __u64 user_ptr;
+       __u32 user_size;
+       __u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+       /**
+        * Returned handle for the object.
+        *
+        * Object handles are nonzero.
+        */
+       __u32 handle;
+};
 #endif /* _UAPI_I915_DRM_H_ */
-- 
1.7.10.4
