Using copywinwin10 as an example that is dependent upon emitting a lot
of relocations (2 per operation), we see improvements of:

c2d/gm45: 618000.0/sec to 623000.0/sec.
i3-330m: 748000.0/sec to 789000.0/sec.

(measured relative to a baseline with neither optimisation applied).

Signed-off-by: Chris Wilson <[email protected]>
---
 drivers/gpu/drm/i915/i915_dma.c            |    3 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   90 +++++++++++++++++-----------
 include/uapi/drm/i915_drm.h                |    6 ++
 3 files changed, 63 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index a0c4b4f..a35217d 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1020,6 +1020,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
        case I915_PARAM_HAS_EXEC_NO_RELOC:
                value = 1;
                break;
+       case I915_PARAM_HAS_EXEC_HANDLE_LUT:
+               value = 1;
+               break;
        default:
                DRM_DEBUG_DRIVER("Unknown parameter %d\n",
                                 param->param);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 30beea6..9ccd860 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -39,25 +39,40 @@
 
 struct eb_objects {
        struct list_head objects;
-       int and;
-       struct hlist_head buckets[0];
+       unsigned int and;
+       union {
+               struct drm_i915_gem_object *lut[0];
+               struct hlist_head buckets[0];
+       };
 };
 
 static struct eb_objects *
-eb_create(int size)
+eb_create(struct drm_i915_gem_execbuffer2 *args)
 {
-       struct eb_objects *eb;
-       int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
-       BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head)));
-       while (count > size)
-               count >>= 1;
-       eb = kzalloc(count*sizeof(struct hlist_head) +
-                    sizeof(struct eb_objects),
-                    GFP_KERNEL);
-       if (eb == NULL)
-               return eb;
-
-       eb->and = count - 1;
+       struct eb_objects *eb = NULL;
+
+       if (args->flags & I915_EXEC_HANDLE_LUT) {
+               int size = args->buffer_count;
+               size *= sizeof(struct drm_i915_gem_object);
+               size += sizeof(struct eb_objects);
+               eb = kzalloc(size, GFP_TEMPORARY | __GFP_NOWARN | 
__GFP_NORETRY);
+       }
+
+       if (eb == NULL) {
+               int size = args->buffer_count;
+               int count = (PAGE_SIZE - sizeof(struct eb_objects)) / 
sizeof(struct hlist_head);
+               BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct 
hlist_head)));
+               while (count > 2*size)
+                       count >>= 1;
+               eb = kzalloc(count*sizeof(struct hlist_head) +
+                            sizeof(struct eb_objects),
+                            GFP_TEMPORARY);
+               if (eb == NULL)
+                       return eb;
+
+               eb->and = count - 1;
+       }
+
        INIT_LIST_HEAD(&eb->objects);
        return eb;
 }
@@ -65,14 +80,8 @@ eb_create(int size)
 static void
 eb_reset(struct eb_objects *eb)
 {
-       memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
-}
-
-static void
-eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
-{
-       hlist_add_head(&obj->exec_node,
-                      &eb->buckets[obj->exec_handle & eb->and]);
+       if (eb->and)
+               memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
 }
 
 static int
@@ -105,9 +114,14 @@ eb_lookup_objects(struct eb_objects *eb,
                drm_gem_object_reference(&obj->base);
                list_add_tail(&obj->exec_list, &eb->objects);
 
-               obj->exec_handle = exec[i].handle;
                obj->exec_entry = &exec[i];
-               eb_add_object(eb, obj);
+               if (eb->and == 0) {
+                       eb->lut[i] = obj;
+               } else {
+                       obj->exec_handle = exec[i].handle;
+                       hlist_add_head(&obj->exec_node,
+                                      &eb->buckets[exec[i].handle & eb->and]);
+               }
        }
        spin_unlock(&file->table_lock);
 
@@ -117,18 +131,22 @@ eb_lookup_objects(struct eb_objects *eb,
 static struct drm_i915_gem_object *
 eb_get_object(struct eb_objects *eb, unsigned long handle)
 {
-       struct hlist_head *head;
-       struct hlist_node *node;
-       struct drm_i915_gem_object *obj;
+       if (eb->and == 0) {
+               return eb->lut[handle];
+       } else {
+               struct hlist_head *head;
+               struct hlist_node *node;
 
-       head = &eb->buckets[handle & eb->and];
-       hlist_for_each(node, head) {
-               obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
-               if (obj->exec_handle == handle)
-                       return obj;
-       }
+               head = &eb->buckets[handle & eb->and];
+               hlist_for_each(node, head) {
+                       struct drm_i915_gem_object *obj;
 
-       return NULL;
+                       obj = hlist_entry(node, struct drm_i915_gem_object, 
exec_node);
+                       if (obj->exec_handle == handle)
+                               return obj;
+               }
+               return NULL;
+       }
 }
 
 static void
@@ -988,7 +1006,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
                goto pre_mutex_err;
        }
 
-       eb = eb_create(args->buffer_count);
+       eb = eb_create(args);
        if (eb == NULL) {
                mutex_unlock(&dev->struct_mutex);
                ret = -ENOMEM;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7657d3e..82c1088 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -310,6 +310,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_RSVD_FOR_FUTURE_USE  22
 #define I915_PARAM_HAS_SECURE_BATCHES   23
 #define I915_PARAM_HAS_EXEC_NO_RELOC    24
+#define I915_PARAM_HAS_EXEC_HANDLE_LUT   25
 
 typedef struct drm_i915_getparam {
        int param;
@@ -690,6 +691,11 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_NO_RELOC             (1<<10)
 
+/** Use the reloc.handle as an index into the exec object array rather
+ * than as the per-file handle.
+ */
+#define I915_EXEC_HANDLE_LUT           (1<<11)
+
 #define I915_EXEC_CONTEXT_ID_MASK      (0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
        (eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK
-- 
1.7.10.4

_______________________________________________
Intel-gfx mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to