formerly: "drm/i915: Create VMAs (part 4) - Error capture"

Since the active/inactive lists are now per VM, we need to make the
error capture code aware of this, and also extend it to capture the
buffers from all VMs. For now all of the code still assumes only one
VM, but it will become more generic over the next few patches.
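
As a rough illustration of the data-structure shape this moves us to,
here is a minimal user-space sketch (names like error_buffer and
alloc_per_vm are illustrative, not the real i915 structures): the flat
active/pinned arrays become one array per VM with matching per-VM
counts, exactly as the i915_drv.h hunk below does for
drm_i915_error_state. Note the design choice that pinned_bo[vm] is not
a separate allocation; it points into the tail of active_bo[vm]'s
buffer, so only active_bo[vm] is ever freed.

#include <stdlib.h>

struct error_buffer {
	unsigned long gtt_offset;
	/* ... the other captured fields ... */
};

struct error_state {
	struct error_buffer **active_bo;	/* active_bo[vm][i] */
	struct error_buffer **pinned_bo;	/* aliases active_bo[vm]'s tail */
	unsigned int *active_bo_count;		/* one count per VM */
	unsigned int *pinned_bo_count;
};

/* Size the per-VM pointer and count arrays by the number of VMs. */
static int alloc_per_vm(struct error_state *e, int vm_count)
{
	e->active_bo = calloc(vm_count, sizeof(*e->active_bo));
	e->pinned_bo = calloc(vm_count, sizeof(*e->pinned_bo));
	e->active_bo_count = calloc(vm_count, sizeof(*e->active_bo_count));
	e->pinned_bo_count = calloc(vm_count, sizeof(*e->pinned_bo_count));
	if (!e->active_bo || !e->pinned_bo ||
	    !e->active_bo_count || !e->pinned_bo_count)
		return -1;	/* caller frees whatever was allocated */
	return 0;
}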

NOTE: If the number of VMs in a real-world system grows significantly,
we'll have to capture only the guilty VM, or else there likely won't be
enough space for error capture.
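
If we do get there, the PP_DIR_BASE check this patch already adds in
i915_error_first_batchbuffer() hints at how the guilty VM could be
picked out. A hedged sketch, reusing that exact check but applying it
to all buffer capture (which this patch does not yet do):

	/* Sketch only: PP_DIR_BASE names the page directory that was
	 * live on the ring at hang time; skip any PPGTT whose pd_offset
	 * doesn't match. The GGTT would always be captured.
	 */
	pp_db = I915_READ(RING_PP_DIR_BASE(ring));
	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
		struct i915_hw_ppgtt *ppgtt =
			container_of(vm, struct i915_hw_ppgtt, base);
		if (!i915_is_ggtt(vm) && pp_db >> 10 != ppgtt->pd_offset)
			continue;
		/* ... capture only this VM's buffers ... */
	}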

v2: Squashed in the former "part 6" patch, which depended on the
mm_list change. Since the mm_list change has been moved to an earlier
point in the series, that work can be done here and now.

Signed-off-by: Ben Widawsky <b...@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_debugfs.c |   8 +--
 drivers/gpu/drm/i915/i915_drv.h     |   4 +-
 drivers/gpu/drm/i915/i915_irq.c     | 117 ++++++++++++++++++++++++++----------
 3 files changed, 93 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 163ca6b..9a4acc2 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -902,13 +902,13 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 
        if (error->active_bo)
                print_error_buffers(m, "Active",
-                                   error->active_bo,
-                                   error->active_bo_count);
+                                   error->active_bo[0],
+                                   error->active_bo_count[0]);
 
        if (error->pinned_bo)
                print_error_buffers(m, "Pinned",
-                                   error->pinned_bo,
-                                   error->pinned_bo_count);
+                                   error->pinned_bo[0],
+                                   error->pinned_bo_count[0]);
 
        for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
                struct drm_i915_error_object *obj;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 48105f8..b98ad82 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -323,8 +323,8 @@ struct drm_i915_error_state {
                u32 purgeable:1;
                s32 ring:4;
                u32 cache_level:2;
-       } *active_bo, *pinned_bo;
-       u32 active_bo_count, pinned_bo_count;
+       } **active_bo, **pinned_bo;
+       u32 *active_bo_count, *pinned_bo_count;
        struct intel_overlay_error_state *overlay;
        struct intel_display_error_state *display;
 };
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index e065232..bc54d10 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1503,6 +1503,7 @@ static void i915_get_extra_instdone(struct drm_device *dev,
 static struct drm_i915_error_object *
 i915_error_object_create_sized(struct drm_i915_private *dev_priv,
                               struct drm_i915_gem_object *src,
+                              struct i915_address_space *vm,
                               const int num_pages)
 {
        struct drm_i915_error_object *dst;
@@ -1516,7 +1517,7 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
        if (dst == NULL)
                return NULL;
 
-       reloc_offset = dst->gtt_offset = i915_gem_obj_ggtt_offset(src);
+       reloc_offset = dst->gtt_offset = i915_gem_obj_offset(src, vm);
        for (i = 0; i < num_pages; i++) {
                unsigned long flags;
                void *d;
@@ -1577,8 +1578,9 @@ unwind:
        kfree(dst);
        return NULL;
 }
-#define i915_error_object_create(dev_priv, src) \
+#define i915_error_object_create(dev_priv, src, vm) \
        i915_error_object_create_sized((dev_priv), (src), \
+                                      (vm), \
                                       (src)->base.size>>PAGE_SHIFT)
 
 static void
@@ -1609,19 +1611,26 @@ i915_error_state_free(struct kref *error_ref)
                kfree(error->ring[i].requests);
        }
 
+       /* FIXME: Assume always 1 VM for now */
+       if (error->active_bo)
+               kfree(error->active_bo[0]);
        kfree(error->active_bo);
+       kfree(error->pinned_bo);
+       kfree(error->active_bo_count);
+       kfree(error->pinned_bo_count);
        kfree(error->overlay);
        kfree(error->display);
        kfree(error);
 }
 static void capture_bo(struct drm_i915_error_buffer *err,
-                      struct drm_i915_gem_object *obj)
+                      struct drm_i915_gem_object *obj,
+                      struct i915_address_space *vm)
 {
        err->size = obj->base.size;
        err->name = obj->base.name;
        err->rseqno = obj->last_read_seqno;
        err->wseqno = obj->last_write_seqno;
-       err->gtt_offset = i915_gem_obj_ggtt_offset(obj);
+       err->gtt_offset = i915_gem_obj_offset(obj, vm);
        err->read_domains = obj->base.read_domains;
        err->write_domain = obj->base.write_domain;
        err->fence_reg = obj->fence_reg;
@@ -1644,7 +1651,7 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err,
        int i = 0;
 
        list_for_each_entry(vma, head, mm_list) {
-               capture_bo(err++, vma->obj);
+               capture_bo(err++, vma->obj, vma->vm);
                if (++i == count)
                        break;
        }
@@ -1659,10 +1666,14 @@ static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
        int i = 0;
 
        list_for_each_entry(obj, head, global_list) {
+               struct i915_vma *vma;
                if (obj->pin_count == 0)
                        continue;
 
-               capture_bo(err++, obj);
+               /* Object may be pinned in multiple VMs, just take first */
+               vma = list_first_entry(&obj->vma_list, struct i915_vma,
+                                      vma_link);
+               capture_bo(err++, obj, vma->vm);
                if (++i == count)
                        break;
        }
@@ -1710,6 +1721,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
        struct i915_vma *vma;
        struct drm_i915_gem_object *obj;
        u32 seqno;
+       u32 pp_db;
 
        if (!ring->get_seqno)
                return NULL;
@@ -1726,11 +1738,19 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
                obj = ring->private;
                if (acthd >= i915_gem_obj_ggtt_offset(obj) &&
                    acthd < i915_gem_obj_ggtt_offset(obj) + obj->base.size)
-                       return i915_error_object_create(dev_priv, obj);
+                       return i915_error_object_create(dev_priv, obj,
+                                                       &dev_priv->gtt.base);
        }
 
+       pp_db = I915_READ(RING_PP_DIR_BASE(ring));
        seqno = ring->get_seqno(ring, false);
+
        list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
+               struct i915_hw_ppgtt *ppgtt =
+                       container_of(vm, struct i915_hw_ppgtt, base);
+               if (!i915_is_ggtt(vm) && pp_db >> 10 != ppgtt->pd_offset)
+                       continue;
+
                list_for_each_entry(vma, &vm->active_list, mm_list) {
                        obj = vma->obj;
                        if (obj->ring != ring)
@@ -1745,7 +1765,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
                        /* We need to copy these to an anonymous buffer as the simplest
                         * method to avoid being overwritten by userspace.
                         */
-                       return i915_error_object_create(dev_priv, obj);
+                       return i915_error_object_create(dev_priv, obj, vm);
                }
        }
 
@@ -1802,6 +1822,7 @@ static void i915_gem_record_active_context(struct intel_ring_buffer *ring,
                                           struct drm_i915_error_ring *ering)
 {
        struct drm_i915_private *dev_priv = ring->dev->dev_private;
+       struct i915_address_space *ggtt = &dev_priv->gtt.base;
        struct drm_i915_gem_object *obj;
 
        /* Currently render ring is the only HW context user */
@@ -1809,11 +1830,15 @@ static void i915_gem_record_active_context(struct intel_ring_buffer *ring,
                return;
 
        list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+               if (!i915_gem_obj_bound(obj, ggtt))
+                       continue;
+
                if ((error->ccid & PAGE_MASK) ==
                    i915_gem_obj_ggtt_offset(obj)) {
                        ering->ctx = i915_error_object_create_sized(dev_priv,
-                                                                   obj, 1);
-                       break;
+                                                                   obj,
+                                                                   ggtt,
+                                                                   1);
                }
        }
 }
@@ -1833,8 +1858,8 @@ static void i915_gem_record_rings(struct drm_device *dev,
                        i915_error_first_batchbuffer(dev_priv, ring);
 
                error->ring[i].ringbuffer =
-                       i915_error_object_create(dev_priv, ring->obj);
-
+                       i915_error_object_create(dev_priv, ring->obj,
+                                                &dev_priv->gtt.base);
 
                i915_gem_record_active_context(ring, error, &error->ring[i]);
 
@@ -1863,42 +1888,72 @@ static void i915_gem_record_rings(struct drm_device *dev,
        }
 }
 
-static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
-                                    struct drm_i915_error_state *error)
+/* FIXME: Since pin count/bound list is global, we duplicate what we capture per
+ * VM.
+ */
+static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
+                               struct drm_i915_error_state *error,
+                               struct i915_address_space *vm,
+                               const int ndx)
 {
+       struct drm_i915_error_buffer *active_bo = NULL, *pinned_bo = NULL;
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
-       struct i915_address_space *vm = &dev_priv->gtt.base;
        int i;
 
        i = 0;
        list_for_each_entry(vma, &vm->active_list, mm_list)
                i++;
-       error->active_bo_count = i;
+       error->active_bo_count[ndx] = i;
        list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
                if (obj->pin_count)
                        i++;
-       error->pinned_bo_count = i - error->active_bo_count;
+       error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx];
 
        if (i) {
-               error->active_bo = kmalloc(sizeof(*error->active_bo)*i,
-                                          GFP_ATOMIC);
-               if (error->active_bo)
-                       error->pinned_bo =
-                               error->active_bo + error->active_bo_count;
+               active_bo = kmalloc(sizeof(*active_bo)*i, GFP_ATOMIC);
+               if (active_bo)
+                       pinned_bo = active_bo + error->active_bo_count[ndx];
        }
 
-       if (error->active_bo)
-               error->active_bo_count =
-                       capture_active_bo(error->active_bo,
-                                         error->active_bo_count,
+       if (active_bo)
+               error->active_bo_count[ndx] =
+                       capture_active_bo(active_bo,
+                                         error->active_bo_count[ndx],
                                          &vm->active_list);
 
-       if (error->pinned_bo)
-               error->pinned_bo_count =
-                       capture_pinned_bo(error->pinned_bo,
-                                         error->pinned_bo_count,
+       if (pinned_bo)
+               error->pinned_bo_count[ndx] =
+                       capture_pinned_bo(pinned_bo,
+                                         error->pinned_bo_count[ndx],
                                          &dev_priv->mm.bound_list);
+       error->active_bo[ndx] = active_bo;
+       error->pinned_bo[ndx] = pinned_bo;
+}
+
+static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
+                                    struct drm_i915_error_state *error)
+{
+       struct i915_address_space *vm;
+       int cnt = 0, i = 0;
+
+       list_for_each_entry(vm, &dev_priv->vm_list, global_link)
+               cnt++;
+
+       if (WARN(cnt > 1, "Multiple VMs not yet supported\n"))
+               cnt = 1;
+
+       error->active_bo = kcalloc(cnt, sizeof(*error->active_bo), GFP_ATOMIC);
+       error->pinned_bo = kcalloc(cnt, sizeof(*error->pinned_bo), GFP_ATOMIC);
+       error->active_bo_count = kcalloc(cnt, sizeof(*error->active_bo_count),
+                                        GFP_ATOMIC);
+       error->pinned_bo_count = kcalloc(cnt, sizeof(*error->pinned_bo_count),
+                                        GFP_ATOMIC);
+
+       /* cnt may have been clamped above; don't overrun the arrays */
+       list_for_each_entry(vm, &dev_priv->vm_list, global_link)
+               if (i < cnt)
+                       i915_gem_capture_vm(dev_priv, error, vm, i++);
 }
 
 /**
-- 
1.8.3.2
