Re: [Intel-gfx] [PATCH 1/5] drm/i915: Print captured bo for all VM in error state
On Wed, Aug 13, 2014 at 05:50:38PM +0300, Mika Kuoppala wrote: Chris Wilson <ch...@chris-wilson.co.uk> writes: The current error state harks back to the era of just a single VM. For full-ppgtt, we capture every bo on every VM. It behoves us to then print every bo for every VM, which we currently fail to do and so miss vital information in the error state. v2: Use the vma address rather than -1! Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> Offsets can collide between different vm areas. If we add vm index also to the captured batchbuffer objects, we could print it as part of the offset '%d:0x%x' that would easily identify vm and we would immediately see what vm was active on a ring. The offsets are printed out per-vm. You want to be more specific in your complaint. Based on earlier discussion, I think you just want to know the guilty vm. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/5] drm/i915: Print captured bo for all VM in error state
Chris Wilson <ch...@chris-wilson.co.uk> writes: On Wed, Aug 13, 2014 at 05:50:38PM +0300, Mika Kuoppala wrote: Chris Wilson <ch...@chris-wilson.co.uk> writes: The current error state harks back to the era of just a single VM. For full-ppgtt, we capture every bo on every VM. It behoves us to then print every bo for every VM, which we currently fail to do and so miss vital information in the error state. v2: Use the vma address rather than -1! Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> Offsets can collide between different vm areas. If we add vm index also to the captured batchbuffer objects, we could print it as part of the offset '%d:0x%x' that would easily identify vm and we would immediately see what vm was active on a ring. The offsets are printed out per-vm. You want to be more specific in your complaint. Based on earlier discussion, I think you just want to know the guilty vm. -Chris Yes. And it can be done as a follow up too. 1/5: Reviewed-by: Mika Kuoppala <mika.kuopp...@intel.com> ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/5] drm/i915: Print captured bo for all VM in error state
On Thu, Aug 14, 2014 at 01:18:46PM +0300, Mika Kuoppala wrote: Chris Wilson <ch...@chris-wilson.co.uk> writes: On Wed, Aug 13, 2014 at 05:50:38PM +0300, Mika Kuoppala wrote: Chris Wilson <ch...@chris-wilson.co.uk> writes: The current error state harks back to the era of just a single VM. For full-ppgtt, we capture every bo on every VM. It behoves us to then print every bo for every VM, which we currently fail to do and so miss vital information in the error state. v2: Use the vma address rather than -1! Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> Offsets can collide between different vm areas. If we add vm index also to the captured batchbuffer objects, we could print it as part of the offset '%d:0x%x' that would easily identify vm and we would immediately see what vm was active on a ring. The offsets are printed out per-vm. You want to be more specific in your complaint. Based on earlier discussion, I think you just want to know the guilty vm. -Chris Yes. And it can be done as a follow up too. 1/5: Reviewed-by: Mika Kuoppala <mika.kuopp...@intel.com> Queued for -next, thanks for the patch. -Daniel -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/5] drm/i915: Print captured bo for all VM in error state
Chris Wilson <ch...@chris-wilson.co.uk> writes: The current error state harks back to the era of just a single VM. For full-ppgtt, we capture every bo on every VM. It behoves us to then print every bo for every VM, which we currently fail to do and so miss vital information in the error state. v2: Use the vma address rather than -1! Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> Offsets can collide between different vm areas. If we add vm index also to the captured batchbuffer objects, we could print it as part of the offset '%d:0x%x' that would easily identify vm and we would immediately see what vm was active on a ring. -Mika --- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gpu_error.c | 80 --- 2 files changed, 58 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1bf2cea..e0dcd70 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -396,6 +396,7 @@ struct drm_i915_error_state { pid_t pid; char comm[TASK_COMM_LEN]; } ring[I915_NUM_RINGS]; + struct drm_i915_error_buffer { u32 size; u32 name; @@ -414,6 +415,7 @@ struct drm_i915_error_state { } **active_bo, **pinned_bo; u32 *active_bo_count, *pinned_bo_count; + u32 vm_count; }; struct intel_connector; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index fc11ac6..35e70d5 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -192,10 +192,10 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, struct drm_i915_error_buffer *err, int count) { - err_printf(m, "%s [%d]:\n", name, count); + err_printf(m, "%s [%d]:\n", name, count); while (count--) { - err_printf(m, "%08x %8u %02x %02x %x %x", + err_printf(m, "%08x %8u %02x %02x %x %x", err->gtt_offset, err->size, err->read_domains, @@ -393,15 +393,17 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, i915_ring_error_state(m, dev, &error->ring[i]); } -
if (error->active_bo) + for (i = 0; i < error->vm_count; i++) { + err_printf(m, "vm[%d]\n", i); + print_error_buffers(m, "Active", - error->active_bo[0], - error->active_bo_count[0]); + error->active_bo[i], + error->active_bo_count[i]); - if (error->pinned_bo) print_error_buffers(m, "Pinned", - error->pinned_bo[0], - error->pinned_bo_count[0]); + error->pinned_bo[i], + error->pinned_bo_count[i]); + } for (i = 0; i < ARRAY_SIZE(error->ring); i++) { obj = error->ring[i].batchbuffer; @@ -644,13 +646,15 @@ unwind: (src)->base.size>>PAGE_SHIFT) static void capture_bo(struct drm_i915_error_buffer *err, -struct drm_i915_gem_object *obj) +struct i915_vma *vma) { + struct drm_i915_gem_object *obj = vma->obj; + err->size = obj->base.size; err->name = obj->base.name; err->rseqno = obj->last_read_seqno; err->wseqno = obj->last_write_seqno; - err->gtt_offset = i915_gem_obj_ggtt_offset(obj); + err->gtt_offset = vma->node.start; err->read_domains = obj->base.read_domains; err->write_domain = obj->base.write_domain; err->fence_reg = obj->fence_reg; @@ -674,7 +678,7 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err, int i = 0; list_for_each_entry(vma, head, mm_list) { - capture_bo(err++, vma->obj); + capture_bo(err++, vma); if (++i == count) break; } @@ -683,21 +687,27 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err, } static u32 capture_pinned_bo(struct drm_i915_error_buffer *err, - int count, struct list_head *head) + int count, struct list_head *head, + struct i915_address_space *vm) { struct drm_i915_gem_object *obj; - int i = 0; + struct drm_i915_error_buffer * const first = err; + struct drm_i915_error_buffer * const last = err + count; list_for_each_entry(obj, head, global_list) { - if (!i915_gem_obj_is_pinned(obj)) - continue; + struct i915_vma *vma; - capture_bo(err++, obj); - if