Re: [Intel-gfx] [PATCH 1/5] drm/i915: Print captured bo for all VM in error state

2014-08-14 Thread Chris Wilson
On Wed, Aug 13, 2014 at 05:50:38PM +0300, Mika Kuoppala wrote:
 Chris Wilson ch...@chris-wilson.co.uk writes:
 
  The current error state harks back to the era of just a single VM. For
  full-ppgtt, we capture every bo on every VM. It behoves us to then print
  every bo for every VM, which we currently fail to do and so miss vital
  information in the error state.
 
  v2: Use the vma address rather than -1!
 
  Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
 
 Offsets can collide between different vm areas.
 
 If we add vm index also to the captured batchbuffer objects,
 we could print it part of the offset '%d:0x%x' that would easily
 identify vm and we would immediately see what vm was active on a ring.

The offsets are printed out per-vm. You want to be more specific in your
complaint. Based on earlier discussion, I think you just want to know
the guilty vm.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/5] drm/i915: Print captured bo for all VM in error state

2014-08-14 Thread Mika Kuoppala
Chris Wilson ch...@chris-wilson.co.uk writes:

 On Wed, Aug 13, 2014 at 05:50:38PM +0300, Mika Kuoppala wrote:
 Chris Wilson ch...@chris-wilson.co.uk writes:
 
  The current error state harks back to the era of just a single VM. For
  full-ppgtt, we capture every bo on every VM. It behoves us to then print
  every bo for every VM, which we currently fail to do and so miss vital
  information in the error state.
 
  v2: Use the vma address rather than -1!
 
  Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
 
 Offsets can collide between different vm areas.
 
 If we add vm index also to the captured batchbuffer objects,
 we could print it part of the offset '%d:0x%x' that would easily
 identify vm and we would immediately see what vm was active on a ring.

 The offsets are printed out per-vm. You want to be more specific in your
 complaint. Based on earlier discussion, I think you just want to know
 the guilty vm.
 -Chris

Yes. And it can be done as a follow up too.

1/5:
Reviewed-by: Mika Kuoppala mika.kuopp...@intel.com
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/5] drm/i915: Print captured bo for all VM in error state

2014-08-14 Thread Daniel Vetter
On Thu, Aug 14, 2014 at 01:18:46PM +0300, Mika Kuoppala wrote:
 Chris Wilson ch...@chris-wilson.co.uk writes:
 
  On Wed, Aug 13, 2014 at 05:50:38PM +0300, Mika Kuoppala wrote:
  Chris Wilson ch...@chris-wilson.co.uk writes:
  
   The current error state harks back to the era of just a single VM. For
   full-ppgtt, we capture every bo on every VM. It behoves us to then print
   every bo for every VM, which we currently fail to do and so miss vital
   information in the error state.
  
   v2: Use the vma address rather than -1!
  
   Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
  
  Offsets can collide between different vm areas.
  
  If we add vm index also to the captured batchbuffer objects,
  we could print it part of the offset '%d:0x%x' that would easily
  identify vm and we would immediately see what vm was active on a ring.
 
  The offsets are printed out per-vm. You want to be more specific in your
  complaint. Based on earlier discussion, I think you just want to know
  the guilty vm.
  -Chris
 
 Yes. And it can be done as a follow up too.
 
 1/5:
 Reviewed-by: Mika Kuoppala mika.kuopp...@intel.com

Queued for -next, thanks for the patch.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/5] drm/i915: Print captured bo for all VM in error state

2014-08-13 Thread Mika Kuoppala
Chris Wilson ch...@chris-wilson.co.uk writes:

 The current error state harks back to the era of just a single VM. For
 full-ppgtt, we capture every bo on every VM. It behoves us to then print
 every bo for every VM, which we currently fail to do and so miss vital
 information in the error state.

 v2: Use the vma address rather than -1!

 Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk

Offsets can collide between different vm areas.

If we add vm index also to the captured batchbuffer objects,
we could print it as part of the offset '%d:0x%x' that would easily
identify vm and we would immediately see what vm was active on a ring.

-Mika

 ---
  drivers/gpu/drm/i915/i915_drv.h   |  2 +
  drivers/gpu/drm/i915/i915_gpu_error.c | 80 
 ---
  2 files changed, 58 insertions(+), 24 deletions(-)

 diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
 index 1bf2cea..e0dcd70 100644
 --- a/drivers/gpu/drm/i915/i915_drv.h
 +++ b/drivers/gpu/drm/i915/i915_drv.h
 @@ -396,6 +396,7 @@ struct drm_i915_error_state {
   pid_t pid;
   char comm[TASK_COMM_LEN];
   } ring[I915_NUM_RINGS];
 +
   struct drm_i915_error_buffer {
   u32 size;
   u32 name;
 @@ -414,6 +415,7 @@ struct drm_i915_error_state {
   } **active_bo, **pinned_bo;
  
   u32 *active_bo_count, *pinned_bo_count;
 + u32 vm_count;
  };
  
  struct intel_connector;
 diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
 b/drivers/gpu/drm/i915/i915_gpu_error.c
 index fc11ac6..35e70d5 100644
 --- a/drivers/gpu/drm/i915/i915_gpu_error.c
 +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
 @@ -192,10 +192,10 @@ static void print_error_buffers(struct 
 drm_i915_error_state_buf *m,
   struct drm_i915_error_buffer *err,
   int count)
  {
 - err_printf(m, "%s [%d]:\n", name, count);
 + err_printf(m, "  %s [%d]:\n", name, count);
  
   while (count--) {
 - err_printf(m, "  %08x %8u %02x %02x %x %x",
 + err_printf(m, "    %08x %8u %02x %02x %x %x",
  err->gtt_offset,
  err->size,
  err->read_domains,
 @@ -393,15 +393,17 @@ int i915_error_state_to_str(struct 
 drm_i915_error_state_buf *m,
   i915_ring_error_state(m, dev, &error->ring[i]);
   }
  
 - if (error->active_bo)
 + for (i = 0; i < error->vm_count; i++) {
 + err_printf(m, "vm[%d]\n", i);
 +
   print_error_buffers(m, "Active",
 - error->active_bo[0],
 - error->active_bo_count[0]);
 + error->active_bo[i],
 + error->active_bo_count[i]);
  
 - if (error->pinned_bo)
   print_error_buffers(m, "Pinned",
 - error->pinned_bo[0],
 - error->pinned_bo_count[0]);
 + error->pinned_bo[i],
 + error->pinned_bo_count[i]);
 + }
  
   for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
   obj = error->ring[i].batchbuffer;
 @@ -644,13 +646,15 @@ unwind:
  (src)->base.size>>PAGE_SHIFT)
  
  static void capture_bo(struct drm_i915_error_buffer *err,
 -struct drm_i915_gem_object *obj)
 +struct i915_vma *vma)
  {
 + struct drm_i915_gem_object *obj = vma->obj;
 +
   err->size = obj->base.size;
   err->name = obj->base.name;
   err->rseqno = obj->last_read_seqno;
   err->wseqno = obj->last_write_seqno;
 - err->gtt_offset = i915_gem_obj_ggtt_offset(obj);
 + err->gtt_offset = vma->node.start;
   err->read_domains = obj->base.read_domains;
   err->write_domain = obj->base.write_domain;
   err->fence_reg = obj->fence_reg;
 @@ -674,7 +678,7 @@ static u32 capture_active_bo(struct drm_i915_error_buffer 
 *err,
   int i = 0;
  
   list_for_each_entry(vma, head, mm_list) {
 - capture_bo(err++, vma->obj);
 + capture_bo(err++, vma);
   if (++i == count)
   break;
   }
 @@ -683,21 +687,27 @@ static u32 capture_active_bo(struct 
 drm_i915_error_buffer *err,
  }
  
  static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
 -  int count, struct list_head *head)
 +  int count, struct list_head *head,
 +  struct i915_address_space *vm)
  {
   struct drm_i915_gem_object *obj;
 - int i = 0;
 + struct drm_i915_error_buffer * const first = err;
 + struct drm_i915_error_buffer * const last = err + count;
  
   list_for_each_entry(obj, head, global_list) {
 - if (!i915_gem_obj_is_pinned(obj))
 - continue;
 + struct i915_vma *vma;
  
 - capture_bo(err++, obj);
 - if