Re: [Intel-gfx] [PATCH] drm/i915: s/seqno/request/ tracking inside objects

2014-07-29 Thread Chris Wilson
On Mon, Jul 28, 2014 at 01:44:12PM -0700, Jesse Barnes wrote:
  +static void
  +i915_gem_object_retire(struct drm_i915_gem_object *obj)
  +{
  +   struct i915_gem_request *rq;
  +   int i;
  +
  +   if (!obj-active)
  +   return;
  +
  +   rq = obj-last_write.request;
  +   if (rq  i915_request_complete(rq, true))
  +   i915_gem_object_retire__write(obj);
  +
  +   rq = obj-last_fence.request;
  +   if (rq  i915_request_complete(rq, true))
  +   i915_gem_object_retire__fence(obj);
  +
  +   for (i = 0; i  I915_NUM_RINGS; i++) {
  +   rq = obj-last_read[i].request;
  +   if (rq  i915_request_complete(rq, true))
  +   i915_gem_object_retire__read(obj, rq-ring);
  +   }
  +}
 
 Unrelated comment on GEM in general: it seems like the callers of this
 all use it after they've waited on the object; maybe it should just
 be called from wait_rendering() to avoid any potential trouble?  The
 name is a bit ambiguous; it could be taken to mean that it does the
 idling.

No. There be dragons. Quite a few of the (indirect) callers of
wait_rendering() cannot handle the recursive freeing of requests/objects
and so we must carefully inspect each caller.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: s/seqno/request/ tracking inside objects

2014-07-29 Thread Chris Wilson
On Mon, Jul 28, 2014 at 01:44:12PM -0700, Jesse Barnes wrote:
  +static void i915_request_retire(struct i915_gem_request *rq)
   {
  -   list_del(request-list);
  -   i915_gem_request_remove_from_client(request);
  +   rq-completed = true;
  +
  +   list_del(rq-list);
  +   i915_gem_request_remove_from_client(rq);
   
  -   if (request-ctx)
  -   i915_gem_context_unreference(request-ctx);
  +   if (rq-ctx) {
  +   i915_gem_context_unreference(rq-ctx);
  +   rq-ctx = NULL;
  +   }
   
  -   kfree(request);
  +   i915_request_put(rq);
   }
 
 I wonder if we can somehow always use this function instead of having
 both obj and request retire functions.  Maybe if we moved obj retire
 into the rendering sync functions we'd have a little less pseudo
 duplication.

I kept it as it was as I was trying to avoid increasing the number of
calls to ring-get_seqno. Elsewhere converting to a fence is good at
reducing the number of calls (as that completed:1 is likely to catch
most cases). But here I think we really do want to treat this en masse.

 request_retire does a put, where is the corresponding get so that we
 don't lose the object when the lock is dropped here or in the
 nonblocking wait?  Or should request_retire not do a put?

Ownership flows from:
  intel_ring_begin - i915_add_request - retire_request.
Everyone else has to grab their own references.

  +static int
  +i915_request_sync(struct i915_gem_request *rq,
  + struct intel_engine_cs *to,
  + struct drm_i915_gem_object *obj)
  +{
  +   int ret, idx;
  +
  +   if (to == NULL)
  +   return i915_wait_request(rq);
  +
  +   /* XXX this is broken by VEBOX+ */
 
 What does this comment mean?  How does VEBOX break this?  Does it not
 have semaphore support or something?

It's a very old comment. In theory it should have been fixed by the recent
gen8 semaphore work.

  @@ -3038,44 +3203,35 @@ out:
*/
   int
   i915_gem_object_sync(struct drm_i915_gem_object *obj,
  -struct intel_engine_cs *to)
  +struct intel_engine_cs *to,
  +bool readonly)
   {
 
 It might be nice to have sync_read/sync_write functions instead, since
 looking up bool args or unnamed enums is a pain.

Not convinced since it is used in a single location and two function
calls would look unelegant - but we could switch to PROT_READ |
PROT_WRITE throughout.

   static bool
   ring_idle(struct intel_engine_cs *ring, u32 seqno)
   {
  return (list_empty(ring-request_list) ||
  -   i915_seqno_passed(seqno, ring_last_seqno(ring)));
  +   __i915_seqno_passed(seqno, ring_last_seqno(ring)));
   }
 
 Unrelated question: why do we have two checks here?  Shouldn't the
 seqno check always be sufficient?  Or is the list_empty check for the
 uninitialized case?

We can't test the seqno without testing that the pointer is valid -
that's the first check. And since we retire lazily we cannot rely on the
request_list being empty itself.

   static int
  -intel_ring_alloc_seqno(struct intel_engine_cs *ring)
  +intel_ring_alloc_request(struct intel_engine_cs *ring)
   {
  -   if (ring-outstanding_lazy_seqno)
  -   return 0;
  +   struct i915_gem_request *rq;
  +   int ret;
   
  -   if (ring-preallocated_lazy_request == NULL) {
  -   struct drm_i915_gem_request *request;
  +   if (ring-preallocated_request)
  +   return 0;
   
  -   request = kmalloc(sizeof(*request), GFP_KERNEL);
  -   if (request == NULL)
  -   return -ENOMEM;
  +   rq = kmalloc(sizeof(*rq), GFP_KERNEL);
  +   if (rq == NULL)
  +   return -ENOMEM;
   
  -   ring-preallocated_lazy_request = request;
  +   ret = i915_gem_get_seqno(ring-dev, rq-seqno);
  +   if (ret) {
  +   kfree(rq);
  +   return ret;
  }
   
  -   return i915_gem_get_seqno(ring-dev,
  ring-outstanding_lazy_seqno);
  +   kref_init(rq-kref);
  +   rq-ring = ring;
  +   rq-completed = false;
  +
  +   ring-preallocated_request = rq;
  +   return 0;
   }
 
 Makes me wonder if we should have our own slab for the objs.  Might
 save a bit of mem and/or perf?  But then could reduce our cache hit
 rate, dunno.

It's just a case of whether we are in a general slab or a named slab.
Having a named slab is nice for debugging, and we definitely have a high
enough reuse rate to justify our own slab.

Elsewhere some useful comments deleted to save electrons :-)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: s/seqno/request/ tracking inside objects

2014-07-29 Thread Daniel Vetter
On Tue, Jul 29, 2014 at 08:29:53AM +0100, Chris Wilson wrote:
 On Mon, Jul 28, 2014 at 01:44:12PM -0700, Jesse Barnes wrote:
   @@ -3038,44 +3203,35 @@ out:
 */
int
i915_gem_object_sync(struct drm_i915_gem_object *obj,
   -  struct intel_engine_cs *to)
   +  struct intel_engine_cs *to,
   +  bool readonly)
{
  
  It might be nice to have sync_read/sync_write functions instead, since
  looking up bool args or unnamed enums is a pain.
 
 Not convinced since it is used in a single location and two function
 calls would look unelegant - but we could switch to PROT_READ |
 PROT_WRITE throughout.

Switching to PROT_READ/WRITE might be nice as a general cleanup (or some
other named thing) since read_only/write booleans are all over the place.
So I like this, but definitely something for a separate patch.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: s/seqno/request/ tracking inside objects

2014-07-29 Thread Jesse Barnes
On Tue, 29 Jul 2014 12:41:26 +0200
Daniel Vetter dan...@ffwll.ch wrote:

 On Tue, Jul 29, 2014 at 08:29:53AM +0100, Chris Wilson wrote:
  On Mon, Jul 28, 2014 at 01:44:12PM -0700, Jesse Barnes wrote:
@@ -3038,44 +3203,35 @@ out:
  */
 int
 i915_gem_object_sync(struct drm_i915_gem_object *obj,
-struct intel_engine_cs *to)
+struct intel_engine_cs *to,
+bool readonly)
 {
   
   It might be nice to have sync_read/sync_write functions instead, since
   looking up bool args or unnamed enums is a pain.
  
  Not convinced since it is used in a single location and two function
  calls would look unelegant - but we could switch to PROT_READ |
  PROT_WRITE throughout.
 
 Switching to PROT_READ/WRITE might be nice as a general cleanup (or some
 other named thing) since read_only/write booleans are all over the place.
 So I like this, but definitely something for a separate patch.

Agreed, that's a good idea.

-- 
Jesse Barnes, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: s/seqno/request/ tracking inside objects

2014-07-28 Thread Daniel Vetter
On Fri, Jul 25, 2014 at 01:27:00PM +0100, Chris Wilson wrote:
 At the heart of this change is that the seqno is a too low level of an
 abstraction to handle the growing complexities of command tracking, both
 with the introduction of multiple command queues with execbuffer and the
 potential for reordering with a scheduler. On top of the seqno we have
 the request. Conceptually this is just a fence, but it also has
 substantial bookkeeping of its own in order to track the context and
 batch in flight, for example. It is the central structure upon which we
 can extend with dependency tracking et al.
 
 As regards the objects, they were using the seqno as a simple fence,
 upon which is check or even wait upon for command completion. This patch
 exchanges that seqno/ring pair with the request itself. For the
 majority, lifetime of the request is ordered by how we retire objects
 then requests. However, both the unlocked waits and probing elsewhere do
 not tie into the normal request lifetimes and so we need to introduce a
 kref. Extending the objects to use the request as the fence naturally
 extends to segregrating read/write fence tracking. This has significance
 for it reduces the number of semaphores we need to emit, reducing the
 likelihood of #54226, and improving performance overall.
 
 NOTE: this is not against bare drm-intel-nightly and is likely to
 conflict with execlists...
 
 Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
 Cc: Jesse Barnes jbar...@virtuousgeek.org
 Cc: Daniel Vetter daniel.vet...@ffwll.ch
 Cc: Oscar Mateo oscar.ma...@intel.com
 Cc: Brad Volkin bradley.d.vol...@intel.com

Ok, read through it and I like overall. Also, right now is the perfect
time to merge it since we're right before the merge window. But this here
needs to be split up a bit to cut out prep patches. I've noticed a few
things in-line, but there's also the mechanical stuff (like dropping the
drm_ prefix from requests).
-Daniel

 ---
  drivers/gpu/drm/i915/i915_debugfs.c  |  37 +-
  drivers/gpu/drm/i915/i915_drv.h  | 108 ++--
  drivers/gpu/drm/i915/i915_gem.c  | 769 
 ---
  drivers/gpu/drm/i915/i915_gem_context.c  |  19 +-
  drivers/gpu/drm/i915/i915_gem_exec.c |  10 +-
  drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  37 +-
  drivers/gpu/drm/i915/i915_gem_render_state.c |   5 +-
  drivers/gpu/drm/i915/i915_gem_tiling.c   |   2 +-
  drivers/gpu/drm/i915/i915_gpu_error.c|  35 +-
  drivers/gpu/drm/i915/i915_irq.c  |   6 +-
  drivers/gpu/drm/i915/i915_perf.c |   6 +-
  drivers/gpu/drm/i915/i915_trace.h|   2 +-
  drivers/gpu/drm/i915/intel_display.c |  50 +-
  drivers/gpu/drm/i915/intel_drv.h |   3 +-
  drivers/gpu/drm/i915/intel_overlay.c | 118 ++--
  drivers/gpu/drm/i915/intel_ringbuffer.c  |  83 +--
  drivers/gpu/drm/i915/intel_ringbuffer.h  |  11 +-
  17 files changed, 745 insertions(+), 556 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
 b/drivers/gpu/drm/i915/i915_debugfs.c
 index 406e630..676d5f1 100644
 --- a/drivers/gpu/drm/i915/i915_debugfs.c
 +++ b/drivers/gpu/drm/i915/i915_debugfs.c
 @@ -122,10 +122,11 @@ static inline const char *get_global_flag(struct 
 drm_i915_gem_object *obj)
  static void
  describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
  {
 + struct i915_gem_request *rq = i915_gem_object_last_read(obj);
   struct i915_vma *vma;
   int pin_count = 0;
  
 - seq_printf(m, %pK: %s%s%s %8zdKiB %02x %02x %u %u %u%s%s%s,
 + seq_printf(m, %pK: %s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s,
  obj-base,
  get_pin_flag(obj),
  get_tiling_flag(obj),
 @@ -133,9 +134,9 @@ describe_obj(struct seq_file *m, struct 
 drm_i915_gem_object *obj)
  obj-base.size / 1024,
  obj-base.read_domains,
  obj-base.write_domain,
 -obj-last_read_seqno,
 -obj-last_write_seqno,
 -obj-last_fenced_seqno,
 +i915_request_seqno(rq),
 +i915_request_seqno(obj-last_write.request),
 +i915_request_seqno(obj-last_fence.request),
  i915_cache_level_str(obj-cache_level),
  obj-dirty ?  dirty : ,
  obj-madv == I915_MADV_DONTNEED ?  purgeable : );
 @@ -168,8 +169,8 @@ describe_obj(struct seq_file *m, struct 
 drm_i915_gem_object *obj)
   *t = '\0';
   seq_printf(m,  (%s mappable), s);
   }
 - if (obj-ring != NULL)
 - seq_printf(m,  (%s), obj-ring-name);
 + if (rq)
 + seq_printf(m,  (%s), rq-ring-name);
   if (obj-frontbuffer_bits)
   seq_printf(m,  (frontbuffer: 0x%03x), obj-frontbuffer_bits);
  }
 @@ -336,7 +337,7 @@ static int per_file_stats(int id, void *ptr, void *data)
   if (ppgtt-ctx  ppgtt-ctx-file_priv != 
 

Re: [Intel-gfx] [PATCH] drm/i915: s/seqno/request/ tracking inside objects

2014-07-28 Thread Jesse Barnes
On Fri, 25 Jul 2014 13:27:00 +0100
Chris Wilson ch...@chris-wilson.co.uk wrote:

 @@ -614,12 +615,12 @@ static int i915_gem_pageflip_info(struct
 seq_file *m, void *data) seq_printf(m, Flip pending (waiting for
 vsync) on pipe %c (plane %c)\n, pipe, plane);
   }
 - if (work-ring)
 + if (work-flip_queued_request) {
 + struct i915_gem_request *rq =
 work-flip_queued_request; seq_printf(m, Flip queued on %s at seqno
 %u, now %u\n,
 - work-ring-name,
 -
 work-flip_queued_seqno,
 -
 work-ring-get_seqno(work-ring, true));
 - else
 + rq-ring-name,
 rq-seqno,
 +
 rq-ring-get_seqno(rq-ring, true));
 + } else
   seq_printf(m, Flip not associated

I wonder if the overlay, flip queue and unlocked wait changes could be
split out somehow; I think they're the trickiest part of the change...

It does look like you're doing get/puts in the right places, though I
didn't check the error paths (and I'm not familiar at all with the
overlay bits, I guess Daniel will have to look at that).

 with any ring\n); seq_printf(m, Flip queued on frame %d, (was ready
 on frame %d), now %d\n, work-flip_queued_vblank,
 @@ -656,7 +657,7 @@ static int i915_gem_request_info(struct seq_file
 *m, void *data) struct drm_device *dev = node-minor-dev;
   struct drm_i915_private *dev_priv = dev-dev_private;
   struct intel_engine_cs *ring;
 - struct drm_i915_gem_request *gem_request;
 + struct i915_gem_request *rq;
   int ret, count, i;
  
   ret = mutex_lock_interruptible(dev-struct_mutex);
 @@ -669,12 +670,10 @@ static int i915_gem_request_info(struct
 seq_file *m, void *data) continue;
  
   seq_printf(m, %s requests:\n, ring-name);
 - list_for_each_entry(gem_request,
 - ring-request_list,
 - list) {
 + list_for_each_entry(rq, ring-request_list, list) {
   seq_printf(m, %d @ %d\n,
 -gem_request-seqno,
 -(int) (jiffies -
 gem_request-emitted_jiffies));
 +rq-seqno,
 +(int)(jiffies -
 rq-emitted_jiffies)); }
   count++;
   }

Semi gratuitous rename (though I know you renamed the struct to catch
all the users).

 diff --git a/drivers/gpu/drm/i915/i915_drv.h
 b/drivers/gpu/drm/i915/i915_drv.h index 9837b0f..5794d096 100644
 --- a/drivers/gpu/drm/i915/i915_drv.h
 +++ b/drivers/gpu/drm/i915/i915_drv.h
 @@ -187,6 +187,7 @@ enum hpd_pin {
  struct drm_i915_private;
  struct i915_mm_struct;
  struct i915_mmu_object;
 +struct i915_gem_request;
  
  enum intel_dpll_id {
   DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
 @@ -1720,16 +1721,15 @@ struct drm_i915_gem_object {
   struct drm_mm_node *stolen;
   struct list_head global_list;
  
 - struct list_head ring_list;
   /** Used in execbuf to temporarily hold a ref */
   struct list_head obj_exec_link;
  
   /**
* This is set if the object is on the active lists (has
 pending
 -  * rendering and so a non-zero seqno), and is not set if it
 i s on
 -  * inactive (ready to be unbound) list.
 +  * rendering and so a submitted request), and is not set if
 it is on
 +  * inactive (ready to be unbound) list. We track activity
 per engine. */
 - unsigned int active:1;
 + unsigned int active:3;

I wonder if it would be better to be explicit and have a num_rings
sized array here then.  We need 4 bits anyway for the second video ring
right?  We'd need a wrapper to check for active then though...

 @@ -1850,7 +1850,9 @@ void i915_gem_track_fb(struct
 drm_i915_gem_object *old,
   * sequence-number comparisons on buffer last_rendering_seqnos, and
 associate
   * an emission time with seqnos for tracking how far ahead of the
 GPU we are. */
 -struct drm_i915_gem_request {
 +struct i915_gem_request {
 + struct kref kref;
 +
   /** On Which ring this request was generated */
   struct intel_engine_cs *ring;
  
 @@ -1878,8 +1880,60 @@ struct drm_i915_gem_request {
   struct drm_i915_file_private *file_priv;
   /** file_priv list entry for this request */
   struct list_head client_list;
 +
 + bool completed:1;
  };

If Daniel pulls in Greg's tree, the kref could turn into a fence and
the completed could be removed.

  
 +static inline struct intel_engine_cs *i915_request_ring(struct
 i915_gem_request *rq) +{
 + return rq ? rq-ring : NULL;
 +}
 +
 +static inline int i915_request_ring_id(struct i915_gem_request *rq)
 +{
 + return rq ? rq-ring-id : -1;
 +}
 +
 +static inline u32 i915_request_seqno(struct i915_gem_request *rq)
 +{
 + return rq ? rq-seqno : 0;
 +}
 +
 +/**
 + * Returns true if seq1