Re: [Intel-gfx] [PATCH 31/43] drm/i915/bdw: Handle context switch events

2014-08-14 Thread Daniel Vetter
On Thu, Jul 24, 2014 at 05:04:39PM +0100, Thomas Daniel wrote:
 Handle all context status events in the context status buffer on every
 context switch interrupt. We only remove work from the execlist queue
 after a context status buffer reports that it has completed and we only
 attempt to schedule new contexts on interrupt when a previously submitted
 context completes (unless no contexts are queued, which means the GPU is
 free).
 
 We canot call intel_runtime_pm_get() in an interrupt (or with a spinlock
 grabbed, FWIW), because it might sleep, which is not a nice thing to do.
 Instead, do the runtime_pm get/put together with the create/destroy request,
 and handle the forcewake get/put directly.
 
 Signed-off-by: Thomas Daniel thomas.dan...@intel.com
 
 v2: Unreferencing the context when we are freeing the request might free
 the backing bo, which requires the struct_mutex to be grabbed, so defer
 unreferencing and freeing to a bottom half.
 
 v3:
 - Ack the interrupt inmediately, before trying to handle it (fix for
 missing interrupts by Bob Beckett robert.beck...@intel.com).
 - Update the Context Status Buffer Read Pointer, just in case (spotted
 by Damien Lespiau).
 
 v4: New namespace and multiple rebase changes.
 
 v5: Squash with drm/i915/bdw: Do not call intel_runtime_pm_get() in an
 interrupt, as suggested by Daniel.
 
 Signed-off-by: Oscar Mateo oscar.ma...@intel.com
 ---
  drivers/gpu/drm/i915/i915_irq.c |   35 ++---
  drivers/gpu/drm/i915/intel_lrc.c|  129 
 +--
  drivers/gpu/drm/i915/intel_lrc.h|3 +
  drivers/gpu/drm/i915/intel_ringbuffer.h |1 +
  4 files changed, 151 insertions(+), 17 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
 index f77a4ca..e4077d1 100644
 --- a/drivers/gpu/drm/i915/i915_irq.c
 +++ b/drivers/gpu/drm/i915/i915_irq.c
 @@ -1628,6 +1628,7 @@ static irqreturn_t gen8_gt_irq_handler(struct 
 drm_device *dev,
  struct drm_i915_private *dev_priv,
  u32 master_ctl)
  {
 + struct intel_engine_cs *ring;
   u32 rcs, bcs, vcs;
   uint32_t tmp = 0;
   irqreturn_t ret = IRQ_NONE;
 @@ -1637,14 +1638,20 @@ static irqreturn_t gen8_gt_irq_handler(struct 
 drm_device *dev,
   if (tmp) {
   I915_WRITE(GEN8_GT_IIR(0), tmp);
   ret = IRQ_HANDLED;
 +
   rcs = tmp  GEN8_RCS_IRQ_SHIFT;
 - bcs = tmp  GEN8_BCS_IRQ_SHIFT;
 + ring = dev_priv-ring[RCS];
   if (rcs  GT_RENDER_USER_INTERRUPT)
 - notify_ring(dev, dev_priv-ring[RCS]);
 + notify_ring(dev, ring);
 + if (rcs  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
 + intel_execlists_handle_ctx_events(ring);
 +
 + bcs = tmp  GEN8_BCS_IRQ_SHIFT;
 + ring = dev_priv-ring[BCS];
   if (bcs  GT_RENDER_USER_INTERRUPT)
 - notify_ring(dev, dev_priv-ring[BCS]);
 - if ((rcs | bcs)  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
 - DRM_DEBUG_DRIVER(TODO: Context switch\n);
 + notify_ring(dev, ring);
 + if (bcs  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
 + intel_execlists_handle_ctx_events(ring);
   } else
   DRM_ERROR(The master control interrupt lied (GT0)!\n);
   }
 @@ -1654,16 +1661,20 @@ static irqreturn_t gen8_gt_irq_handler(struct 
 drm_device *dev,
   if (tmp) {
   I915_WRITE(GEN8_GT_IIR(1), tmp);
   ret = IRQ_HANDLED;
 +
   vcs = tmp  GEN8_VCS1_IRQ_SHIFT;
 + ring = dev_priv-ring[VCS];
   if (vcs  GT_RENDER_USER_INTERRUPT)
 - notify_ring(dev, dev_priv-ring[VCS]);
 + notify_ring(dev, ring);
   if (vcs  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
 - DRM_DEBUG_DRIVER(TODO: Context switch\n);
 + intel_execlists_handle_ctx_events(ring);
 +
   vcs = tmp  GEN8_VCS2_IRQ_SHIFT;
 + ring = dev_priv-ring[VCS2];
   if (vcs  GT_RENDER_USER_INTERRUPT)
 - notify_ring(dev, dev_priv-ring[VCS2]);
 + notify_ring(dev, ring);
   if (vcs  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
 - DRM_DEBUG_DRIVER(TODO: Context switch\n);
 + intel_execlists_handle_ctx_events(ring);
   } else
   DRM_ERROR(The master control interrupt lied (GT1)!\n);
   }
 @@ -1684,11 +1695,13 @@ static irqreturn_t 

Re: [Intel-gfx] [PATCH 31/43] drm/i915/bdw: Handle context switch events

2014-08-14 Thread Daniel Vetter
On Thu, Jul 24, 2014 at 05:04:39PM +0100, Thomas Daniel wrote:
 Handle all context status events in the context status buffer on every
 context switch interrupt. We only remove work from the execlist queue
 after a context status buffer reports that it has completed and we only
 attempt to schedule new contexts on interrupt when a previously submitted
 context completes (unless no contexts are queued, which means the GPU is
 free).
 
 We canot call intel_runtime_pm_get() in an interrupt (or with a spinlock
 grabbed, FWIW), because it might sleep, which is not a nice thing to do.
 Instead, do the runtime_pm get/put together with the create/destroy request,
 and handle the forcewake get/put directly.
 
 Signed-off-by: Thomas Daniel thomas.dan...@intel.com
 
 v2: Unreferencing the context when we are freeing the request might free
 the backing bo, which requires the struct_mutex to be grabbed, so defer
 unreferencing and freeing to a bottom half.
 
 v3:
 - Ack the interrupt inmediately, before trying to handle it (fix for
 missing interrupts by Bob Beckett robert.beck...@intel.com).
 - Update the Context Status Buffer Read Pointer, just in case (spotted
 by Damien Lespiau).
 
 v4: New namespace and multiple rebase changes.
 
 v5: Squash with drm/i915/bdw: Do not call intel_runtime_pm_get() in an
 interrupt, as suggested by Daniel.
 
 Signed-off-by: Oscar Mateo oscar.ma...@intel.com
 ---
  drivers/gpu/drm/i915/i915_irq.c |   35 ++---
  drivers/gpu/drm/i915/intel_lrc.c|  129 
 +--
  drivers/gpu/drm/i915/intel_lrc.h|3 +
  drivers/gpu/drm/i915/intel_ringbuffer.h |1 +
  4 files changed, 151 insertions(+), 17 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
 index f77a4ca..e4077d1 100644
 --- a/drivers/gpu/drm/i915/i915_irq.c
 +++ b/drivers/gpu/drm/i915/i915_irq.c
 @@ -1628,6 +1628,7 @@ static irqreturn_t gen8_gt_irq_handler(struct 
 drm_device *dev,
  struct drm_i915_private *dev_priv,
  u32 master_ctl)
  {
 + struct intel_engine_cs *ring;
   u32 rcs, bcs, vcs;
   uint32_t tmp = 0;
   irqreturn_t ret = IRQ_NONE;
 @@ -1637,14 +1638,20 @@ static irqreturn_t gen8_gt_irq_handler(struct 
 drm_device *dev,
   if (tmp) {
   I915_WRITE(GEN8_GT_IIR(0), tmp);
   ret = IRQ_HANDLED;
 +
   rcs = tmp  GEN8_RCS_IRQ_SHIFT;
 - bcs = tmp  GEN8_BCS_IRQ_SHIFT;
 + ring = dev_priv-ring[RCS];
   if (rcs  GT_RENDER_USER_INTERRUPT)
 - notify_ring(dev, dev_priv-ring[RCS]);
 + notify_ring(dev, ring);
 + if (rcs  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
 + intel_execlists_handle_ctx_events(ring);
 +
 + bcs = tmp  GEN8_BCS_IRQ_SHIFT;
 + ring = dev_priv-ring[BCS];
   if (bcs  GT_RENDER_USER_INTERRUPT)
 - notify_ring(dev, dev_priv-ring[BCS]);
 - if ((rcs | bcs)  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
 - DRM_DEBUG_DRIVER(TODO: Context switch\n);
 + notify_ring(dev, ring);
 + if (bcs  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
 + intel_execlists_handle_ctx_events(ring);

Also patch split fail here, the above two cases should have been in an
earlier patch that added the basic irq handling.
-Daniel

   } else
   DRM_ERROR(The master control interrupt lied (GT0)!\n);
   }
 @@ -1654,16 +1661,20 @@ static irqreturn_t gen8_gt_irq_handler(struct 
 drm_device *dev,
   if (tmp) {
   I915_WRITE(GEN8_GT_IIR(1), tmp);
   ret = IRQ_HANDLED;
 +
   vcs = tmp  GEN8_VCS1_IRQ_SHIFT;
 + ring = dev_priv-ring[VCS];
   if (vcs  GT_RENDER_USER_INTERRUPT)
 - notify_ring(dev, dev_priv-ring[VCS]);
 + notify_ring(dev, ring);
   if (vcs  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
 - DRM_DEBUG_DRIVER(TODO: Context switch\n);
 + intel_execlists_handle_ctx_events(ring);
 +
   vcs = tmp  GEN8_VCS2_IRQ_SHIFT;
 + ring = dev_priv-ring[VCS2];
   if (vcs  GT_RENDER_USER_INTERRUPT)
 - notify_ring(dev, dev_priv-ring[VCS2]);
 + notify_ring(dev, ring);
   if (vcs  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
 - DRM_DEBUG_DRIVER(TODO: Context switch\n);
 + intel_execlists_handle_ctx_events(ring);
   } else
 

Re: [Intel-gfx] [PATCH 31/43] drm/i915/bdw: Handle context switch events

2014-08-14 Thread Daniel Vetter
On Thu, Jul 24, 2014 at 05:04:39PM +0100, Thomas Daniel wrote:
 Handle all context status events in the context status buffer on every
 context switch interrupt. We only remove work from the execlist queue
 after a context status buffer reports that it has completed and we only
 attempt to schedule new contexts on interrupt when a previously submitted
 context completes (unless no contexts are queued, which means the GPU is
 free).
 
 We canot call intel_runtime_pm_get() in an interrupt (or with a spinlock
 grabbed, FWIW), because it might sleep, which is not a nice thing to do.
 Instead, do the runtime_pm get/put together with the create/destroy request,
 and handle the forcewake get/put directly.
 
 Signed-off-by: Thomas Daniel thomas.dan...@intel.com
 
 v2: Unreferencing the context when we are freeing the request might free
 the backing bo, which requires the struct_mutex to be grabbed, so defer
 unreferencing and freeing to a bottom half.
 
 v3:
 - Ack the interrupt inmediately, before trying to handle it (fix for
 missing interrupts by Bob Beckett robert.beck...@intel.com).
 - Update the Context Status Buffer Read Pointer, just in case (spotted
 by Damien Lespiau).
 
 v4: New namespace and multiple rebase changes.
 
 v5: Squash with drm/i915/bdw: Do not call intel_runtime_pm_get() in an
 interrupt, as suggested by Daniel.
 
 Signed-off-by: Oscar Mateo oscar.ma...@intel.com

 +static void execlists_free_request_task(struct work_struct *work)
 +{
 + struct intel_ctx_submit_request *req =
 + container_of(work, struct intel_ctx_submit_request, 
 work);
 + struct drm_device *dev = req-ring-dev;
 + struct drm_i915_private *dev_priv = dev-dev_private;
 +
 + intel_runtime_pm_put(dev_priv);
 +
 + mutex_lock(dev-struct_mutex);
 + i915_gem_context_unreference(req-ctx);
 + mutex_unlock(dev-struct_mutex);
 +
 + kfree(req);
 +}

Latching a work item simply for the unference of the context looks very
fishy. The context really can't possible disappear before the last request
on it has completed, since the request already holds a reference.

That you have this additional reference here makes it look a bit like the
relationship and lifetime rules for the execlist queue item is misshapen.

I'd have expected:
- A given execlist queue item is responsible for a list of requests (one
  or more)
- Each request already holds onto the context.

The other thing that's strange here is the runtime pm handling. We already
keep the device awake as long as it's busy, so I wonder why exactly we
need this here in addition.

In any case these kind of cleanup tasks are imo better done in the retire
request handler that we already have.

Imo needs some cleanup.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 31/43] drm/i915/bdw: Handle context switch events

2014-08-14 Thread Daniel Vetter
On Thu, Jul 24, 2014 at 05:04:39PM +0100, Thomas Daniel wrote:
 Handle all context status events in the context status buffer on every
 context switch interrupt. We only remove work from the execlist queue
 after a context status buffer reports that it has completed and we only
 attempt to schedule new contexts on interrupt when a previously submitted
 context completes (unless no contexts are queued, which means the GPU is
 free).
 
 We canot call intel_runtime_pm_get() in an interrupt (or with a spinlock
 grabbed, FWIW), because it might sleep, which is not a nice thing to do.
 Instead, do the runtime_pm get/put together with the create/destroy request,
 and handle the forcewake get/put directly.
 
 Signed-off-by: Thomas Daniel thomas.dan...@intel.com
 
 v2: Unreferencing the context when we are freeing the request might free
 the backing bo, which requires the struct_mutex to be grabbed, so defer
 unreferencing and freeing to a bottom half.
 
 v3:
 - Ack the interrupt inmediately, before trying to handle it (fix for
 missing interrupts by Bob Beckett robert.beck...@intel.com).
 - Update the Context Status Buffer Read Pointer, just in case (spotted
 by Damien Lespiau).
 
 v4: New namespace and multiple rebase changes.
 
 v5: Squash with drm/i915/bdw: Do not call intel_runtime_pm_get() in an
 interrupt, as suggested by Daniel.
 
 Signed-off-by: Oscar Mateo oscar.ma...@intel.com

One more ...

 +void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring)

Please rename this to intel_execlist_ctx_events_irq_handler or similar for
consistency with all the other irq handler functions in a follow-up patch.
That kind of consistency helps a lot when reviewing the locking of
irq-save spinlocks.
-Daniel

 +{
 + struct drm_i915_private *dev_priv = ring-dev-dev_private;
 + u32 status_pointer;
 + u8 read_pointer;
 + u8 write_pointer;
 + u32 status;
 + u32 status_id;
 + u32 submit_contexts = 0;
 +
 + status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
 +
 + read_pointer = ring-next_context_status_buffer;
 + write_pointer = status_pointer  0x07;
 + if (read_pointer  write_pointer)
 + write_pointer += 6;
 +
 + spin_lock(ring-execlist_lock);
 +
 + while (read_pointer  write_pointer) {
 + read_pointer++;
 + status = I915_READ(RING_CONTEXT_STATUS_BUF(ring) +
 + (read_pointer % 6) * 8);
 + status_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) +
 + (read_pointer % 6) * 8 + 4);
 +
 + if (status  GEN8_CTX_STATUS_COMPLETE) {
 + if (execlists_check_remove_request(ring, status_id))
 + submit_contexts++;
 + }
 + }
 +
 + if (submit_contexts != 0)
 + execlists_context_unqueue(ring);
 +
 + spin_unlock(ring-execlist_lock);
 +
 + WARN(submit_contexts  2, More than two context complete events?\n);
 + ring-next_context_status_buffer = write_pointer % 6;
 +
 + I915_WRITE(RING_CONTEXT_STATUS_PTR(ring),
 + ((u32)ring-next_context_status_buffer  0x07)  8);
 +}
 +
 +static void execlists_free_request_task(struct work_struct *work)
 +{
 + struct intel_ctx_submit_request *req =
 + container_of(work, struct intel_ctx_submit_request, 
 work);
 + struct drm_device *dev = req-ring-dev;
 + struct drm_i915_private *dev_priv = dev-dev_private;
 +
 + intel_runtime_pm_put(dev_priv);
 +
 + mutex_lock(dev-struct_mutex);
 + i915_gem_context_unreference(req-ctx);
 + mutex_unlock(dev-struct_mutex);
 +
 + kfree(req);
 +}
 +
  static int execlists_context_queue(struct intel_engine_cs *ring,
  struct intel_context *to,
  u32 tail)
 @@ -261,6 +375,8 @@ static int execlists_context_queue(struct intel_engine_cs 
 *ring,
   i915_gem_context_reference(req-ctx);
   req-ring = ring;
   req-tail = tail;
 + INIT_WORK(req-work, execlists_free_request_task);
 + intel_runtime_pm_get(dev_priv);
  
   spin_lock_irqsave(ring-execlist_lock, flags);
  
 @@ -908,6 +1024,7 @@ static int logical_ring_init(struct drm_device *dev, 
 struct intel_engine_cs *rin
  
   INIT_LIST_HEAD(ring-execlist_queue);
   spin_lock_init(ring-execlist_lock);
 + ring-next_context_status_buffer = 0;
  
   ret = intel_lr_context_deferred_create(dctx, ring);
   if (ret)
 diff --git a/drivers/gpu/drm/i915/intel_lrc.h 
 b/drivers/gpu/drm/i915/intel_lrc.h
 index 14492a9..2e8929f 100644
 --- a/drivers/gpu/drm/i915/intel_lrc.h
 +++ b/drivers/gpu/drm/i915/intel_lrc.h
 @@ -66,6 +66,9 @@ struct intel_ctx_submit_request {
   u32 tail;
  
   struct list_head execlist_link;
 + struct work_struct work;
  };
  
 +void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring);
 +
  #endif /* _INTEL_LRC_H_ */
 diff --git 

[Intel-gfx] [PATCH 31/43] drm/i915/bdw: Handle context switch events

2014-07-24 Thread Thomas Daniel
Handle all context status events in the context status buffer on every
context switch interrupt. We only remove work from the execlist queue
after a context status buffer reports that it has completed and we only
attempt to schedule new contexts on interrupt when a previously submitted
context completes (unless no contexts are queued, which means the GPU is
free).

We canot call intel_runtime_pm_get() in an interrupt (or with a spinlock
grabbed, FWIW), because it might sleep, which is not a nice thing to do.
Instead, do the runtime_pm get/put together with the create/destroy request,
and handle the forcewake get/put directly.

Signed-off-by: Thomas Daniel thomas.dan...@intel.com

v2: Unreferencing the context when we are freeing the request might free
the backing bo, which requires the struct_mutex to be grabbed, so defer
unreferencing and freeing to a bottom half.

v3:
- Ack the interrupt inmediately, before trying to handle it (fix for
missing interrupts by Bob Beckett robert.beck...@intel.com).
- Update the Context Status Buffer Read Pointer, just in case (spotted
by Damien Lespiau).

v4: New namespace and multiple rebase changes.

v5: Squash with drm/i915/bdw: Do not call intel_runtime_pm_get() in an
interrupt, as suggested by Daniel.

Signed-off-by: Oscar Mateo oscar.ma...@intel.com
---
 drivers/gpu/drm/i915/i915_irq.c |   35 ++---
 drivers/gpu/drm/i915/intel_lrc.c|  129 +--
 drivers/gpu/drm/i915/intel_lrc.h|3 +
 drivers/gpu/drm/i915/intel_ringbuffer.h |1 +
 4 files changed, 151 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index f77a4ca..e4077d1 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1628,6 +1628,7 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device 
*dev,
   struct drm_i915_private *dev_priv,
   u32 master_ctl)
 {
+   struct intel_engine_cs *ring;
u32 rcs, bcs, vcs;
uint32_t tmp = 0;
irqreturn_t ret = IRQ_NONE;
@@ -1637,14 +1638,20 @@ static irqreturn_t gen8_gt_irq_handler(struct 
drm_device *dev,
if (tmp) {
I915_WRITE(GEN8_GT_IIR(0), tmp);
ret = IRQ_HANDLED;
+
rcs = tmp  GEN8_RCS_IRQ_SHIFT;
-   bcs = tmp  GEN8_BCS_IRQ_SHIFT;
+   ring = dev_priv-ring[RCS];
if (rcs  GT_RENDER_USER_INTERRUPT)
-   notify_ring(dev, dev_priv-ring[RCS]);
+   notify_ring(dev, ring);
+   if (rcs  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
+   intel_execlists_handle_ctx_events(ring);
+
+   bcs = tmp  GEN8_BCS_IRQ_SHIFT;
+   ring = dev_priv-ring[BCS];
if (bcs  GT_RENDER_USER_INTERRUPT)
-   notify_ring(dev, dev_priv-ring[BCS]);
-   if ((rcs | bcs)  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
-   DRM_DEBUG_DRIVER(TODO: Context switch\n);
+   notify_ring(dev, ring);
+   if (bcs  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
+   intel_execlists_handle_ctx_events(ring);
} else
DRM_ERROR(The master control interrupt lied (GT0)!\n);
}
@@ -1654,16 +1661,20 @@ static irqreturn_t gen8_gt_irq_handler(struct 
drm_device *dev,
if (tmp) {
I915_WRITE(GEN8_GT_IIR(1), tmp);
ret = IRQ_HANDLED;
+
vcs = tmp  GEN8_VCS1_IRQ_SHIFT;
+   ring = dev_priv-ring[VCS];
if (vcs  GT_RENDER_USER_INTERRUPT)
-   notify_ring(dev, dev_priv-ring[VCS]);
+   notify_ring(dev, ring);
if (vcs  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
-   DRM_DEBUG_DRIVER(TODO: Context switch\n);
+   intel_execlists_handle_ctx_events(ring);
+
vcs = tmp  GEN8_VCS2_IRQ_SHIFT;
+   ring = dev_priv-ring[VCS2];
if (vcs  GT_RENDER_USER_INTERRUPT)
-   notify_ring(dev, dev_priv-ring[VCS2]);
+   notify_ring(dev, ring);
if (vcs  GEN8_GT_CONTEXT_SWITCH_INTERRUPT)
-   DRM_DEBUG_DRIVER(TODO: Context switch\n);
+   intel_execlists_handle_ctx_events(ring);
} else
DRM_ERROR(The master control interrupt lied (GT1)!\n);
}
@@ -1684,11 +1695,13 @@ static irqreturn_t gen8_gt_irq_handler(struct 
drm_device *dev,
if (tmp) {