Re: [Intel-gfx] [PATCH 31/43] drm/i915/bdw: Handle context switch events
On Thu, Jul 24, 2014 at 05:04:39PM +0100, Thomas Daniel wrote: Handle all context status events in the context status buffer on every context switch interrupt. We only remove work from the execlist queue after a context status buffer reports that it has completed and we only attempt to schedule new contexts on interrupt when a previously submitted context completes (unless no contexts are queued, which means the GPU is free). We canot call intel_runtime_pm_get() in an interrupt (or with a spinlock grabbed, FWIW), because it might sleep, which is not a nice thing to do. Instead, do the runtime_pm get/put together with the create/destroy request, and handle the forcewake get/put directly. Signed-off-by: Thomas Daniel thomas.dan...@intel.com v2: Unreferencing the context when we are freeing the request might free the backing bo, which requires the struct_mutex to be grabbed, so defer unreferencing and freeing to a bottom half. v3: - Ack the interrupt inmediately, before trying to handle it (fix for missing interrupts by Bob Beckett robert.beck...@intel.com). - Update the Context Status Buffer Read Pointer, just in case (spotted by Damien Lespiau). v4: New namespace and multiple rebase changes. v5: Squash with drm/i915/bdw: Do not call intel_runtime_pm_get() in an interrupt, as suggested by Daniel. Signed-off-by: Oscar Mateo oscar.ma...@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 35 ++--- drivers/gpu/drm/i915/intel_lrc.c| 129 +-- drivers/gpu/drm/i915/intel_lrc.h|3 + drivers/gpu/drm/i915/intel_ringbuffer.h |1 + 4 files changed, 151 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f77a4ca..e4077d1 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1628,6 +1628,7 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, struct drm_i915_private *dev_priv, u32 master_ctl) { + struct intel_engine_cs *ring; u32 rcs, bcs, vcs; uint32_t tmp = 0; irqreturn_t ret = IRQ_NONE; @@ -1637,14 +1638,20 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, if (tmp) { I915_WRITE(GEN8_GT_IIR(0), tmp); ret = IRQ_HANDLED; + rcs = tmp GEN8_RCS_IRQ_SHIFT; - bcs = tmp GEN8_BCS_IRQ_SHIFT; + ring = dev_priv-ring[RCS]; if (rcs GT_RENDER_USER_INTERRUPT) - notify_ring(dev, dev_priv-ring[RCS]); + notify_ring(dev, ring); + if (rcs GEN8_GT_CONTEXT_SWITCH_INTERRUPT) + intel_execlists_handle_ctx_events(ring); + + bcs = tmp GEN8_BCS_IRQ_SHIFT; + ring = dev_priv-ring[BCS]; if (bcs GT_RENDER_USER_INTERRUPT) - notify_ring(dev, dev_priv-ring[BCS]); - if ((rcs | bcs) GEN8_GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER(TODO: Context switch\n); + notify_ring(dev, ring); + if (bcs GEN8_GT_CONTEXT_SWITCH_INTERRUPT) + intel_execlists_handle_ctx_events(ring); } else DRM_ERROR(The master control interrupt lied (GT0)!\n); } @@ -1654,16 +1661,20 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, if (tmp) { I915_WRITE(GEN8_GT_IIR(1), tmp); ret = IRQ_HANDLED; + vcs = tmp GEN8_VCS1_IRQ_SHIFT; + ring = dev_priv-ring[VCS]; if (vcs GT_RENDER_USER_INTERRUPT) - notify_ring(dev, dev_priv-ring[VCS]); + notify_ring(dev, ring); if (vcs GEN8_GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER(TODO: Context switch\n); + intel_execlists_handle_ctx_events(ring); + vcs = tmp GEN8_VCS2_IRQ_SHIFT; + ring = dev_priv-ring[VCS2]; if (vcs GT_RENDER_USER_INTERRUPT) - notify_ring(dev, dev_priv-ring[VCS2]); + notify_ring(dev, ring); if (vcs GEN8_GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER(TODO: Context switch\n); + intel_execlists_handle_ctx_events(ring); } else DRM_ERROR(The master control interrupt lied (GT1)!\n); } @@ -1684,11 +1695,13 @@ static irqreturn_t
Re: [Intel-gfx] [PATCH 31/43] drm/i915/bdw: Handle context switch events
On Thu, Jul 24, 2014 at 05:04:39PM +0100, Thomas Daniel wrote: Handle all context status events in the context status buffer on every context switch interrupt. We only remove work from the execlist queue after a context status buffer reports that it has completed and we only attempt to schedule new contexts on interrupt when a previously submitted context completes (unless no contexts are queued, which means the GPU is free). We canot call intel_runtime_pm_get() in an interrupt (or with a spinlock grabbed, FWIW), because it might sleep, which is not a nice thing to do. Instead, do the runtime_pm get/put together with the create/destroy request, and handle the forcewake get/put directly. Signed-off-by: Thomas Daniel thomas.dan...@intel.com v2: Unreferencing the context when we are freeing the request might free the backing bo, which requires the struct_mutex to be grabbed, so defer unreferencing and freeing to a bottom half. v3: - Ack the interrupt inmediately, before trying to handle it (fix for missing interrupts by Bob Beckett robert.beck...@intel.com). - Update the Context Status Buffer Read Pointer, just in case (spotted by Damien Lespiau). v4: New namespace and multiple rebase changes. v5: Squash with drm/i915/bdw: Do not call intel_runtime_pm_get() in an interrupt, as suggested by Daniel. Signed-off-by: Oscar Mateo oscar.ma...@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 35 ++--- drivers/gpu/drm/i915/intel_lrc.c| 129 +-- drivers/gpu/drm/i915/intel_lrc.h|3 + drivers/gpu/drm/i915/intel_ringbuffer.h |1 + 4 files changed, 151 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f77a4ca..e4077d1 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1628,6 +1628,7 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, struct drm_i915_private *dev_priv, u32 master_ctl) { + struct intel_engine_cs *ring; u32 rcs, bcs, vcs; uint32_t tmp = 0; irqreturn_t ret = IRQ_NONE; @@ -1637,14 +1638,20 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, if (tmp) { I915_WRITE(GEN8_GT_IIR(0), tmp); ret = IRQ_HANDLED; + rcs = tmp GEN8_RCS_IRQ_SHIFT; - bcs = tmp GEN8_BCS_IRQ_SHIFT; + ring = dev_priv-ring[RCS]; if (rcs GT_RENDER_USER_INTERRUPT) - notify_ring(dev, dev_priv-ring[RCS]); + notify_ring(dev, ring); + if (rcs GEN8_GT_CONTEXT_SWITCH_INTERRUPT) + intel_execlists_handle_ctx_events(ring); + + bcs = tmp GEN8_BCS_IRQ_SHIFT; + ring = dev_priv-ring[BCS]; if (bcs GT_RENDER_USER_INTERRUPT) - notify_ring(dev, dev_priv-ring[BCS]); - if ((rcs | bcs) GEN8_GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER(TODO: Context switch\n); + notify_ring(dev, ring); + if (bcs GEN8_GT_CONTEXT_SWITCH_INTERRUPT) + intel_execlists_handle_ctx_events(ring); Also patch split fail here, the above two cases should have been in an earlier patch that added the basic irq handling. -Daniel } else DRM_ERROR(The master control interrupt lied (GT0)!\n); } @@ -1654,16 +1661,20 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, if (tmp) { I915_WRITE(GEN8_GT_IIR(1), tmp); ret = IRQ_HANDLED; + vcs = tmp GEN8_VCS1_IRQ_SHIFT; + ring = dev_priv-ring[VCS]; if (vcs GT_RENDER_USER_INTERRUPT) - notify_ring(dev, dev_priv-ring[VCS]); + notify_ring(dev, ring); if (vcs GEN8_GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER(TODO: Context switch\n); + intel_execlists_handle_ctx_events(ring); + vcs = tmp GEN8_VCS2_IRQ_SHIFT; + ring = dev_priv-ring[VCS2]; if (vcs GT_RENDER_USER_INTERRUPT) - notify_ring(dev, dev_priv-ring[VCS2]); + notify_ring(dev, ring); if (vcs GEN8_GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER(TODO: Context switch\n); + intel_execlists_handle_ctx_events(ring); } else
Re: [Intel-gfx] [PATCH 31/43] drm/i915/bdw: Handle context switch events
On Thu, Jul 24, 2014 at 05:04:39PM +0100, Thomas Daniel wrote: Handle all context status events in the context status buffer on every context switch interrupt. We only remove work from the execlist queue after a context status buffer reports that it has completed and we only attempt to schedule new contexts on interrupt when a previously submitted context completes (unless no contexts are queued, which means the GPU is free). We canot call intel_runtime_pm_get() in an interrupt (or with a spinlock grabbed, FWIW), because it might sleep, which is not a nice thing to do. Instead, do the runtime_pm get/put together with the create/destroy request, and handle the forcewake get/put directly. Signed-off-by: Thomas Daniel thomas.dan...@intel.com v2: Unreferencing the context when we are freeing the request might free the backing bo, which requires the struct_mutex to be grabbed, so defer unreferencing and freeing to a bottom half. v3: - Ack the interrupt inmediately, before trying to handle it (fix for missing interrupts by Bob Beckett robert.beck...@intel.com). - Update the Context Status Buffer Read Pointer, just in case (spotted by Damien Lespiau). v4: New namespace and multiple rebase changes. v5: Squash with drm/i915/bdw: Do not call intel_runtime_pm_get() in an interrupt, as suggested by Daniel. Signed-off-by: Oscar Mateo oscar.ma...@intel.com +static void execlists_free_request_task(struct work_struct *work) +{ + struct intel_ctx_submit_request *req = + container_of(work, struct intel_ctx_submit_request, work); + struct drm_device *dev = req-ring-dev; + struct drm_i915_private *dev_priv = dev-dev_private; + + intel_runtime_pm_put(dev_priv); + + mutex_lock(dev-struct_mutex); + i915_gem_context_unreference(req-ctx); + mutex_unlock(dev-struct_mutex); + + kfree(req); +} Latching a work item simply for the unference of the context looks very fishy. The context really can't possible disappear before the last request on it has completed, since the request already holds a reference. That you have this additional reference here makes it look a bit like the relationship and lifetime rules for the execlist queue item is misshapen. I'd have expected: - A given execlist queue item is responsible for a list of requests (one or more) - Each request already holds onto the context. The other thing that's strange here is the runtime pm handling. We already keep the device awake as long as it's busy, so I wonder why exactly we need this here in addition. In any case these kind of cleanup tasks are imo better done in the retire request handler that we already have. Imo needs some cleanup. -Daniel -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 31/43] drm/i915/bdw: Handle context switch events
On Thu, Jul 24, 2014 at 05:04:39PM +0100, Thomas Daniel wrote: Handle all context status events in the context status buffer on every context switch interrupt. We only remove work from the execlist queue after a context status buffer reports that it has completed and we only attempt to schedule new contexts on interrupt when a previously submitted context completes (unless no contexts are queued, which means the GPU is free). We canot call intel_runtime_pm_get() in an interrupt (or with a spinlock grabbed, FWIW), because it might sleep, which is not a nice thing to do. Instead, do the runtime_pm get/put together with the create/destroy request, and handle the forcewake get/put directly. Signed-off-by: Thomas Daniel thomas.dan...@intel.com v2: Unreferencing the context when we are freeing the request might free the backing bo, which requires the struct_mutex to be grabbed, so defer unreferencing and freeing to a bottom half. v3: - Ack the interrupt inmediately, before trying to handle it (fix for missing interrupts by Bob Beckett robert.beck...@intel.com). - Update the Context Status Buffer Read Pointer, just in case (spotted by Damien Lespiau). v4: New namespace and multiple rebase changes. v5: Squash with drm/i915/bdw: Do not call intel_runtime_pm_get() in an interrupt, as suggested by Daniel. Signed-off-by: Oscar Mateo oscar.ma...@intel.com One more ... +void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring) Please rename this to intel_execlist_ctx_events_irq_handler or similar for consistency with all the other irq handler functions in a follow-up patch. That kind of consistency helps a lot when reviewing the locking of irq-save spinlocks. -Daniel +{ + struct drm_i915_private *dev_priv = ring-dev-dev_private; + u32 status_pointer; + u8 read_pointer; + u8 write_pointer; + u32 status; + u32 status_id; + u32 submit_contexts = 0; + + status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring)); + + read_pointer = ring-next_context_status_buffer; + write_pointer = status_pointer 0x07; + if (read_pointer write_pointer) + write_pointer += 6; + + spin_lock(ring-execlist_lock); + + while (read_pointer write_pointer) { + read_pointer++; + status = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + + (read_pointer % 6) * 8); + status_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + + (read_pointer % 6) * 8 + 4); + + if (status GEN8_CTX_STATUS_COMPLETE) { + if (execlists_check_remove_request(ring, status_id)) + submit_contexts++; + } + } + + if (submit_contexts != 0) + execlists_context_unqueue(ring); + + spin_unlock(ring-execlist_lock); + + WARN(submit_contexts 2, More than two context complete events?\n); + ring-next_context_status_buffer = write_pointer % 6; + + I915_WRITE(RING_CONTEXT_STATUS_PTR(ring), + ((u32)ring-next_context_status_buffer 0x07) 8); +} + +static void execlists_free_request_task(struct work_struct *work) +{ + struct intel_ctx_submit_request *req = + container_of(work, struct intel_ctx_submit_request, work); + struct drm_device *dev = req-ring-dev; + struct drm_i915_private *dev_priv = dev-dev_private; + + intel_runtime_pm_put(dev_priv); + + mutex_lock(dev-struct_mutex); + i915_gem_context_unreference(req-ctx); + mutex_unlock(dev-struct_mutex); + + kfree(req); +} + static int execlists_context_queue(struct intel_engine_cs *ring, struct intel_context *to, u32 tail) @@ -261,6 +375,8 @@ static int execlists_context_queue(struct intel_engine_cs *ring, i915_gem_context_reference(req-ctx); req-ring = ring; req-tail = tail; + INIT_WORK(req-work, execlists_free_request_task); + intel_runtime_pm_get(dev_priv); spin_lock_irqsave(ring-execlist_lock, flags); @@ -908,6 +1024,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin INIT_LIST_HEAD(ring-execlist_queue); spin_lock_init(ring-execlist_lock); + ring-next_context_status_buffer = 0; ret = intel_lr_context_deferred_create(dctx, ring); if (ret) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 14492a9..2e8929f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -66,6 +66,9 @@ struct intel_ctx_submit_request { u32 tail; struct list_head execlist_link; + struct work_struct work; }; +void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring); + #endif /* _INTEL_LRC_H_ */ diff --git
[Intel-gfx] [PATCH 31/43] drm/i915/bdw: Handle context switch events
Handle all context status events in the context status buffer on every context switch interrupt. We only remove work from the execlist queue after a context status buffer reports that it has completed and we only attempt to schedule new contexts on interrupt when a previously submitted context completes (unless no contexts are queued, which means the GPU is free). We canot call intel_runtime_pm_get() in an interrupt (or with a spinlock grabbed, FWIW), because it might sleep, which is not a nice thing to do. Instead, do the runtime_pm get/put together with the create/destroy request, and handle the forcewake get/put directly. Signed-off-by: Thomas Daniel thomas.dan...@intel.com v2: Unreferencing the context when we are freeing the request might free the backing bo, which requires the struct_mutex to be grabbed, so defer unreferencing and freeing to a bottom half. v3: - Ack the interrupt inmediately, before trying to handle it (fix for missing interrupts by Bob Beckett robert.beck...@intel.com). - Update the Context Status Buffer Read Pointer, just in case (spotted by Damien Lespiau). v4: New namespace and multiple rebase changes. v5: Squash with drm/i915/bdw: Do not call intel_runtime_pm_get() in an interrupt, as suggested by Daniel. Signed-off-by: Oscar Mateo oscar.ma...@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 35 ++--- drivers/gpu/drm/i915/intel_lrc.c| 129 +-- drivers/gpu/drm/i915/intel_lrc.h|3 + drivers/gpu/drm/i915/intel_ringbuffer.h |1 + 4 files changed, 151 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f77a4ca..e4077d1 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1628,6 +1628,7 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, struct drm_i915_private *dev_priv, u32 master_ctl) { + struct intel_engine_cs *ring; u32 rcs, bcs, vcs; uint32_t tmp = 0; irqreturn_t ret = IRQ_NONE; @@ -1637,14 +1638,20 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, if (tmp) { I915_WRITE(GEN8_GT_IIR(0), tmp); ret = IRQ_HANDLED; + rcs = tmp GEN8_RCS_IRQ_SHIFT; - bcs = tmp GEN8_BCS_IRQ_SHIFT; + ring = dev_priv-ring[RCS]; if (rcs GT_RENDER_USER_INTERRUPT) - notify_ring(dev, dev_priv-ring[RCS]); + notify_ring(dev, ring); + if (rcs GEN8_GT_CONTEXT_SWITCH_INTERRUPT) + intel_execlists_handle_ctx_events(ring); + + bcs = tmp GEN8_BCS_IRQ_SHIFT; + ring = dev_priv-ring[BCS]; if (bcs GT_RENDER_USER_INTERRUPT) - notify_ring(dev, dev_priv-ring[BCS]); - if ((rcs | bcs) GEN8_GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER(TODO: Context switch\n); + notify_ring(dev, ring); + if (bcs GEN8_GT_CONTEXT_SWITCH_INTERRUPT) + intel_execlists_handle_ctx_events(ring); } else DRM_ERROR(The master control interrupt lied (GT0)!\n); } @@ -1654,16 +1661,20 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, if (tmp) { I915_WRITE(GEN8_GT_IIR(1), tmp); ret = IRQ_HANDLED; + vcs = tmp GEN8_VCS1_IRQ_SHIFT; + ring = dev_priv-ring[VCS]; if (vcs GT_RENDER_USER_INTERRUPT) - notify_ring(dev, dev_priv-ring[VCS]); + notify_ring(dev, ring); if (vcs GEN8_GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER(TODO: Context switch\n); + intel_execlists_handle_ctx_events(ring); + vcs = tmp GEN8_VCS2_IRQ_SHIFT; + ring = dev_priv-ring[VCS2]; if (vcs GT_RENDER_USER_INTERRUPT) - notify_ring(dev, dev_priv-ring[VCS2]); + notify_ring(dev, ring); if (vcs GEN8_GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER(TODO: Context switch\n); + intel_execlists_handle_ctx_events(ring); } else DRM_ERROR(The master control interrupt lied (GT1)!\n); } @@ -1684,11 +1695,13 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, if (tmp) {