Re: [Intel-gfx] [PATCH v2] drm/i915/execlists: Flush pending preemption events during reset

2018-03-27 Thread Jeff McGee
On Tue, Mar 27, 2018 at 12:44:02PM +0100, Chris Wilson wrote:
> Catch up with the inflight CSB events, after disabling the tasklet
> before deciding which request was truly guilty of hanging the GPU.
> 
> v2: Restore checking of use_csb_mmio on every loop, don't forget old
> vgpu.
> 
> Signed-off-by: Chris Wilson 
> Cc: Michał Winiarski 
> CC: Michel Thierry 
> Cc: Jeff McGee 
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 127 
> +++
>  1 file changed, 87 insertions(+), 40 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
> b/drivers/gpu/drm/i915/intel_lrc.c
> index cf31b0749023..75dbdedde8b0 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -874,34 +874,14 @@ static void execlists_cancel_requests(struct 
> intel_engine_cs *engine)
>   local_irq_restore(flags);
>  }
>  
> -/*
> - * Check the unread Context Status Buffers and manage the submission of new
> - * contexts to the ELSP accordingly.
> - */
> -static void execlists_submission_tasklet(unsigned long data)
> +static void process_csb(struct intel_engine_cs *engine)
>  {
> - struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>   struct intel_engine_execlists * const execlists = &engine->execlists;
>   struct execlist_port * const port = execlists->port;
> - struct drm_i915_private *dev_priv = engine->i915;
> + struct drm_i915_private *i915 = engine->i915;
>   bool fw = false;
>  
> - /*
> -  * We can skip acquiring intel_runtime_pm_get() here as it was taken
> -  * on our behalf by the request (see i915_gem_mark_busy()) and it will
> -  * not be relinquished until the device is idle (see
> -  * i915_gem_idle_work_handler()). As a precaution, we make sure
> -  * that all ELSP are drained i.e. we have processed the CSB,
> -  * before allowing ourselves to idle and calling intel_runtime_pm_put().
> -  */
> - GEM_BUG_ON(!dev_priv->gt.awake);
> -
> - /*
> -  * Prefer doing test_and_clear_bit() as a two stage operation to avoid
> -  * imposing the cost of a locked atomic transaction when submitting a
> -  * new request (outside of the context-switch interrupt).
> -  */
> - while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
> + do {
Wouldn't it be simpler for process_csb to use a while loop here, and for
callers that need a CSB processing point to just call it without themselves
checking irq_posted? Are we really saving much with the if-do-while approach?

>   /* The HWSP contains a (cacheable) mirror of the CSB */
>   const u32 *buf =
>   &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> @@ -909,28 +889,27 @@ static void execlists_submission_tasklet(unsigned long 
> data)
>  
>   if (unlikely(execlists->csb_use_mmio)) {
>   buf = (u32 * __force)
> - (dev_priv->regs + 
> i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> - execlists->csb_head = -1; /* force mmio read of CSB 
> ptrs */
> + (i915->regs + 
> i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> + execlists->csb_head = -1; /* force mmio read of CSB */
>   }
>  
>   /* Clear before reading to catch new interrupts */
>   clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>   smp_mb__after_atomic();
>  
> - if (unlikely(execlists->csb_head == -1)) { /* following a reset 
> */
> + if (unlikely(execlists->csb_head == -1)) { /* after a reset */
>   if (!fw) {
> - intel_uncore_forcewake_get(dev_priv,
> -
> execlists->fw_domains);
> + intel_uncore_forcewake_get(i915, 
> execlists->fw_domains);
>   fw = true;
>   }
>  
> - head = readl(dev_priv->regs + 
> i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> + head = readl(i915->regs + 
> i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
>   tail = GEN8_CSB_WRITE_PTR(head);
>   head = GEN8_CSB_READ_PTR(head);
>   execlists->csb_head = head;
>   } else {
>   const int write_idx =
> - intel_hws_csb_write_index(dev_priv) -
> + intel_hws_csb_write_index(i915) -
>   I915_HWS_CSB_BUF0_INDEX;
>  
>   head = execlists->csb_head;
> @@ -938,8 +917,8 @@ static void execlists_submission_tasklet(unsigned long 
> data)
>   }
>   GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d 

[Intel-gfx] [PATCH v2] drm/i915/execlists: Flush pending preemption events during reset

2018-03-27 Thread Chris Wilson
Catch up with the inflight CSB events, after disabling the tasklet
before deciding which request was truly guilty of hanging the GPU.

v2: Restore checking of use_csb_mmio on every loop, don't forget old
vgpu.

Signed-off-by: Chris Wilson 
Cc: Michał Winiarski 
CC: Michel Thierry 
Cc: Jeff McGee 
---
 drivers/gpu/drm/i915/intel_lrc.c | 127 +++
 1 file changed, 87 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cf31b0749023..75dbdedde8b0 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -874,34 +874,14 @@ static void execlists_cancel_requests(struct 
intel_engine_cs *engine)
local_irq_restore(flags);
 }
 
-/*
- * Check the unread Context Status Buffers and manage the submission of new
- * contexts to the ELSP accordingly.
- */
-static void execlists_submission_tasklet(unsigned long data)
+static void process_csb(struct intel_engine_cs *engine)
 {
-   struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
struct intel_engine_execlists * const execlists = &engine->execlists;
struct execlist_port * const port = execlists->port;
-   struct drm_i915_private *dev_priv = engine->i915;
+   struct drm_i915_private *i915 = engine->i915;
bool fw = false;
 
-   /*
-* We can skip acquiring intel_runtime_pm_get() here as it was taken
-* on our behalf by the request (see i915_gem_mark_busy()) and it will
-* not be relinquished until the device is idle (see
-* i915_gem_idle_work_handler()). As a precaution, we make sure
-* that all ELSP are drained i.e. we have processed the CSB,
-* before allowing ourselves to idle and calling intel_runtime_pm_put().
-*/
-   GEM_BUG_ON(!dev_priv->gt.awake);
-
-   /*
-* Prefer doing test_and_clear_bit() as a two stage operation to avoid
-* imposing the cost of a locked atomic transaction when submitting a
-* new request (outside of the context-switch interrupt).
-*/
-   while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
+   do {
/* The HWSP contains a (cacheable) mirror of the CSB */
const u32 *buf =
&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
@@ -909,28 +889,27 @@ static void execlists_submission_tasklet(unsigned long 
data)
 
if (unlikely(execlists->csb_use_mmio)) {
buf = (u32 * __force)
-   (dev_priv->regs + 
i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
-   execlists->csb_head = -1; /* force mmio read of CSB 
ptrs */
+   (i915->regs + 
i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+   execlists->csb_head = -1; /* force mmio read of CSB */
}
 
/* Clear before reading to catch new interrupts */
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
smp_mb__after_atomic();
 
-   if (unlikely(execlists->csb_head == -1)) { /* following a reset 
*/
+   if (unlikely(execlists->csb_head == -1)) { /* after a reset */
if (!fw) {
-   intel_uncore_forcewake_get(dev_priv,
-  
execlists->fw_domains);
+   intel_uncore_forcewake_get(i915, 
execlists->fw_domains);
fw = true;
}
 
-   head = readl(dev_priv->regs + 
i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+   head = readl(i915->regs + 
i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
tail = GEN8_CSB_WRITE_PTR(head);
head = GEN8_CSB_READ_PTR(head);
execlists->csb_head = head;
} else {
const int write_idx =
-   intel_hws_csb_write_index(dev_priv) -
+   intel_hws_csb_write_index(i915) -
I915_HWS_CSB_BUF0_INDEX;
 
head = execlists->csb_head;
@@ -938,8 +917,8 @@ static void execlists_submission_tasklet(unsigned long data)
}
GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
  engine->name,
- head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + 
i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine, fw ? "" : "?",
- tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + 
i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine, fw ? "" : "?");
+ head,