Re: [Intel-gfx] [PATCH v2] drm/i915/execlists: Flush pending preemption events during reset
On Tue, Mar 27, 2018 at 12:44:02PM +0100, Chris Wilson wrote: > Catch up with the inflight CSB events, after disabling the tasklet > before deciding which request was truly guilty of hanging the GPU. > > v2: Restore checking of use_csb_mmio on every loop, don't forget old > vgpu. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Michał Winiarski <michal.winiarski@intel.com> > CC: Michel Thierry <michel.thierry@intel.com> > Cc: Jeff McGee <jeff.mcgee@intel.com> > --- > drivers/gpu/drm/i915/intel_lrc.c | 127 > +++ > 1 file changed, 87 insertions(+), 40 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c > b/drivers/gpu/drm/i915/intel_lrc.c > index cf31b0749023..75dbdedde8b0 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -874,34 +874,14 @@ static void execlists_cancel_requests(struct > intel_engine_cs *engine) > local_irq_restore(flags); > } > > -/* > - * Check the unread Context Status Buffers and manage the submission of new > - * contexts to the ELSP accordingly. > - */ > -static void execlists_submission_tasklet(unsigned long data) > +static void process_csb(struct intel_engine_cs *engine) > { > - struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; > struct intel_engine_execlists * const execlists = &engine->execlists; > struct execlist_port * const port = execlists->port; > - struct drm_i915_private *dev_priv = engine->i915; > + struct drm_i915_private *i915 = engine->i915; > bool fw = false; > > - /* > - * We can skip acquiring intel_runtime_pm_get() here as it was taken > - * on our behalf by the request (see i915_gem_mark_busy()) and it will > - * not be relinquished until the device is idle (see > - * i915_gem_idle_work_handler()). As a precaution, we make sure > - * that all ELSP are drained i.e. we have processed the CSB, > - * before allowing ourselves to idle and calling intel_runtime_pm_put(). 
> - */ > - GEM_BUG_ON(!dev_priv->gt.awake); > - > - /* > - * Prefer doing test_and_clear_bit() as a two stage operation to avoid > - * imposing the cost of a locked atomic transaction when submitting a > - * new request (outside of the context-switch interrupt). > - */ > - while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) { > + do { Wouldn't it be simpler for process_csb to use a while loop here, and for callers that need a CSB processing point to just call it without themselves checking irq_posted? Are we really saving much with the if-do-while approach? > /* The HWSP contains a (cacheable) mirror of the CSB */ > const u32 *buf = > &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; > @@ -909,28 +889,27 @@ static void execlists_submission_tasklet(unsigned long > data) > > if (unlikely(execlists->csb_use_mmio)) { > buf = (u32 * __force) > - (dev_priv->regs + > i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); > - execlists->csb_head = -1; /* force mmio read of CSB > ptrs */ > + (i915->regs + > i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); > + execlists->csb_head = -1; /* force mmio read of CSB */ > } > > /* Clear before reading to catch new interrupts */ > clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); > smp_mb__after_atomic(); > > - if (unlikely(execlists->csb_head == -1)) { /* following a reset > */ > + if (unlikely(execlists->csb_head == -1)) { /* after a reset */ > if (!fw) { > - intel_uncore_forcewake_get(dev_priv, > - > execlists->fw_domains); > + intel_uncore_forcewake_get(i915, > execlists->fw_domains); > fw = true; > } > > - head = readl(dev_priv->regs + > i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); > + head = readl(i915->regs + > i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); > tail = GEN8_CSB_WRITE_PTR(head); > head = GEN8_CSB_READ_PTR(head); > execlists->csb_head = head; > } else { > const int write_idx = > - intel_hws_csb_write_index(dev_priv) - > + intel_hws_csb_write_index(i915) - > 
I915_HWS_CSB_BUF0_INDEX; > > head = execlists->csb_head; > @@ -938,8 +917,8 @@ static void execlists_submission_tasklet(unsigned long > data) > } > GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d
[Intel-gfx] [PATCH v2] drm/i915/execlists: Flush pending preemption events during reset
Catch up with the inflight CSB events, after disabling the tasklet before deciding which request was truly guilty of hanging the GPU. v2: Restore checking of use_csb_mmio on every loop, don't forget old vgpu. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Michał Winiarski <michal.winiarski@intel.com> CC: Michel Thierry <michel.thierry@intel.com> Cc: Jeff McGee <jeff.mcgee@intel.com> --- drivers/gpu/drm/i915/intel_lrc.c | 127 +++ 1 file changed, 87 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index cf31b0749023..75dbdedde8b0 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -874,34 +874,14 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) local_irq_restore(flags); } -/* - * Check the unread Context Status Buffers and manage the submission of new - * contexts to the ELSP accordingly. - */ -static void execlists_submission_tasklet(unsigned long data) +static void process_csb(struct intel_engine_cs *engine) { - struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port * const port = execlists->port; - struct drm_i915_private *dev_priv = engine->i915; + struct drm_i915_private *i915 = engine->i915; bool fw = false; - /* -* We can skip acquiring intel_runtime_pm_get() here as it was taken -* on our behalf by the request (see i915_gem_mark_busy()) and it will -* not be relinquished until the device is idle (see -* i915_gem_idle_work_handler()). As a precaution, we make sure -* that all ELSP are drained i.e. we have processed the CSB, -* before allowing ourselves to idle and calling intel_runtime_pm_put(). -*/ - GEM_BUG_ON(!dev_priv->gt.awake); - - /* -* Prefer doing test_and_clear_bit() as a two stage operation to avoid -* imposing the cost of a locked atomic transaction when submitting a -* new request (outside of the context-switch interrupt). 
-*/ - while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) { + do { /* The HWSP contains a (cacheable) mirror of the CSB */ const u32 *buf = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; @@ -909,28 +889,27 @@ static void execlists_submission_tasklet(unsigned long data) if (unlikely(execlists->csb_use_mmio)) { buf = (u32 * __force) - (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); - execlists->csb_head = -1; /* force mmio read of CSB ptrs */ + (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); + execlists->csb_head = -1; /* force mmio read of CSB */ } /* Clear before reading to catch new interrupts */ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); smp_mb__after_atomic(); - if (unlikely(execlists->csb_head == -1)) { /* following a reset */ + if (unlikely(execlists->csb_head == -1)) { /* after a reset */ if (!fw) { - intel_uncore_forcewake_get(dev_priv, - execlists->fw_domains); + intel_uncore_forcewake_get(i915, execlists->fw_domains); fw = true; } - head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); + head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); tail = GEN8_CSB_WRITE_PTR(head); head = GEN8_CSB_READ_PTR(head); execlists->csb_head = head; } else { const int write_idx = - intel_hws_csb_write_index(dev_priv) - + intel_hws_csb_write_index(i915) - I915_HWS_CSB_BUF0_INDEX; head = execlists->csb_head; @@ -938,8 +917,8 @@ static void execlists_submission_tasklet(unsigned long data) } GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n", engine->name, - head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?", - tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?"); + head,