Catch up with the inflight CSB events, after disabling the tasklet
before deciding which request was truly guilty of hanging the GPU.

v2: Restore checking of use_csb_mmio on every loop, don't forget old
vgpu.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: MichaƂ Winiarski <michal.winiar...@intel.com>
CC: Michel Thierry <michel.thie...@intel.com>
Cc: Jeff McGee <jeff.mc...@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 127 +++++++++++++++++++++++++++------------
 1 file changed, 87 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cf31b0749023..75dbdedde8b0 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -874,34 +874,14 @@ static void execlists_cancel_requests(struct 
intel_engine_cs *engine)
        local_irq_restore(flags);
 }
 
-/*
- * Check the unread Context Status Buffers and manage the submission of new
- * contexts to the ELSP accordingly.
- */
-static void execlists_submission_tasklet(unsigned long data)
+static void process_csb(struct intel_engine_cs *engine)
 {
-       struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
        struct intel_engine_execlists * const execlists = &engine->execlists;
        struct execlist_port * const port = execlists->port;
-       struct drm_i915_private *dev_priv = engine->i915;
+       struct drm_i915_private *i915 = engine->i915;
        bool fw = false;
 
-       /*
-        * We can skip acquiring intel_runtime_pm_get() here as it was taken
-        * on our behalf by the request (see i915_gem_mark_busy()) and it will
-        * not be relinquished until the device is idle (see
-        * i915_gem_idle_work_handler()). As a precaution, we make sure
-        * that all ELSP are drained i.e. we have processed the CSB,
-        * before allowing ourselves to idle and calling intel_runtime_pm_put().
-        */
-       GEM_BUG_ON(!dev_priv->gt.awake);
-
-       /*
-        * Prefer doing test_and_clear_bit() as a two stage operation to avoid
-        * imposing the cost of a locked atomic transaction when submitting a
-        * new request (outside of the context-switch interrupt).
-        */
-       while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
+       do {
                /* The HWSP contains a (cacheable) mirror of the CSB */
                const u32 *buf =
                        &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
@@ -909,28 +889,27 @@ static void execlists_submission_tasklet(unsigned long 
data)
 
                if (unlikely(execlists->csb_use_mmio)) {
                        buf = (u32 * __force)
-                               (dev_priv->regs + 
i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
-                       execlists->csb_head = -1; /* force mmio read of CSB 
ptrs */
+                               (i915->regs + 
i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+                       execlists->csb_head = -1; /* force mmio read of CSB */
                }
 
                /* Clear before reading to catch new interrupts */
                clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
                smp_mb__after_atomic();
 
-               if (unlikely(execlists->csb_head == -1)) { /* following a reset 
*/
+               if (unlikely(execlists->csb_head == -1)) { /* after a reset */
                        if (!fw) {
-                               intel_uncore_forcewake_get(dev_priv,
-                                                          
execlists->fw_domains);
+                               intel_uncore_forcewake_get(i915, 
execlists->fw_domains);
                                fw = true;
                        }
 
-                       head = readl(dev_priv->regs + 
i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+                       head = readl(i915->regs + 
i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
                        tail = GEN8_CSB_WRITE_PTR(head);
                        head = GEN8_CSB_READ_PTR(head);
                        execlists->csb_head = head;
                } else {
                        const int write_idx =
-                               intel_hws_csb_write_index(dev_priv) -
+                               intel_hws_csb_write_index(i915) -
                                I915_HWS_CSB_BUF0_INDEX;
 
                        head = execlists->csb_head;
@@ -938,8 +917,8 @@ static void execlists_submission_tasklet(unsigned long data)
                }
                GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
                          engine->name,
-                         head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + 
i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
-                         tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + 
i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
+                         head, GEN8_CSB_READ_PTR(readl(i915->regs + 
i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
+                         tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + 
i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
 
                while (head != tail) {
                        struct i915_request *rq;
@@ -949,7 +928,8 @@ static void execlists_submission_tasklet(unsigned long data)
                        if (++head == GEN8_CSB_ENTRIES)
                                head = 0;
 
-                       /* We are flying near dragons again.
+                       /*
+                        * We are flying near dragons again.
                         *
                         * We hold a reference to the request in execlist_port[]
                         * but no more than that. We are operating in softirq
@@ -1040,15 +1020,48 @@ static void execlists_submission_tasklet(unsigned long 
data)
                if (head != execlists->csb_head) {
                        execlists->csb_head = head;
                        writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
-                              dev_priv->regs + 
i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+                              i915->regs + 
i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
                }
-       }
+       } while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
 
-       if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
-               execlists_dequeue(engine);
+       if (unlikely(fw))
+               intel_uncore_forcewake_put(i915, execlists->fw_domains);
+}
+
+/*
+ * Check the unread Context Status Buffers and manage the submission of new
+ * contexts to the ELSP accordingly.
+ */
+static void execlists_submission_tasklet(unsigned long data)
+{
+       struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 
-       if (fw)
-               intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
+       GEM_TRACE("%s awake?=%d, active=%x, irq-posted?=%d\n",
+                 engine->name,
+                 engine->i915->gt.awake,
+                 engine->execlists.active,
+                 test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
+
+       /*
+        * We can skip acquiring intel_runtime_pm_get() here as it was taken
+        * on our behalf by the request (see i915_gem_mark_busy()) and it will
+        * not be relinquished until the device is idle (see
+        * i915_gem_idle_work_handler()). As a precaution, we make sure
+        * that all ELSP are drained i.e. we have processed the CSB,
+        * before allowing ourselves to idle and calling intel_runtime_pm_put().
+        */
+       GEM_BUG_ON(!engine->i915->gt.awake);
+
+       /*
+        * Prefer doing test_and_clear_bit() as a two stage operation to avoid
+        * imposing the cost of a locked atomic transaction when submitting a
+        * new request (outside of the context-switch interrupt).
+        */
+       if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
+               process_csb(engine);
+
+       if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
+               execlists_dequeue(engine);
 
        /* If the engine is now idle, so should be the flag; and vice versa. */
        GEM_BUG_ON(execlists_is_active(&engine->execlists,
@@ -1712,6 +1725,7 @@ static struct i915_request *
 execlists_reset_prepare(struct intel_engine_cs *engine)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
+       struct i915_request *request, *active;
 
        GEM_TRACE("%s\n", engine->name);
 
@@ -1735,7 +1749,40 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
                tasklet_kill(&execlists->tasklet);
        tasklet_disable(&execlists->tasklet);
 
-       return i915_gem_find_active_request(engine);
+       /*
+        * We want to flush the pending context switches, having disabled
+        * the tasklet above, we can assume exclusive access to the execlists.
+        * For this allows us to catch up with an inflight preemption event,
+        * and avoid blaming an innocent request if the stall was due to the
+        * preemption itself.
+        */
+       if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
+               process_csb(engine);
+
+       /*
+        * The last active request can then be no later than the last request
+        * now in ELSP[0]. So search backwards from there, so that if the GPU
+        * has advanced beyond the last CSB update, it will be pardoned.
+        */
+       active = NULL;
+       request = port_request(execlists->port);
+       if (request) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&engine->timeline->lock, flags);
+               list_for_each_entry_from_reverse(request,
+                                                &engine->timeline->requests,
+                                                link) {
+                       if (__i915_request_completed(request,
+                                                    request->global_seqno))
+                               break;
+
+                       active = request;
+               }
+               spin_unlock_irqrestore(&engine->timeline->lock, flags);
+       }
+
+       return active;
 }
 
 static void execlists_reset(struct intel_engine_cs *engine,
-- 
2.16.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to