The hardware needs some time to process the information received in the
ExecList Submission Port, and expects us to don't write anything new until
it has 'acknowledged' this new execlist by sending an IDLE_ACTIVE or
PREEMPTED CSB event.

If we do not follow this, the driver could write new data into the ELSP
before HW had finishing fetching the previous one, putting us in
'undefined behaviour' space.

This seems to be the problem causing the spurious PREEMPTED & COMPLETE
events after a COMPLETE like the one below:

[] vcs0: sw rd pointer = 2, hw wr pointer = 0, current 'head' = 3.
[] vcs0:  Execlist CSB[0]: 0x00000018 _ 0x00000007
[] vcs0:  Execlist CSB[1]: 0x00000001 _ 0x00000000
[] vcs0:  Execlist CSB[2]: 0x00000018 _ 0x00000007  <<< COMPLETE
[] vcs0:  Execlist CSB[3]: 0x00000012 _ 0x00000007  <<< PREEMPTED & COMPLETE
[] vcs0:  Execlist CSB[4]: 0x00008002 _ 0x00000006
[] vcs0:  Execlist CSB[5]: 0x00000014 _ 0x00000006

The ELSP writes that lead to this CSB sequence show that the HW hadn't
started executing the previous execlist (the one with only ctx 0x6) by the
time the new one was submitted; this is a bit more clear in the data
show in the EXECLIST_STATUS register at the time of the ELSP write.

[] vcs0: ELSP[0] = 0x0_0        [execlist1] - status_reg = 0x0_302
[] vcs0: ELSP[1] = 0x6_fedb2119 [execlist0] - status_reg = 0x0_8302

[] vcs0: ELSP[2] = 0x7_fedaf119 [execlist1] - status_reg = 0x0_8308
[] vcs0: ELSP[3] = 0x6_fedb2119 [execlist0] - status_reg = 0x7_8308

Note that having to wait for this ack does not disable lite-restores,
although it may reduce their numbers.

And take this as a RFC, since there are probably better ways to still
respect this HW requirement.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102035
Signed-off-by: Michel Thierry <michel.thie...@intel.com>
Cc: Chris Wilson <ch...@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_lrc.c        | 16 +++++++++++++++-
 drivers/gpu/drm/i915/intel_ringbuffer.h |  6 ++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index af41165e3da4..10b7eb64f169 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -449,11 +449,16 @@ static inline void elsp_write(u64 desc, u32 __iomem *elsp)
 
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
+       struct intel_engine_execlists * const execlists = &engine->execlists;
        struct execlist_port *port = engine->execlists.port;
        u32 __iomem *elsp =
                engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
        unsigned int n;
 
+       if (wait_for_atomic(!READ_ONCE(execlists->outstanding_submission), 10))
+               GEM_TRACE("%s outstanding submission stuck\n",
+                         engine->name);
+
        for (n = execlists_num_ports(&engine->execlists); n--; ) {
                struct drm_i915_gem_request *rq;
                unsigned int count;
@@ -479,6 +484,8 @@ static void execlists_submit_ports(struct intel_engine_cs 
*engine)
 
                elsp_write(desc, elsp);
        }
+
+       WRITE_ONCE(execlists->outstanding_submission, 1);
 }
 
 static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
@@ -889,6 +896,11 @@ static void execlists_submission_tasklet(unsigned long 
data)
                        GEM_TRACE("%s csb[%dd]: status=0x%08x:0x%08x\n",
                                  engine->name, head,
                                  status, buf[2*head + 1]);
+
+                       if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) ||
+                           (status & GEN8_CTX_STATUS_PREEMPTED))
+                               WRITE_ONCE(execlists->outstanding_submission, 
0);
+
                        if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
                                continue;
 
@@ -944,9 +956,11 @@ static void execlists_submission_tasklet(unsigned long 
data)
                        /* After the final element, the hw should be idle */
                        GEM_BUG_ON(port_count(port) == 0 &&
                                   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
-                       if (port_count(port) == 0)
+                       if (port_count(port) == 0) {
                                execlists_clear_active(execlists,
                                                       EXECLISTS_ACTIVE_USER);
+                               WRITE_ONCE(execlists->outstanding_submission, 
0);
+                       }
                }
 
                if (head != execlists->csb_head) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index e5c62f8ef0da..2c8e1a74c266 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -288,6 +288,12 @@ struct intel_engine_execlists {
         * @csb_use_mmio: access csb through mmio, instead of hwsp
         */
        bool csb_use_mmio;
+
+       /**
+        * @outstanding_submission: prevent further ELSP writes until HW
+        * has ack'd one (with IDLE_ACTIVE or PREEMPT CSB events)
+        */
+       bool outstanding_submission;
 };
 
 #define INTEL_ENGINE_CS_MAX_NAME 8
-- 
2.15.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to