Evidence indicates that even if the hardware happily
tells us to proceed with reset, it really isn't ready.
Resetting a freely running batchbuffer after we have
received the ack for readiness can still cause a system hang.

Attempt to stop the ring before proceeding to the ready check
and reset, to avoid losing the machine.

Testcase: igt/prime_busy/hang-* # kbl
Cc: Joonas Lahtinen <[email protected]>
Cc: Chris Wilson <[email protected]>
Signed-off-by: Mika Kuoppala <[email protected]>
---
 drivers/gpu/drm/i915/intel_uncore.c | 54 ++++++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 1b38eb94d461..f9ef1931516c 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1361,33 +1361,38 @@ int i915_reg_read_ioctl(struct drm_device *dev,
        return ret;
 }
 
+static void gen3_stop_ring(struct intel_engine_cs *engine)
+{
+       struct drm_i915_private *dev_priv = engine->i915;
+       const u32 base = engine->mmio_base;
+       const i915_reg_t mode = RING_MI_MODE(base);
+
+       I915_WRITE_FW(mode, _MASKED_BIT_ENABLE(STOP_RING));
+       if (intel_wait_for_register_fw(dev_priv,
+                                      mode,
+                                      MODE_IDLE,
+                                      MODE_IDLE,
+                                      500))
+               DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n",
+                                engine->name);
+
+       I915_WRITE_FW(RING_CTL(base), 0);
+       I915_WRITE_FW(RING_HEAD(base), 0);
+       I915_WRITE_FW(RING_TAIL(base), 0);
+
+       /* Check acts as a post */
+       if (I915_READ_FW(RING_HEAD(base)) != 0)
+               DRM_DEBUG_DRIVER("%s: ring head not parked\n",
+                                engine->name);
+}
+
 static void gen3_stop_rings(struct drm_i915_private *dev_priv)
 {
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
 
-       for_each_engine(engine, dev_priv, id) {
-               const u32 base = engine->mmio_base;
-               const i915_reg_t mode = RING_MI_MODE(base);
-
-               I915_WRITE_FW(mode, _MASKED_BIT_ENABLE(STOP_RING));
-               if (intel_wait_for_register_fw(dev_priv,
-                                              mode,
-                                              MODE_IDLE,
-                                              MODE_IDLE,
-                                              500))
-                       DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n",
-                                        engine->name);
-
-               I915_WRITE_FW(RING_CTL(base), 0);
-               I915_WRITE_FW(RING_HEAD(base), 0);
-               I915_WRITE_FW(RING_TAIL(base), 0);
-
-               /* Check acts as a post */
-               if (I915_READ_FW(RING_HEAD(base)) != 0)
-                       DRM_DEBUG_DRIVER("%s: ring head not parked\n",
-                                        engine->name);
-       }
+       for_each_engine(engine, dev_priv, id)
+               gen3_stop_ring(engine);
 }
 
 static bool i915_reset_complete(struct pci_dev *pdev)
@@ -1668,6 +1673,11 @@ static int gen8_reset_engine_start(struct intel_engine_cs *engine)
        struct drm_i915_private *dev_priv = engine->i915;
        int ret;
 
+       /* If the bb is still running at this stage, forcing a
+        * reset risks a system hang.
+        */
+       gen3_stop_ring(engine);
+
        I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
                      _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to