If engine reports that it is not ready for reset, we
give up. Evidence shows that forcing a per engine reset
on an engine which is not reporting to be ready for reset,
can bring it back into a working order. There is risk that
we corrupt the context image currently executing on that
engine. But that is a risk worth taking as if we unblock
the engine, we prevent a whole device wedging in a case
of full gpu reset.

Reset individual engine even if it reports that it is not
prepared for reset, but only if we aim for full gpu reset
and not on first reset attempt.

v2: force reset only on later attempts, readability (Chris)

Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuopp...@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_uncore.c | 49 +++++++++++++++++++++++------
 1 file changed, 39 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
b/drivers/gpu/drm/i915/intel_uncore.c
index 027d14574bfa..d24026839b17 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -2099,9 +2099,6 @@ static int gen8_reset_engine_start(struct intel_engine_cs 
*engine)
                                           RESET_CTL_READY_TO_RESET,
                                           700, 0,
                                           NULL);
-       if (ret)
-               DRM_ERROR("%s: reset request timeout\n", engine->name);
-
        return ret;
 }
 
@@ -2113,6 +2110,42 @@ static void gen8_reset_engine_cancel(struct 
intel_engine_cs *engine)
                      _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET));
 }
 
+static int reset_engines(struct drm_i915_private *i915,
+                        unsigned int engine_mask,
+                        unsigned int retry)
+{
+       if (INTEL_GEN(i915) >= 11)
+               return gen11_reset_engines(i915, engine_mask);
+       else
+               return gen6_reset_engines(i915, engine_mask, retry);
+}
+
+static int gen8_prepare_engine_for_reset(struct intel_engine_cs *engine,
+                                        unsigned int retry)
+{
+       const bool force_reset_on_non_ready = retry >= 1;
+       int ret;
+
+       ret = gen8_reset_engine_start(engine);
+
+       if (ret && force_reset_on_non_ready) {
+               /*
+                * Try to unblock a single non-ready engine by risking
+                * context corruption.
+                */
+               ret = reset_engines(engine->i915,
+                                   intel_engine_flag(engine),
+                                   retry);
+               if (!ret)
+                       ret = gen8_reset_engine_start(engine);
+
+               DRM_ERROR("%s: reset request timeout, forcing reset (%d)\n",
+                         engine->name, ret);
+       }
+
+       return ret;
+}
+
 static int gen8_reset_engines(struct drm_i915_private *dev_priv,
                              unsigned int engine_mask,
                              unsigned int retry)
@@ -2122,16 +2155,12 @@ static int gen8_reset_engines(struct drm_i915_private 
*dev_priv,
        int ret;
 
        for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
-               if (gen8_reset_engine_start(engine)) {
-                       ret = -EIO;
+               ret = gen8_prepare_engine_for_reset(engine, retry);
+               if (ret)
                        goto not_ready;
-               }
        }
 
-       if (INTEL_GEN(dev_priv) >= 11)
-               ret = gen11_reset_engines(dev_priv, engine_mask);
-       else
-               ret = gen6_reset_engines(dev_priv, engine_mask, retry);
+       ret = reset_engines(dev_priv, engine_mask, retry);
 
 not_ready:
        for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to