And store the active request so that we only search for it once; this
applies for reset-engine and full reset.

v2: Check for request completion inside _prepare_engine, don't use
ECANCELED, remove unnecessary null checks (Chris).

v3: Capture active requests during reset_prepare and store it the
engine hangcheck obj.

v4: Rename commit, change i915_gem_reset_request to just confirm the
active_request is still incomplete, instead of engine_stalled (Chris).

v5: With style; pass the active request to gem_reset_engine, keep single
return in reset_prepare_engine (Chris).

Suggested-by: Chris Wilson <[email protected]>
Signed-off-by: Michel Thierry <[email protected]>
---
 drivers/gpu/drm/i915/i915_drv.c         | 14 ++++++------
 drivers/gpu/drm/i915/i915_drv.h         |  6 ++++--
 drivers/gpu/drm/i915/i915_gem.c         | 38 ++++++++++++++++++++-------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 +
 4 files changed, 36 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index d62793805794..2ba288e9311c 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1895,23 +1895,25 @@ int i915_reset_engine(struct intel_engine_cs *engine)
        int ret;
        struct drm_i915_private *dev_priv = engine->i915;
        struct i915_gpu_error *error = &dev_priv->gpu_error;
+       struct drm_i915_gem_request *active_request;
 
        GEM_BUG_ON(!test_bit(I915_RESET_ENGINE_IN_PROGRESS, &error->flags));
 
        DRM_DEBUG_DRIVER("resetting %s\n", engine->name);
 
-       ret = i915_gem_reset_prepare_engine(engine);
-       if (ret) {
-               DRM_ERROR("Previous reset failed - promote to full reset\n");
+       active_request = i915_gem_reset_prepare_engine(engine);
+       if (IS_ERR(active_request)) {
+               DRM_DEBUG_DRIVER("Previous reset failed, promote to full 
reset\n");
+               ret = PTR_ERR(active_request);
                goto out;
        }
 
        /*
-        * the request that caused the hang is stuck on elsp, identify the
-        * active request and drop it, adjust head to skip the offending
+        * the request that caused the hang is stuck on elsp, we know the
+        * active request and can drop it, adjust head to skip the offending
         * request to resume executing remaining requests in the queue.
         */
-       i915_gem_reset_engine(engine);
+       i915_gem_reset_engine(engine, active_request);
 
        /* forcing engine to idle */
        ret = intel_reset_engine_start(engine);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a5b9c666b3bf..f8cbd286f904 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3370,14 +3370,16 @@ static inline u32 i915_reset_count(struct 
i915_gpu_error *error)
        return READ_ONCE(error->reset_count);
 }
 
-int i915_gem_reset_prepare_engine(struct intel_engine_cs *engine);
+struct drm_i915_gem_request *
+i915_gem_reset_prepare_engine(struct intel_engine_cs *engine);
 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
 void i915_gem_reset(struct drm_i915_private *dev_priv);
 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine);
 void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
 bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
-void i915_gem_reset_engine(struct intel_engine_cs *engine);
+void i915_gem_reset_engine(struct intel_engine_cs *engine,
+                          struct drm_i915_gem_request *request);
 
 void i915_gem_init_mmio(struct drm_i915_private *i915);
 int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b5dc073a5ddc..6e14bf039aed 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2793,12 +2793,14 @@ static bool engine_stalled(struct intel_engine_cs 
*engine)
        return true;
 }
 
-/* Ensure irq handler finishes, and not run again. */
-int i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
+/*
+ * Ensure irq handler finishes, and not run again.
+ * Also store the active request so that we only search for it once.
+ */
+struct drm_i915_gem_request *
+i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
 {
-       struct drm_i915_gem_request *request;
-       int err = 0;
-
+       struct drm_i915_gem_request *request = NULL;
 
        /* Prevent the signaler thread from updating the request
         * state (by calling dma_fence_signal) as we are processing
@@ -2828,20 +2830,28 @@ int i915_gem_reset_prepare_engine(struct 
intel_engine_cs *engine)
        if (engine_stalled(engine)) {
                request = i915_gem_find_active_request(engine);
                if (request && request->fence.error == -EIO)
-                       err = -EIO; /* Previous reset failed! */
+                       request = ERR_PTR(-EIO); /* Previous reset failed! */
        }
 
-       return err;
+       return request;
 }
 
 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
 {
        struct intel_engine_cs *engine;
+       struct drm_i915_gem_request *request;
        enum intel_engine_id id;
        int err = 0;
 
-       for_each_engine(engine, dev_priv, id)
-               err = i915_gem_reset_prepare_engine(engine);
+       for_each_engine(engine, dev_priv, id) {
+               request = i915_gem_reset_prepare_engine(engine);
+               if (IS_ERR(request)) {
+                       err = PTR_ERR(request);
+                       break;
+               }
+
+               engine->hangcheck.active_request = request;
+       }
 
        i915_gem_revoke_fences(dev_priv);
 
@@ -2894,7 +2904,7 @@ static void engine_skip_context(struct 
drm_i915_gem_request *request)
 static bool i915_gem_reset_request(struct drm_i915_gem_request *request)
 {
        /* Read once and return the resolution */
-       const bool guilty = engine_stalled(request->engine);
+       const bool guilty = !i915_gem_request_completed(request);
 
        /* The guilty request will get skipped on a hung engine.
         *
@@ -2928,11 +2938,9 @@ static bool i915_gem_reset_request(struct 
drm_i915_gem_request *request)
        return guilty;
 }
 
-void i915_gem_reset_engine(struct intel_engine_cs *engine)
+void i915_gem_reset_engine(struct intel_engine_cs *engine,
+                          struct drm_i915_gem_request *request)
 {
-       struct drm_i915_gem_request *request;
-
-       request = i915_gem_find_active_request(engine);
        if (request && i915_gem_reset_request(request)) {
                DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 
0x%x\n",
                                 engine->name, request->global_seqno);
@@ -2958,7 +2966,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
        for_each_engine(engine, dev_priv, id) {
                struct i915_gem_context *ctx;
 
-               i915_gem_reset_engine(engine);
+               i915_gem_reset_engine(engine, engine->hangcheck.active_request);
                ctx = fetch_and_zero(&engine->last_retired_context);
                if (ctx)
                        engine->context_unpin(engine, ctx);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ec16fb6fde62..f850c4b12337 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -121,6 +121,7 @@ struct intel_engine_hangcheck {
        unsigned long action_timestamp;
        int deadlock;
        struct intel_instdone instdone;
+       struct drm_i915_gem_request *active_request;
        bool stalled;
 };
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to