If we can't grab the breadcrumb's spinlock, possibly due to a driver
deadlock inside the waiters, ignore them. Like hangcheck, error
capturing must work no matter how the driver/GPU dies.

Signed-off-by: Chris Wilson <[email protected]>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 929db40a3925..14890850ecb1 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -489,7 +489,10 @@ int i915_error_state_to_str(struct 
drm_i915_error_state_buf *m,
                        }
                }
 
-               if (ee->num_waiters) {
+               if (IS_ERR(ee->waiters)) {
+                       err_printf(m, "%s --- ? waiters [unable to acquire 
spinlock]\n",
+                                  dev_priv->engine[i].name);
+               } else if (ee->num_waiters) {
                        err_printf(m, "%s --- %d waiters\n",
                                   dev_priv->engine[i].name,
                                   ee->num_waiters);
@@ -648,7 +651,8 @@ static void i915_error_state_free(struct kref *error_ref)
                i915_error_object_free(ee->wa_ctx);
 
                kfree(ee->requests);
-               kfree(ee->waiters);
+               if (!IS_ERR_OR_NULL(ee->waiters))
+                       kfree(ee->waiters);
        }
 
        i915_error_object_free(error->semaphore);
@@ -933,7 +937,14 @@ static void error_record_engine_waiters(struct 
intel_engine_cs *engine,
        ee->num_waiters = 0;
        ee->waiters = NULL;
 
-       spin_lock(&b->lock);
+       if (RB_EMPTY_ROOT(&b->waiters))
+               return;
+
+       if (!spin_trylock(&b->lock)) {
+               ee->waiters = ERR_PTR(-EDEADLK);
+               return;
+       }
+
        count = 0;
        for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb))
                count++;
@@ -947,9 +958,13 @@ static void error_record_engine_waiters(struct 
intel_engine_cs *engine,
        if (!waiter)
                return;
 
-       ee->waiters = waiter;
+       if (!spin_trylock(&b->lock)) {
+               kfree(waiter);
+               ee->waiters = ERR_PTR(-EDEADLK);
+               return;
+       }
 
-       spin_lock(&b->lock);
+       ee->waiters = waiter;
        for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
                struct intel_wait *w = container_of(rb, typeof(*w), node);
 
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to