Killing the context before taking ctx->mutex fixes a hang in
gem_ctx_persistence.close-replace-race, where lut_close
takes obj->resv.lock, which is already held by execbuf,
causing it to stall indefinitely.

[ 1904.342847] 2 locks held by gem_ctx_persist/11520:
[ 1904.342849]  #0: ffff8882188e4968 (&ctx->mutex){+.+.}-{3:3}, at: context_close+0xe6/0x850 [i915]
[ 1904.342941]  #1: ffff88821c58a5a8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: lut_close+0x2c2/0xba0 [i915]
[ 1904.343033] 3 locks held by gem_ctx_persist/11521:
[ 1904.343035]  #0: ffffc900008ff938 (reservation_ww_class_acquire){+.+.}-{0:0}, at: i915_gem_do_execbuffer+0x103d/0x54c0 [i915]
[ 1904.343157]  #1: ffff88821c58a5a8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: eb_validate_vmas+0x602/0x2010 [i915]
[ 1904.343267]  #2: ffff88820afd9200 (&vm->mutex/1){+.+.}-{3:3}, at: i915_vma_pin_ww+0x335/0x2300 [i915]

Signed-off-by: Maarten Lankhorst <[email protected]>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 24 ++++++++++-----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index b9d38e8edb5b..3c89150f7262 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -623,6 +623,18 @@ static void context_close(struct i915_gem_context *ctx)
        i915_gem_context_set_closed(ctx);
        mutex_unlock(&ctx->engines_mutex);
 
+       /*
+        * If the user has disabled hangchecking, we can not be sure that
+        * the batches will ever complete after the context is closed,
+        * keeping the context and all resources pinned forever. So in this
+        * case we opt to forcibly kill off all remaining requests on
+        * context close.
+        */
+       if (!i915_gem_context_is_persistent(ctx) ||
+           !i915_modparams.enable_hangcheck)
+               kill_context(ctx);
+
+
        mutex_lock(&ctx->mutex);
 
        set_closed_name(ctx);
@@ -641,18 +653,6 @@ static void context_close(struct i915_gem_context *ctx)
        lut_close(ctx);
 
        mutex_unlock(&ctx->mutex);
-
-       /*
-        * If the user has disabled hangchecking, we can not be sure that
-        * the batches will ever complete after the context is closed,
-        * keeping the context and all resources pinned forever. So in this
-        * case we opt to forcibly kill off all remaining requests on
-        * context close.
-        */
-       if (!i915_gem_context_is_persistent(ctx) ||
-           !i915_modparams.enable_hangcheck)
-               kill_context(ctx);
-
        i915_gem_context_put(ctx);
 }
 
-- 
2.27.0

_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to