Make i915_gem_set_wedged() and i915_gem_unset_wedged() behave more
consistently if called concurrently.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuopp...@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c               | 32 ++++++++++++++-----
 drivers/gpu/drm/i915/i915_gpu_error.h         |  4 ++-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 +
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a3dd5bbd6700..6e5546075f17 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3174,10 +3174,15 @@ static void nop_submit_request(struct i915_request *request)
 
 void i915_gem_set_wedged(struct drm_i915_private *i915)
 {
+       struct i915_gpu_error *error = &i915->gpu_error;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
 
-       GEM_TRACE("start\n");
+       mutex_lock(&error->wedge_mutex);
+       if (test_bit(I915_WEDGED, &error->flags)) {
+               mutex_unlock(&error->wedge_mutex);
+               return;
+       }
 
        if (GEM_SHOW_DEBUG()) {
                struct drm_printer p = drm_debug_printer(__func__);
@@ -3186,8 +3191,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
                        intel_engine_dump(engine, &p, "%s\n", engine->name);
        }
 
-       if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags))
-               goto out;
+       GEM_TRACE("start\n");
 
        /*
         * First, stop submission to hw, but do not yet complete requests by
@@ -3223,23 +3227,31 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
                intel_engine_wakeup(engine);
        }
 
-out:
+       smp_mb__before_atomic();
+       set_bit(I915_WEDGED, &error->flags);
+
        GEM_TRACE("end\n");
+       mutex_unlock(&error->wedge_mutex);
 
-       wake_up_all(&i915->gpu_error.reset_queue);
+       wake_up_all(&error->reset_queue);
 }
 
 bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 {
+       struct i915_gpu_error *error = &i915->gpu_error;
        struct i915_timeline *tl;
+       bool ret = false;
 
        lockdep_assert_held(&i915->drm.struct_mutex);
-       if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
+
+       if (!test_bit(I915_WEDGED, &error->flags))
                return true;
 
        if (!i915->gt.scratch) /* Never full initialised, recovery impossible */
                return false;
 
+       mutex_lock(&error->wedge_mutex);
+
        GEM_TRACE("start\n");
 
        /*
@@ -3273,7 +3285,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
                 */
                if (dma_fence_default_wait(&rq->fence, true,
                                           MAX_SCHEDULE_TIMEOUT) < 0)
-                       return false;
+                       goto unlock;
        }
        i915_retire_requests(i915);
        GEM_BUG_ON(i915->gt.active_requests);
@@ -3296,8 +3308,11 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 
        smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
        clear_bit(I915_WEDGED, &i915->gpu_error.flags);
+       ret = true;
+unlock:
+       mutex_unlock(&i915->gpu_error.wedge_mutex);
 
-       return true;
+       return ret;
 }
 
 static void
@@ -5693,6 +5708,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
                          i915_gem_idle_work_handler);
        init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
        init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
+       mutex_init(&dev_priv->gpu_error.wedge_mutex);
 
        atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 6d9f45468ac1..604291f7762d 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -271,8 +271,8 @@ struct i915_gpu_error {
 #define I915_RESET_BACKOFF     0
 #define I915_RESET_HANDOFF     1
 #define I915_RESET_MODESET     2
+#define I915_RESET_ENGINE      3
 #define I915_WEDGED            (BITS_PER_LONG - 1)
-#define I915_RESET_ENGINE      (I915_WEDGED - I915_NUM_ENGINES)
 
        /** Number of times an engine has been reset */
        u32 reset_engine_count[I915_NUM_ENGINES];
@@ -283,6 +283,8 @@ struct i915_gpu_error {
        /** Reason for the current *global* reset */
        const char *reason;
 
+       struct mutex wedge_mutex; /* serialises wedging/unwedging */
+
        /**
         * Waitqueue to signal when a hang is detected. Used to for waiters
         * to release the struct_mutex for the reset to procede.
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index aa4ddae94aca..4a25d2a344f2 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -189,6 +189,7 @@ struct drm_i915_private *mock_gem_device(void)
 
        init_waitqueue_head(&i915->gpu_error.wait_queue);
        init_waitqueue_head(&i915->gpu_error.reset_queue);
+       mutex_init(&i915->gpu_error.wedge_mutex);
 
        i915->wq = alloc_ordered_workqueue("mock", 0);
        if (!i915->wq)
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to