If we have already wedged, i915_gem_set_wedged() becomes a complicated no-op.

v2: Make sure the double set-wedged is synchronous; a parallel call
should not return before the driver is indeed wedged.

References: https://bugs.freedesktop.org/show_bug.cgi?id=107343
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c       | 32 ++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_gpu_error.h |  3 ++-
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8b52cb768a67..912be7356984 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3298,12 +3298,27 @@ static void nop_complete_submit_request(struct i915_request *request)
        spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
 }
 
+static void wait_for_wedged(struct i915_gpu_error *error)
+{
+       DEFINE_WAIT_BIT(wq_entry, &error->flags, I915_WEDGED);
+
+       __wait_on_bit(&error->reset_queue,
+                     &wq_entry, bit_wait, TASK_UNINTERRUPTIBLE);
+}
+
 void i915_gem_set_wedged(struct drm_i915_private *i915)
 {
+       struct i915_gpu_error *error = &i915->gpu_error;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
 
-       GEM_TRACE("start\n");
+       if (test_bit(I915_WEDGED, &error->flags))
+               return;
+
+       if (test_and_set_bit(I915_WEDGE_IN_PROGRESS, &error->flags)) {
+               wait_for_wedged(error);
+               return;
+       }
 
        if (GEM_SHOW_DEBUG()) {
                struct drm_printer p = drm_debug_printer(__func__);
@@ -3312,8 +3327,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
                        intel_engine_dump(engine, &p, "%s\n", engine->name);
        }
 
-       set_bit(I915_WEDGED, &i915->gpu_error.flags);
-       smp_mb__after_atomic();
+       GEM_TRACE("start\n");
 
        /*
         * First, stop submission to hw, but do not yet complete requests by
@@ -3372,17 +3386,25 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
                i915_gem_reset_finish_engine(engine);
        }
 
+       smp_mb__before_atomic();
+       set_bit(I915_WEDGED, &error->flags);
+       clear_bit(I915_WEDGE_IN_PROGRESS, &error->flags);
+
        GEM_TRACE("end\n");
 
-       wake_up_all(&i915->gpu_error.reset_queue);
+       wake_up_all(&error->reset_queue);
 }
 
 bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 {
+       struct i915_gpu_error *error = &i915->gpu_error;
        struct i915_timeline *tl;
 
        lockdep_assert_held(&i915->drm.struct_mutex);
-       if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
+
+       if (test_bit(I915_WEDGE_IN_PROGRESS, &error->flags))
+               wait_for_wedged(error);
+       if (!test_bit(I915_WEDGED, &error->flags))
                return true;
 
        GEM_TRACE("start\n");
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index f893a4e8b783..1a78a8f330f2 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -267,8 +267,9 @@ struct i915_gpu_error {
 #define I915_RESET_BACKOFF     0
 #define I915_RESET_HANDOFF     1
 #define I915_RESET_MODESET     2
+#define I915_RESET_ENGINE      3
 #define I915_WEDGED            (BITS_PER_LONG - 1)
-#define I915_RESET_ENGINE      (I915_WEDGED - I915_NUM_ENGINES)
+#define I915_WEDGE_IN_PROGRESS (I915_WEDGED - 1)
 
        /** Number of times an engine has been reset */
        u32 reset_engine_count[I915_NUM_ENGINES];
-- 
2.18.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to