If a semaphore is waiting on another ring, which in turn happens to be
waiting on the first ring, but that second semaphore has been signalled,
we will be able to kick the second ring and so can treat the first ring
as a valid WAIT and not as HUNG.

v2: Be paranoid and cap the potential recursion depth whilst visiting
the semaphore signallers. (Mika)

References: https://bugs.freedesktop.org/show_bug.cgi?id=54226
References: https://bugs.freedesktop.org/show_bug.cgi?id=75502
Signed-off-by: Chris Wilson <[email protected]>
Cc: Mika Kuoppala <[email protected]>
Cc: [email protected]
---
 drivers/gpu/drm/i915/i915_irq.c         | 18 ++++++++++++++----
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index a702303eab08..2974698f8f27 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2831,10 +2831,14 @@ static int semaphore_passed(struct intel_engine_cs 
*ring)
        struct intel_engine_cs *signaller;
        u32 seqno, ctl;
 
-       ring->hangcheck.deadlock = true;
+       ring->hangcheck.deadlock++;
 
        signaller = semaphore_waits_for(ring, &seqno);
-       if (signaller == NULL || signaller->hangcheck.deadlock)
+       if (signaller == NULL)
+               return -1;
+
+       /* Prevent pathological recursion due to driver bugs */
+       if (signaller->hangcheck.deadlock >= I915_NUM_RINGS)
                return -1;
 
        /* cursory check for an unkickable deadlock */
@@ -2842,7 +2846,13 @@ static int semaphore_passed(struct intel_engine_cs *ring)
        if (ctl & RING_WAIT_SEMAPHORE && semaphore_passed(signaller) < 0)
                return -1;
 
-       return i915_seqno_passed(signaller->get_seqno(signaller, false), seqno);
+       if (i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
+               return 1;
+
+       if (signaller->hangcheck.deadlock)
+               return -1;
+
+       return 0;
 }
 
 static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
@@ -2851,7 +2861,7 @@ static void semaphore_clear_deadlocks(struct 
drm_i915_private *dev_priv)
        int i;
 
        for_each_ring(ring, dev_priv, i)
-               ring->hangcheck.deadlock = false;
+               ring->hangcheck.deadlock = 0;
 }
 
 static enum intel_ring_hangcheck_action
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 24357c597b54..31c2aab8ad2c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -55,7 +55,7 @@ struct intel_ring_hangcheck {
        u32 seqno;
        int score;
        enum intel_ring_hangcheck_action action;
-       bool deadlock;
+       int deadlock;
 };
 
 struct intel_ringbuffer {
-- 
2.0.0

--
To unsubscribe from this list: send the line "unsubscribe stable" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to