Add a per-ring score that tracks how likely each ring is to be
the culprit for a GPU hang. If a ring is busy and not waiting,
it will get the highest score across calls to
i915_hangcheck_elapsed. This way we are most likely to find
the ring that caused the hang among the waiting ones.

Signed-off-by: Mika Kuoppala <mika.kuopp...@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c         |   63 ++++++++++++++++---------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |    1 +
 2 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 7342a96..a41ab2d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -413,7 +413,6 @@ static void notify_ring(struct drm_device *dev,
 
        wake_up_all(&ring->irq_queue);
        if (i915_enable_hangcheck) {
-               dev_priv->gpu_error.hangcheck_count = 0;
                mod_timer(&dev_priv->gpu_error.hangcheck_timer,
                          round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
        }
@@ -1962,52 +1961,56 @@ void i915_hangcheck_elapsed(unsigned long data)
        struct drm_device *dev = (struct drm_device *)data;
        drm_i915_private_t *dev_priv = dev->dev_private;
        struct intel_ring_buffer *ring;
-       bool err = false, idle;
        int i;
-       u32 seqno[I915_NUM_RINGS];
-       bool work_done;
+       int busy_count = 0, rings_hung = 0;
 
        if (!i915_enable_hangcheck)
                return;
 
-       idle = true;
        for_each_ring(ring, dev_priv, i) {
-               seqno[i] = ring->get_seqno(ring, false);
-               idle &= i915_hangcheck_ring_idle(ring, seqno[i], &err);
-       }
+               u32 seqno;
+               bool idle, err = false;
+
+               seqno = ring->get_seqno(ring, false);
+               idle = i915_hangcheck_ring_idle(ring, seqno, &err);
 
-       /* If all work is done then ACTHD clearly hasn't advanced. */
-       if (idle) {
-               if (err) {
-                       if (i915_hangcheck_hung(dev))
-                               return;
+               if (idle) {
+                       if (err)
+                               ring->hangcheck.score++;
+                       else
+                               ring->hangcheck.score = 0;
+               } else {
+                       busy_count++;
 
-                       goto repeat;
+                       if (ring->hangcheck.seqno == seqno) {
+                               ring->hangcheck.score++;
+
+                               /* Kick ring */
+                               i915_hangcheck_ring_hung(ring);
+                       } else {
+                               ring->hangcheck.score = 0;
+                       }
                }
 
-               dev_priv->gpu_error.hangcheck_count = 0;
-               return;
+               ring->hangcheck.seqno = seqno;
        }
 
-       work_done = false;
        for_each_ring(ring, dev_priv, i) {
-               if (ring->hangcheck.seqno != seqno[i]) {
-                       work_done = true;
-                       ring->hangcheck.seqno = seqno[i];
+               if (ring->hangcheck.score > 1) {
+                       rings_hung++;
+                       DRM_ERROR("%s seems hung\n", ring->name);
                }
        }
 
-       if (!work_done) {
-               if (i915_hangcheck_hung(dev))
-                       return;
-       } else {
-               dev_priv->gpu_error.hangcheck_count = 0;
-       }
+       if (rings_hung)
+               return i915_handle_error(dev, true);
 
-repeat:
-       /* Reset timer case chip hangs without another request being added */
-       mod_timer(&dev_priv->gpu_error.hangcheck_timer,
-                 round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
+       if (busy_count)
+               /* Reset timer case chip hangs without another request
+                * being added */
+               mod_timer(&dev_priv->gpu_error.hangcheck_timer,
+                         round_jiffies_up(jiffies +
+                                          DRM_I915_HANGCHECK_JIFFIES));
 }
 
 /* drm_dma.h hooks
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 844381e..503e913 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -39,6 +39,7 @@ struct  intel_hw_status_page {
 
 struct intel_ring_hangcheck {
        u32 seqno;
+       int score;
 };
 
 struct  intel_ring_buffer {
-- 
1.7.9.5

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to