On 16/02/2017 11:13, Chris Wilson wrote:
When the timer expires for checking on interrupt processing, check to
see if any interrupts arrived within the last time period. If real
interrupts are still being delivered, we can be reassured that we
haven't missed the final interrupt as the waiter will still be woken.
Only once all activity ceases, do we have to worry about the waiter
never being woken and so need to install a timer to kick the waiter for
a slow arrival of a seqno.

References: https://bugs.freedesktop.org/show_bug.cgi?id=99816
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c          |  1 +
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 26 ++++++++++++--------------
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  3 ++-
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 1ce54601432b..d3c851c93ee3 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1037,6 +1037,7 @@ static void notify_ring(struct intel_engine_cs *engine)
        struct intel_wait *wait;

        trace_i915_gem_request_notify(engine);
+       atomic_inc(&engine->irq_count);
        set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);

        rcu_read_lock();
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c 
b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index ef3adfd37d7d..3267e671888c 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -26,6 +26,11 @@

 #include "i915_drv.h"

+static unsigned long wait_timeout(void)
+{
+       return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES);
+}
+
 static void intel_breadcrumbs_hangcheck(unsigned long data)
 {
        struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
@@ -36,8 +41,9 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
                return;
        }

-       if (time_before(jiffies, b->timeout)) {
-               mod_timer(&b->hangcheck, b->timeout);
+       if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) {
+               b->hangcheck_interrupts = atomic_read(&engine->irq_count);
+               mod_timer(&b->hangcheck, wait_timeout());
                return;
        }

@@ -57,11 +63,6 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
        i915_queue_hangcheck(engine->i915);
 }

-static unsigned long wait_timeout(void)
-{
-       return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES);
-}
-
 static void intel_breadcrumbs_fake_irq(unsigned long data)
 {
        struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
@@ -176,8 +177,8 @@ static void __intel_breadcrumbs_enable_irq(struct 
intel_breadcrumbs *b)
                i915_queue_hangcheck(i915);
        } else {
                /* Ensure we never sleep indefinitely */
-               GEM_BUG_ON(!time_after(b->timeout, jiffies));
-               mod_timer(&b->hangcheck, b->timeout);
+               b->hangcheck_interrupts = atomic_read(&engine->irq_count);
+               mod_timer(&b->hangcheck, wait_timeout());
        }
 }

@@ -270,7 +271,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs 
*engine,
                GEM_BUG_ON(!next && !first);
                if (next && next != &wait->node) {
                        GEM_BUG_ON(first);
-                       b->timeout = wait_timeout();
                        b->first_wait = to_wait(next);
                        /* As there is a delay between reading the current
                         * seqno, processing the completed tasks and selecting
@@ -297,7 +297,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs 
*engine,

        if (first) {
                GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
-               b->timeout = wait_timeout();
                b->first_wait = wait;
                /* After assigning ourselves as the new bottom-half, we must
                 * perform a cursory check to prevent a missed interrupt.
@@ -396,7 +395,6 @@ static void __intel_engine_remove_wait(struct 
intel_engine_cs *engine,
                         * the interrupt, or if we have to handle an
                         * exception rather than a seqno completion.
                         */
-                       b->timeout = wait_timeout();
                        b->first_wait = to_wait(next);
                        if (b->first_wait->seqno != wait->seqno)
                                __intel_breadcrumbs_enable_irq(b);
@@ -702,10 +700,10 @@ void intel_engine_reset_breadcrumbs(struct 
intel_engine_cs *engine)
                irq_disable(engine);

        if (intel_engine_has_waiter(engine)) {
-               b->timeout = wait_timeout();
                if (test_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted))
                        wake_up_process(b->first_wait->tsk);
-               mod_timer(&b->hangcheck, b->timeout);
+               b->hangcheck_interrupts = atomic_read(&engine->irq_count);
+               mod_timer(&b->hangcheck, wait_timeout());
                /* sanitize the IMR and unmask any auxiliary interrupts */
        }

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index e036ee3e35f9..75b572a9e80a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -213,6 +213,7 @@ struct intel_engine_cs {

        struct intel_render_state *render_state;

+       atomic_t irq_count;
        unsigned long irq_posted;
 #define ENGINE_IRQ_BREADCRUMB 0
 #define ENGINE_IRQ_EXECLIST 1
@@ -243,7 +244,7 @@ struct intel_engine_cs {
                struct timer_list fake_irq; /* used after a missed interrupt */
                struct timer_list hangcheck; /* detect missed interrupts */

-               unsigned long timeout;
+               unsigned int hangcheck_interrupts;

                bool irq_armed : 1;
                bool irq_enabled : 1;


I prefer this version by far. Especially since it keeps the normal hangcheck period as long as the interrupts are happening.

I suppose it could only get foiled if interrupts kept coming but the seqno staying put. Does this sounds like a concern?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to