Chris Wilson <ch...@chris-wilson.co.uk> writes:

> Commit 0f36a85c3bd5 ("drm/i915: Flush pending interrupt following a GPU
> reset") got confused and only applied the flush to the set-wedge path
> (which itself is proving troublesome), but we also need the
> serialisation on the regular reset path. Oops.
>
> Move the interrupt into reset_irq() and make it common to the reset and
> final set-wedge.
>
> v2: reset_irq() after port cancellation, as we assert that
> execlists->active is sane for cancellation (and is being reset by
> reset_irq).
>
> References: 0f36a85c3bd5 ("drm/i915: Flush pending interrupt following a GPU 
> reset")
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuopp...@linux.intel.com>
> Cc: Michel Thierry <michel.thie...@intel.com>
> Cc: Michał Winiarski <michal.winiar...@intel.com>
> Cc: Jeff McGee <jeff.mc...@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuopp...@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 107 
> +++++++++++++++++++--------------------
>  1 file changed, 53 insertions(+), 54 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
> b/drivers/gpu/drm/i915/intel_lrc.c
> index ce09c5ad334f..b4ab06b05e58 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -740,6 +740,57 @@ execlists_cancel_port_requests(struct 
> intel_engine_execlists * const execlists)
>       }
>  }
>  
> +static void clear_gtiir(struct intel_engine_cs *engine)
> +{
> +     static const u8 gtiir[] = {
> +             [RCS]  = 0,
> +             [BCS]  = 0,
> +             [VCS]  = 1,
> +             [VCS2] = 1,
> +             [VECS] = 3,
> +     };
> +     struct drm_i915_private *dev_priv = engine->i915;
> +     int i;
> +
> +     /* TODO: correctly reset irqs for gen11 */
> +     if (WARN_ON_ONCE(INTEL_GEN(engine->i915) >= 11))
> +             return;
> +
> +     GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
> +
> +     /*
> +      * Clear any pending interrupt state.
> +      *
> +      * We do it twice out of paranoia that some of the IIR are
> +      * double buffered, and so if we only reset it once there may
> +      * still be an interrupt pending.
> +      */
> +     for (i = 0; i < 2; i++) {
> +             I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
> +                        engine->irq_keep_mask);
> +             POSTING_READ(GEN8_GT_IIR(gtiir[engine->id]));
> +     }
> +     GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) &
> +                engine->irq_keep_mask);
> +}
> +
> +static void reset_irq(struct intel_engine_cs *engine)
> +{
> +     /* Mark all CS interrupts as complete */
> +     smp_store_mb(engine->execlists.active, 0);
> +     synchronize_hardirq(engine->i915->drm.irq);
> +
> +     clear_gtiir(engine);
> +
> +     /*
> +      * The port is checked prior to scheduling a tasklet, but
> +      * just in case we have suspended the tasklet to do the
> +      * wedging make sure that when it wakes, it decides there
> +      * is no work to do by clearing the irq_posted bit.
> +      */
> +     clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> +}
> +
>  static void execlists_cancel_requests(struct intel_engine_cs *engine)
>  {
>       struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -767,6 +818,7 @@ static void execlists_cancel_requests(struct 
> intel_engine_cs *engine)
>  
>       /* Cancel the requests on the HW and clear the ELSP tracker. */
>       execlists_cancel_port_requests(execlists);
> +     reset_irq(engine);
>  
>       spin_lock(&engine->timeline->lock);
>  
> @@ -805,18 +857,6 @@ static void execlists_cancel_requests(struct 
> intel_engine_cs *engine)
>  
>       spin_unlock(&engine->timeline->lock);
>  
> -     /* Mark all CS interrupts as complete */
> -     smp_store_mb(execlists->active, 0);
> -     synchronize_hardirq(engine->i915->drm.irq);
> -
> -     /*
> -      * The port is checked prior to scheduling a tasklet, but
> -      * just in case we have suspended the tasklet to do the
> -      * wedging make sure that when it wakes, it decides there
> -      * is no work to do by clearing the irq_posted bit.
> -      */
> -     clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> -
>       local_irq_restore(flags);
>  }
>  
> @@ -1566,14 +1606,6 @@ static int intel_init_workaround_bb(struct 
> intel_engine_cs *engine)
>       return ret;
>  }
>  
> -static u8 gtiir[] = {
> -     [RCS] = 0,
> -     [BCS] = 0,
> -     [VCS] = 1,
> -     [VCS2] = 1,
> -     [VECS] = 3,
> -};
> -
>  static void enable_execlists(struct intel_engine_cs *engine)
>  {
>       struct drm_i915_private *dev_priv = engine->i915;
> @@ -1657,35 +1689,6 @@ static int gen9_init_render_ring(struct 
> intel_engine_cs *engine)
>       return init_workarounds_ring(engine);
>  }
>  
> -static void reset_irq(struct intel_engine_cs *engine)
> -{
> -     struct drm_i915_private *dev_priv = engine->i915;
> -     int i;
> -
> -     /* TODO: correctly reset irqs for gen11 */
> -     if (WARN_ON_ONCE(INTEL_GEN(engine->i915) >= 11))
> -             return;
> -
> -     GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
> -
> -     /*
> -      * Clear any pending interrupt state.
> -      *
> -      * We do it twice out of paranoia that some of the IIR are double
> -      * buffered, and if we only reset it once there may still be
> -      * an interrupt pending.
> -      */
> -     for (i = 0; i < 2; i++) {
> -             I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
> -                        engine->irq_keep_mask);
> -             POSTING_READ(GEN8_GT_IIR(gtiir[engine->id]));
> -     }
> -     GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) &
> -                engine->irq_keep_mask);
> -
> -     clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> -}
> -
>  static void reset_common_ring(struct intel_engine_cs *engine,
>                             struct i915_request *request)
>  {
> @@ -1699,8 +1702,6 @@ static void reset_common_ring(struct intel_engine_cs 
> *engine,
>       /* See execlists_cancel_requests() for the irq/spinlock split. */
>       local_irq_save(flags);
>  
> -     reset_irq(engine);
> -
>       /*
>        * Catch up with any missed context-switch interrupts.
>        *
> @@ -1711,15 +1712,13 @@ static void reset_common_ring(struct intel_engine_cs 
> *engine,
>        * requests were completed.
>        */
>       execlists_cancel_port_requests(execlists);
> +     reset_irq(engine);
>  
>       /* Push back any incomplete requests for replay after the reset. */
>       spin_lock(&engine->timeline->lock);
>       __unwind_incomplete_requests(engine);
>       spin_unlock(&engine->timeline->lock);
>  
> -     /* Mark all CS interrupts as complete */
> -     execlists->active = 0;
> -
>       local_irq_restore(flags);
>  
>       /*
> -- 
> 2.16.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to