Chris Wilson <[email protected]> writes:

> If the preempted context takes too long to relinquish control, e.g. it
> is stuck inside a shader with arbitration disabled, evict that context
> with an engine reset. This ensures that preemptions are reasonably
> responsive, providing a tighter QoS for the more important context at
> the cost of flagging unresponsive contexts more frequently (i.e. instead
> of using an ~10s hangcheck, we now evict at ~10ms).  The challenge
> lies in picking a timeout that can be reasonably serviced by HW for
> typical workloads, balancing the existing clients against the needs for
> responsiveness.
>
> Signed-off-by: Chris Wilson <[email protected]>
> Cc: Mika Kuoppala <[email protected]>
> Cc: Tvrtko Ursulin <[email protected]>
> ---
>  drivers/gpu/drm/i915/Kconfig.profile | 12 ++++++
>  drivers/gpu/drm/i915/gt/intel_lrc.c  | 56 ++++++++++++++++++++++++++--
>  2 files changed, 65 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/Kconfig.profile 
> b/drivers/gpu/drm/i915/Kconfig.profile
> index 48df8889a88a..8273d3baafe4 100644
> --- a/drivers/gpu/drm/i915/Kconfig.profile
> +++ b/drivers/gpu/drm/i915/Kconfig.profile
> @@ -25,3 +25,15 @@ config DRM_I915_SPIN_REQUEST
>         May be 0 to disable the initial spin. In practice, we estimate
>         the cost of enabling the interrupt (if currently disabled) to be
>         a few microseconds.
> +
> +config DRM_I915_PREEMPT_TIMEOUT
> +     int "Preempt timeout (ms)"
> +     default 10 # milliseconds
> +     help
> +       How long to wait (in milliseconds) for a preemption event to occur
> +       when submitting a new context via execlists. If the current context
> +       does not hit an arbitration point and yield to HW before the timer
> +       expires, the HW will be reset to allow the more important context
> +       to execute.
> +
> +       May be 0 to disable the timeout.
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
> b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index fca79adb4aa3..e8d7deba3e49 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -889,6 +889,15 @@ enable_timeslice(struct intel_engine_cs *engine)
>       return last && need_timeslice(engine, last);
>  }
>  
> +static unsigned long preempt_expires(void)
> +{
> +     unsigned long timeout =

could be const

> +             msecs_to_jiffies_timeout(CONFIG_DRM_I915_PREEMPT_TIMEOUT);
> +
> +     barrier();

This needs a comment. I fail to connect the dots as jiffies
is volatile by nature.

> +     return jiffies + timeout;
> +}
> +
>  static void execlists_dequeue(struct intel_engine_cs *engine)
>  {
>       struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -1220,6 +1229,9 @@ static void execlists_dequeue(struct intel_engine_cs 
> *engine)
>               *port = execlists_schedule_in(last, port - execlists->pending);
>               memset(port + 1, 0, (last_port - port) * sizeof(*port));
>               execlists_submit_ports(engine);
> +
> +             if (CONFIG_DRM_I915_PREEMPT_TIMEOUT)
> +                     mod_timer(&execlists->timer, preempt_expires());
>       }
>  }
>  
> @@ -1376,13 +1388,48 @@ static void process_csb(struct intel_engine_cs 
> *engine)
>       invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
>  }
>  
> -static void __execlists_submission_tasklet(struct intel_engine_cs *const 
> engine)
> +static bool __execlists_submission_tasklet(struct intel_engine_cs *const 
> engine)
>  {
>       lockdep_assert_held(&engine->active.lock);
>  
>       process_csb(engine);
> -     if (!engine->execlists.pending[0])
> +     if (!engine->execlists.pending[0]) {
>               execlists_dequeue(engine);
> +             return true;
> +     }
> +
> +     return false;
> +}
> +
> +static void preempt_reset(struct intel_engine_cs *engine)
> +{
> +     const unsigned int bit = I915_RESET_ENGINE + engine->id;
> +     unsigned long *lock = &engine->i915->gpu_error.flags;
> +
> +     if (test_and_set_bit(bit, lock))
> +             return;
> +
> +     tasklet_disable_nosync(&engine->execlists.tasklet);
> +     spin_unlock(&engine->active.lock);
> +

Why do we need to drop the lock?
-Mika

> +     i915_reset_engine(engine, "preemption time out");
> +
> +     spin_lock(&engine->active.lock);
> +     tasklet_enable(&engine->execlists.tasklet);
> +
> +     clear_bit(bit, lock);
> +     wake_up_bit(lock, bit);
> +}
> +
> +static bool preempt_timeout(struct intel_engine_cs *const engine)
> +{
> +     if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
> +             return false;
> +
> +     if (!intel_engine_has_preemption(engine))
> +             return false;
> +
> +     return !timer_pending(&engine->execlists.timer);
>  }
>  
>  /*
> @@ -1395,7 +1442,10 @@ static void execlists_submission_tasklet(unsigned long 
> data)
>       unsigned long flags;
>  
>       spin_lock_irqsave(&engine->active.lock, flags);
> -     __execlists_submission_tasklet(engine);
> +
> +     if (!__execlists_submission_tasklet(engine) && preempt_timeout(engine))
> +             preempt_reset(engine);
> +
>       spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
> -- 
> 2.20.1
_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to