Re: [Intel-gfx] [PATCH] drm/i915/preemption: Allow preemption between submission ports

2018-02-23 Thread Chris Wilson
Quoting Mika Kuoppala (2018-02-23 15:18:17)
> Chris Wilson  writes:
> 
> > Sometimes we need to boost the priority of an in-flight request, which
> > may lead to the situation where the second submission port then contains
> > a higher priority context than the first and so we need to inject a
> > preemption event. To do so we must always check inside
> > execlists_dequeue() whether there is a priority inversion between the
> > ports themselves as well as the head of the priority sorted queue, and we
> > cannot just skip dequeuing if the queue is empty.
> >
> > Signed-off-by: Chris Wilson 
> > Cc: Michał Winiarski 
> > Cc: Michel Thierry 
> > Cc: Mika Kuoppala 
> > Cc: Tvrtko Ursulin 
> 
> Reviewed-by: Mika Kuoppala 

Added detail to @queue_priority and pushed. Thanks for the review!
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/preemption: Allow preemption between submission ports

2018-02-23 Thread Mika Kuoppala
Chris Wilson  writes:

> Sometimes we need to boost the priority of an in-flight request, which
> may lead to the situation where the second submission port then contains
> a higher priority context than the first and so we need to inject a
> preemption event. To do so we must always check inside
> execlists_dequeue() whether there is a priority inversion between the
> ports themselves as well as the head of the priority sorted queue, and we
> cannot just skip dequeuing if the queue is empty.
>
> Signed-off-by: Chris Wilson 
> Cc: Michał Winiarski 
> Cc: Michel Thierry 
> Cc: Mika Kuoppala 
> Cc: Tvrtko Ursulin 

Reviewed-by: Mika Kuoppala 

> ---
> Rebase for conflicts
> -Chris
> ---
>  drivers/gpu/drm/i915/intel_engine_cs.c  |   2 +
>  drivers/gpu/drm/i915/intel_guc_submission.c |  17 +--
>  drivers/gpu/drm/i915/intel_lrc.c| 161 
> 
>  drivers/gpu/drm/i915/intel_ringbuffer.h |   5 +
>  4 files changed, 107 insertions(+), 78 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
> b/drivers/gpu/drm/i915/intel_engine_cs.c
> index c31544406974..ce7fcf55ba18 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -423,6 +423,7 @@ static void intel_engine_init_execlist(struct 
> intel_engine_cs *engine)
>   BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
>   GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
>  
> + execlists->queue_priority = INT_MIN;
>   execlists->queue = RB_ROOT;
>   execlists->first = NULL;
>  }
> @@ -1903,6 +1904,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   spin_lock_irq(>timeline->lock);
>   list_for_each_entry(rq, >timeline->requests, link)
>   print_request(m, rq, "\t\tE ");
> + drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
>   for (rb = execlists->first; rb; rb = rb_next(rb)) {
>   struct i915_priolist *p =
>   rb_entry(rb, typeof(*p), node);
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c 
> b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 649113c7a3c2..586dde579903 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -75,6 +75,11 @@
>   *
>   */
>  
> +static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> +{
> + return rb_entry(rb, struct i915_priolist, node);
> +}
> +
>  static inline bool is_high_priority(struct intel_guc_client *client)
>  {
>   return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH ||
> @@ -682,15 +687,12 @@ static void guc_dequeue(struct intel_engine_cs *engine)
>   rb = execlists->first;
>   GEM_BUG_ON(rb_first(>queue) != rb);
>  
> - if (!rb)
> - goto unlock;
> -
>   if (port_isset(port)) {
>   if (engine->i915->preempt_context) {
>   struct guc_preempt_work *preempt_work =
>   >i915->guc.preempt_work[engine->id];
>  
> - if (rb_entry(rb, struct i915_priolist, node)->priority >
> + if (execlists->queue_priority >
>   max(port_request(port)->priotree.priority, 0)) {
>   execlists_set_active(execlists,
>EXECLISTS_ACTIVE_PREEMPT);
> @@ -706,8 +708,8 @@ static void guc_dequeue(struct intel_engine_cs *engine)
>   }
>   GEM_BUG_ON(port_isset(port));
>  
> - do {
> - struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> + while (rb) {
> + struct i915_priolist *p = to_priolist(rb);
>   struct i915_request *rq, *rn;
>  
>   list_for_each_entry_safe(rq, rn, >requests, priotree.link) {
> @@ -736,8 +738,9 @@ static void guc_dequeue(struct intel_engine_cs *engine)
>   INIT_LIST_HEAD(>requests);
>   if (p->priority != I915_PRIORITY_NORMAL)
>   kmem_cache_free(engine->i915->priorities, p);
> - } while (rb);
> + }
>  done:
> + execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
>   execlists->first = rb;
>   if (submit) {
>   port_assign(port, last);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
> b/drivers/gpu/drm/i915/intel_lrc.c
> index 964885b5d7cb..4bc72fbaf793 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -169,6 +169,23 @@ static void execlists_init_reg_state(u32 *reg_state,
>struct intel_engine_cs *engine,
>struct intel_ring *ring);
>  
> +static inline struct i915_priolist *to_priolist(struct rb_node 

Re: [Intel-gfx] [PATCH] drm/i915/preemption: Allow preemption between submission ports

2018-02-23 Thread Chris Wilson
Quoting Mika Kuoppala (2018-02-23 14:06:06)
> Chris Wilson  writes:
> >  static inline bool is_high_priority(struct intel_guc_client *client)
> >  {
> >   return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH ||
> > @@ -682,15 +687,12 @@ static void guc_dequeue(struct intel_engine_cs 
> > *engine)
> >   rb = execlists->first;
> >   GEM_BUG_ON(rb_first(>queue) != rb);
> >  
> > - if (!rb)
> > - goto unlock;
> > -
> >   if (port_isset(port)) {
> >   if (engine->i915->preempt_context) {
> >   struct guc_preempt_work *preempt_work =
> >   >i915->guc.preempt_work[engine->id];
> >  
> > - if (rb_entry(rb, struct i915_priolist, 
> > node)->priority >
> > + if (execlists->queue_priority >
> >   max(port_request(port)->priotree.priority, 0)) {
> >   execlists_set_active(execlists,
> >
> > EXECLISTS_ACTIVE_PREEMPT);
> 
> This is the priority inversion part? We preempt and clear the ports
> to rearrange if the last port has a higher priority request?

Yes, along with a strong kick from

> > @@ -1050,8 +1068,9 @@ static void execlists_schedule(struct i915_request 
> > *request, int prio)
> >   pt->priority = prio;
> >   if (!list_empty(>link)) {
> >   __list_del_entry(>link);
> > - insert_request(engine, pt, prio);
> > + queue_request(engine, pt, prio);
> >   }
> > + submit_queue(engine, prio);

So that we re-evaluate ELSP if the active prio change.

> > @@ -264,7 +281,7 @@ lookup_priolist(struct intel_engine_cs *engine,
> >   if (first)
> >   execlists->first = >node;
> >  
> > - return ptr_pack_bits(p, first, 1);
> > + return p;
> 
> Hmm there is no need for decode first as we always resubmit
> the queue depending on the prio?

Right, the first bit is now checked against queue_priority instead. If
we are higher priority than the queue, we must rerun the tasklet.
Whereas before we knew we only had to do it if we inserted into the
start of the queue.

> > @@ -453,12 +467,17 @@ static void inject_preempt_context(struct 
> > intel_engine_cs *engine)
> >  _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> > CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
> >  
> > + /*
> > +  * Switch to our empty preempt context so
> > +  * the state of the GPU is known (idle).
> > +  */
> >   GEM_TRACE("%s\n", engine->name);
> >   for (n = execlists_num_ports(>execlists); --n; )
> >   elsp_write(0, engine->execlists.elsp);
> >  
> >   elsp_write(ce->lrc_desc, engine->execlists.elsp);
> >   execlists_clear_active(>execlists, EXECLISTS_ACTIVE_HWACK);
> > + execlists_set_active(>execlists, EXECLISTS_ACTIVE_PREEMPT);
> 
> Surely a better place. Now looking at this would it be more prudent to
> move both the clear and set right before the last elsp write?
> 
> Well I guess it really doesn't matter as we hold the timeline lock.

And we only process ELSP from a single cpu, so it's all sequential,
right.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/preemption: Allow preemption between submission ports

2018-02-23 Thread Mika Kuoppala
Chris Wilson  writes:

> Sometimes we need to boost the priority of an in-flight request, which
> may lead to the situation where the second submission port then contains
> a higher priority context than the first and so we need to inject a
> preemption event. To do so we must always check inside
> execlists_dequeue() whether there is a priority inversion between the
> ports themselves as well as the head of the priority sorted queue, and we
> cannot just skip dequeuing if the queue is empty.
>
> Signed-off-by: Chris Wilson 
> Cc: Michał Winiarski 
> Cc: Michel Thierry 
> Cc: Mika Kuoppala 
> Cc: Tvrtko Ursulin 
> ---
> Rebase for conflicts
> -Chris
> ---
>  drivers/gpu/drm/i915/intel_engine_cs.c  |   2 +
>  drivers/gpu/drm/i915/intel_guc_submission.c |  17 +--
>  drivers/gpu/drm/i915/intel_lrc.c| 161 
> 
>  drivers/gpu/drm/i915/intel_ringbuffer.h |   5 +
>  4 files changed, 107 insertions(+), 78 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
> b/drivers/gpu/drm/i915/intel_engine_cs.c
> index c31544406974..ce7fcf55ba18 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -423,6 +423,7 @@ static void intel_engine_init_execlist(struct 
> intel_engine_cs *engine)
>   BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
>   GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
>  
> + execlists->queue_priority = INT_MIN;
>   execlists->queue = RB_ROOT;
>   execlists->first = NULL;
>  }
> @@ -1903,6 +1904,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>   spin_lock_irq(>timeline->lock);
>   list_for_each_entry(rq, >timeline->requests, link)
>   print_request(m, rq, "\t\tE ");
> + drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
>   for (rb = execlists->first; rb; rb = rb_next(rb)) {
>   struct i915_priolist *p =
>   rb_entry(rb, typeof(*p), node);
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c 
> b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 649113c7a3c2..586dde579903 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -75,6 +75,11 @@
>   *
>   */
>  
> +static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> +{
> + return rb_entry(rb, struct i915_priolist, node);
> +}
> +
>  static inline bool is_high_priority(struct intel_guc_client *client)
>  {
>   return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH ||
> @@ -682,15 +687,12 @@ static void guc_dequeue(struct intel_engine_cs *engine)
>   rb = execlists->first;
>   GEM_BUG_ON(rb_first(>queue) != rb);
>  
> - if (!rb)
> - goto unlock;
> -
>   if (port_isset(port)) {
>   if (engine->i915->preempt_context) {
>   struct guc_preempt_work *preempt_work =
>   >i915->guc.preempt_work[engine->id];
>  
> - if (rb_entry(rb, struct i915_priolist, node)->priority >
> + if (execlists->queue_priority >
>   max(port_request(port)->priotree.priority, 0)) {
>   execlists_set_active(execlists,
>EXECLISTS_ACTIVE_PREEMPT);

This is the priority inversion part? We preempt and clear the ports
to rearrange if the last port has a higher priority request?

> @@ -706,8 +708,8 @@ static void guc_dequeue(struct intel_engine_cs *engine)
>   }
>   GEM_BUG_ON(port_isset(port));
>  
> - do {
> - struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> + while (rb) {
> + struct i915_priolist *p = to_priolist(rb);
>   struct i915_request *rq, *rn;
>  
>   list_for_each_entry_safe(rq, rn, >requests, priotree.link) {
> @@ -736,8 +738,9 @@ static void guc_dequeue(struct intel_engine_cs *engine)
>   INIT_LIST_HEAD(>requests);
>   if (p->priority != I915_PRIORITY_NORMAL)
>   kmem_cache_free(engine->i915->priorities, p);
> - } while (rb);
> + }
>  done:
> + execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
>   execlists->first = rb;
>   if (submit) {
>   port_assign(port, last);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
> b/drivers/gpu/drm/i915/intel_lrc.c
> index 964885b5d7cb..4bc72fbaf793 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -169,6 +169,23 @@ static void execlists_init_reg_state(u32 *reg_state,
>struct intel_engine_cs *engine,
>struct intel_ring *ring);
>  

Re: [Intel-gfx] [PATCH] drm/i915/preemption: Allow preemption between submission ports

2018-02-23 Thread Chris Wilson
Quoting Chris Wilson (2018-02-22 14:11:54)
> Sometimes we need to boost the priority of an in-flight request, which
> may lead to the situation where the second submission port then contains
> a higher priority context than the first and so we need to inject a
> preemption event. To do so we must always check inside
> execlists_dequeue() whether there is a priority inversion between the
> ports themselves as well as the head of the priority sorted queue, and we
> cannot just skip dequeuing if the queue is empty.

Michał noted this doesn't extend past 2-port submission as simply as I
thought it might. Nevertheless it does solve the problem we have today
of priority inversion within ELSP[2]. Extending the submission model as
a whole to more ports is left as an exercise to the reader. :|
-Chris

> Signed-off-by: Chris Wilson 
> Cc: Michał Winiarski 
> Cc: Michel Thierry 
> Cc: Mika Kuoppala 
> Cc: Tvrtko Ursulin 
> ---
>  drivers/gpu/drm/i915/intel_engine_cs.c  |   2 +
>  drivers/gpu/drm/i915/intel_guc_submission.c |  17 +--
>  drivers/gpu/drm/i915/intel_lrc.c| 161 
> 
>  drivers/gpu/drm/i915/intel_ringbuffer.h |   5 +
>  4 files changed, 107 insertions(+), 78 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
> b/drivers/gpu/drm/i915/intel_engine_cs.c
> index c31544406974..ce7fcf55ba18 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -423,6 +423,7 @@ static void intel_engine_init_execlist(struct 
> intel_engine_cs *engine)
> BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
> GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
>  
> +   execlists->queue_priority = INT_MIN;
> execlists->queue = RB_ROOT;
> execlists->first = NULL;
>  }
> @@ -1903,6 +1904,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
> spin_lock_irq(>timeline->lock);
> list_for_each_entry(rq, >timeline->requests, link)
> print_request(m, rq, "E ");
> +   drm_printf(m, "Queue priority: %d\n", 
> execlists->queue_priority);
> for (rb = execlists->first; rb; rb = rb_next(rb)) {
> struct i915_priolist *p =
> rb_entry(rb, typeof(*p), node);
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c 
> b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 649113c7a3c2..586dde579903 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -75,6 +75,11 @@
>   *
>   */
>  
> +static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> +{
> +   return rb_entry(rb, struct i915_priolist, node);
> +}
> +
>  static inline bool is_high_priority(struct intel_guc_client *client)
>  {
> return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH ||
> @@ -682,15 +687,12 @@ static void guc_dequeue(struct intel_engine_cs *engine)
> rb = execlists->first;
> GEM_BUG_ON(rb_first(>queue) != rb);
>  
> -   if (!rb)
> -   goto unlock;
> -
> if (port_isset(port)) {
> if (engine->i915->preempt_context) {
> struct guc_preempt_work *preempt_work =
> >i915->guc.preempt_work[engine->id];
>  
> -   if (rb_entry(rb, struct i915_priolist, 
> node)->priority >
> +   if (execlists->queue_priority >
> max(port_request(port)->priotree.priority, 0)) {
> execlists_set_active(execlists,
>  
> EXECLISTS_ACTIVE_PREEMPT);
> @@ -706,8 +708,8 @@ static void guc_dequeue(struct intel_engine_cs *engine)
> }
> GEM_BUG_ON(port_isset(port));
>  
> -   do {
> -   struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> +   while (rb) {
> +   struct i915_priolist *p = to_priolist(rb);
> struct i915_request *rq, *rn;
>  
> list_for_each_entry_safe(rq, rn, >requests, priotree.link) 
> {
> @@ -736,8 +738,9 @@ static void guc_dequeue(struct intel_engine_cs *engine)
> INIT_LIST_HEAD(>requests);
> if (p->priority != I915_PRIORITY_NORMAL)
> kmem_cache_free(engine->i915->priorities, p);
> -   } while (rb);
> +   }
>  done:
> +   execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
> execlists->first = rb;
> if (submit) {
> port_assign(port, last);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
> b/drivers/gpu/drm/i915/intel_lrc.c
> index e781c912f197..8a98da7a5c83 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ 

Re: [Intel-gfx] [PATCH] drm/i915/preemption: Allow preemption between submission ports

2018-02-22 Thread Chris Wilson
Quoting Chris Wilson (2018-02-22 14:22:29)
> Sometimes we need to boost the priority of an in-flight request, which
> may lead to the situation where the second submission port then contains
> a higher priority context than the first and so we need to inject a
> preemption event. To do so we must always check inside
> execlists_dequeue() whether there is a priority inversion between the
> ports themselves as well as the head of the priority sorted queue, and we
> cannot just skip dequeuing if the queue is empty.
> 
> Signed-off-by: Chris Wilson 
> Cc: Michał Winiarski 
> Cc: Michel Thierry 
> Cc: Mika Kuoppala 
> Cc: Tvrtko Ursulin 

Note this probably merits a Fixes tag. Outside of igt causing priority
inversion between GPU hogs, is it a big enough problem to deserve
sending to stable@?
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915/preemption: Allow preemption between submission ports

2018-02-22 Thread Chris Wilson
Sometimes we need to boost the priority of an in-flight request, which
may lead to the situation where the second submission port then contains
a higher priority context than the first and so we need to inject a
preemption event. To do so we must always check inside
execlists_dequeue() whether there is a priority inversion between the
ports themselves as well as the head of the priority sorted queue, and we
cannot just skip dequeuing if the queue is empty.

Signed-off-by: Chris Wilson 
Cc: Michał Winiarski 
Cc: Michel Thierry 
Cc: Mika Kuoppala 
Cc: Tvrtko Ursulin 
---
Rebase for conflicts
-Chris
---
 drivers/gpu/drm/i915/intel_engine_cs.c  |   2 +
 drivers/gpu/drm/i915/intel_guc_submission.c |  17 +--
 drivers/gpu/drm/i915/intel_lrc.c| 161 
 drivers/gpu/drm/i915/intel_ringbuffer.h |   5 +
 4 files changed, 107 insertions(+), 78 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
b/drivers/gpu/drm/i915/intel_engine_cs.c
index c31544406974..ce7fcf55ba18 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -423,6 +423,7 @@ static void intel_engine_init_execlist(struct 
intel_engine_cs *engine)
BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
 
+   execlists->queue_priority = INT_MIN;
execlists->queue = RB_ROOT;
execlists->first = NULL;
 }
@@ -1903,6 +1904,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
spin_lock_irq(>timeline->lock);
list_for_each_entry(rq, >timeline->requests, link)
print_request(m, rq, "\t\tE ");
+   drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
for (rb = execlists->first; rb; rb = rb_next(rb)) {
struct i915_priolist *p =
rb_entry(rb, typeof(*p), node);
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c 
b/drivers/gpu/drm/i915/intel_guc_submission.c
index 649113c7a3c2..586dde579903 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -75,6 +75,11 @@
  *
  */
 
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+   return rb_entry(rb, struct i915_priolist, node);
+}
+
 static inline bool is_high_priority(struct intel_guc_client *client)
 {
return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH ||
@@ -682,15 +687,12 @@ static void guc_dequeue(struct intel_engine_cs *engine)
rb = execlists->first;
GEM_BUG_ON(rb_first(>queue) != rb);
 
-   if (!rb)
-   goto unlock;
-
if (port_isset(port)) {
if (engine->i915->preempt_context) {
struct guc_preempt_work *preempt_work =
>i915->guc.preempt_work[engine->id];
 
-   if (rb_entry(rb, struct i915_priolist, node)->priority >
+   if (execlists->queue_priority >
max(port_request(port)->priotree.priority, 0)) {
execlists_set_active(execlists,
 EXECLISTS_ACTIVE_PREEMPT);
@@ -706,8 +708,8 @@ static void guc_dequeue(struct intel_engine_cs *engine)
}
GEM_BUG_ON(port_isset(port));
 
-   do {
-   struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+   while (rb) {
+   struct i915_priolist *p = to_priolist(rb);
struct i915_request *rq, *rn;
 
list_for_each_entry_safe(rq, rn, >requests, priotree.link) {
@@ -736,8 +738,9 @@ static void guc_dequeue(struct intel_engine_cs *engine)
INIT_LIST_HEAD(>requests);
if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p);
-   } while (rb);
+   }
 done:
+   execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
execlists->first = rb;
if (submit) {
port_assign(port, last);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 964885b5d7cb..4bc72fbaf793 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -169,6 +169,23 @@ static void execlists_init_reg_state(u32 *reg_state,
 struct intel_engine_cs *engine,
 struct intel_ring *ring);
 
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+   return rb_entry(rb, struct i915_priolist, node);
+}
+
+static inline int rq_prio(const struct i915_request *rq)
+{
+   return rq->priotree.priority;
+}
+
+static inline bool need_preempt(const struct intel_engine_cs *engine,

[Intel-gfx] [PATCH] drm/i915/preemption: Allow preemption between submission ports

2018-02-22 Thread Chris Wilson
Sometimes we need to boost the priority of an in-flight request, which
may lead to the situation where the second submission port then contains
a higher priority context than the first and so we need to inject a
preemption event. To do so we must always check inside
execlists_dequeue() whether there is a priority inversion between the
ports themselves as well as the head of the priority sorted queue, and we
cannot just skip dequeuing if the queue is empty.

Signed-off-by: Chris Wilson 
Cc: Michał Winiarski 
Cc: Michel Thierry 
Cc: Mika Kuoppala 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/intel_engine_cs.c  |   2 +
 drivers/gpu/drm/i915/intel_guc_submission.c |  17 +--
 drivers/gpu/drm/i915/intel_lrc.c| 161 
 drivers/gpu/drm/i915/intel_ringbuffer.h |   5 +
 4 files changed, 107 insertions(+), 78 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
b/drivers/gpu/drm/i915/intel_engine_cs.c
index c31544406974..ce7fcf55ba18 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -423,6 +423,7 @@ static void intel_engine_init_execlist(struct 
intel_engine_cs *engine)
BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
 
+   execlists->queue_priority = INT_MIN;
execlists->queue = RB_ROOT;
execlists->first = NULL;
 }
@@ -1903,6 +1904,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
spin_lock_irq(>timeline->lock);
list_for_each_entry(rq, >timeline->requests, link)
print_request(m, rq, "\t\tE ");
+   drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
for (rb = execlists->first; rb; rb = rb_next(rb)) {
struct i915_priolist *p =
rb_entry(rb, typeof(*p), node);
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c 
b/drivers/gpu/drm/i915/intel_guc_submission.c
index 649113c7a3c2..586dde579903 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -75,6 +75,11 @@
  *
  */
 
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+   return rb_entry(rb, struct i915_priolist, node);
+}
+
 static inline bool is_high_priority(struct intel_guc_client *client)
 {
return (client->priority == GUC_CLIENT_PRIORITY_KMD_HIGH ||
@@ -682,15 +687,12 @@ static void guc_dequeue(struct intel_engine_cs *engine)
rb = execlists->first;
GEM_BUG_ON(rb_first(>queue) != rb);
 
-   if (!rb)
-   goto unlock;
-
if (port_isset(port)) {
if (engine->i915->preempt_context) {
struct guc_preempt_work *preempt_work =
>i915->guc.preempt_work[engine->id];
 
-   if (rb_entry(rb, struct i915_priolist, node)->priority >
+   if (execlists->queue_priority >
max(port_request(port)->priotree.priority, 0)) {
execlists_set_active(execlists,
 EXECLISTS_ACTIVE_PREEMPT);
@@ -706,8 +708,8 @@ static void guc_dequeue(struct intel_engine_cs *engine)
}
GEM_BUG_ON(port_isset(port));
 
-   do {
-   struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+   while (rb) {
+   struct i915_priolist *p = to_priolist(rb);
struct i915_request *rq, *rn;
 
list_for_each_entry_safe(rq, rn, >requests, priotree.link) {
@@ -736,8 +738,9 @@ static void guc_dequeue(struct intel_engine_cs *engine)
INIT_LIST_HEAD(>requests);
if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p);
-   } while (rb);
+   }
 done:
+   execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
execlists->first = rb;
if (submit) {
port_assign(port, last);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e781c912f197..8a98da7a5c83 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -169,6 +169,23 @@ static void execlists_init_reg_state(u32 *reg_state,
 struct intel_engine_cs *engine,
 struct intel_ring *ring);
 
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+   return rb_entry(rb, struct i915_priolist, node);
+}
+
+static inline int rq_prio(const struct i915_request *rq)
+{
+   return rq->priotree.priority;
+}
+
+static inline bool need_preempt(const struct intel_engine_cs *engine,
+