Re: [Intel-gfx] [PATCH 14/40] drm/i915: Combine multiple internal plists into the same i915_priolist bucket

2018-09-25 Thread Chris Wilson
Quoting Tvrtko Ursulin (2018-09-24 11:25:37)
> 
> On 19/09/2018 20:55, Chris Wilson wrote:
> > As we are about to allow ourselves to slightly bump the user priority
> > into a few different sublevels, pack those internal priority lists
> > into the same i915_priolist to keep the rbtree compact and avoid having
> > to allocate the default user priority even after the internal bumping.
> > The downside to having a requests[] rather than a node per active list,
> > is that we then have to walk over the empty higher priority lists. To
> > compensate, we track the active buckets and use a small bitmap to skip
> > over any inactive ones.
> > 
> > Signed-off-by: Chris Wilson 
> > ---
> >   drivers/gpu/drm/i915/intel_engine_cs.c  |  6 +-
> >   drivers/gpu/drm/i915/intel_guc_submission.c | 12 ++-
> >   drivers/gpu/drm/i915/intel_lrc.c| 87 ++---
> >   drivers/gpu/drm/i915/intel_ringbuffer.h | 13 ++-
> >   4 files changed, 80 insertions(+), 38 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
> > b/drivers/gpu/drm/i915/intel_engine_cs.c
> > index 217ed3ee1cab..83f2f7774c1f 100644
> > --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> > @@ -1534,10 +1534,10 @@ void intel_engine_dump(struct intel_engine_cs 
> > *engine,
> >   count = 0;
> >   drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
> >   for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
> > - struct i915_priolist *p =
> > - rb_entry(rb, typeof(*p), node);
> > + struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
> > + int i;
> >   
> > - list_for_each_entry(rq, &p->requests, sched.link) {
> > + priolist_for_each_request(rq, p, i) {
> >   if (count++ < MAX_REQUESTS_TO_SHOW - 1)
> >   print_request(m, rq, "\t\tQ ");
> >   else
> > diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c 
> > b/drivers/gpu/drm/i915/intel_guc_submission.c
> > index 6f693ef62c64..8531bd917ec3 100644
> > --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> > @@ -726,30 +726,28 @@ static bool __guc_dequeue(struct intel_engine_cs 
> > *engine)
> >   while ((rb = rb_first_cached(&execlists->queue))) {
> >   struct i915_priolist *p = to_priolist(rb);
> >   struct i915_request *rq, *rn;
> > + int i;
> >   
> > - list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
> > + priolist_for_each_request_consume(rq, rn, p, i) {
> 
> Hm consumed clears the bitmask every time, but we are not certain yet we 
> will consume the request.

We clear it after the loop, so when we take the early break the bitmask
is unaffected.

> >   if (last && rq->hw_context != last->hw_context) {
> > - if (port == last_port) {
> > - __list_del_many(&p->requests,
> > - &rq->sched.link);
> > + if (port == last_port)
> >   goto done;
> > - }
> >   
> >   if (submit)
> >   port_assign(port, last);
> >   port++;
> >   }
> >   
> > - INIT_LIST_HEAD(&rq->sched.link);
> > + list_del_init(&rq->sched.link);
> >   
> >   __i915_request_submit(rq);
> >   trace_i915_request_in(rq, port_index(port, 
> > execlists));
> > +
> >   last = rq;
> >   submit = true;
> >   }
> >   
> >   rb_erase_cached(&p->node, &execlists->queue);
> > - INIT_LIST_HEAD(&p->requests);
> >   if (p->priority != I915_PRIORITY_NORMAL)
> >   kmem_cache_free(engine->i915->priorities, p);
> >   }
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
> > b/drivers/gpu/drm/i915/intel_lrc.c
> > index e8de250c3413..aeae82b5223c 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -259,14 +259,49 @@ intel_lr_context_descriptor_update(struct 
> > i915_gem_context *ctx,
> >   ce->lrc_desc = desc;
> >   }
> >   
> > -static struct i915_priolist *
> > +static void assert_priolists(struct intel_engine_execlists * const 
> > execlists,
> > +  int queue_priority)
> > +{
> > + struct rb_node *rb;
> > + int last_prio, i;
> > +
> > + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> > + return;
> > +
> > + GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
> > +rb_first(&execlists->queue.rb_root));
> > +
> > + last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;
>  >
> > + for (rb 

Re: [Intel-gfx] [PATCH 14/40] drm/i915: Combine multiple internal plists into the same i915_priolist bucket

2018-09-24 Thread Tvrtko Ursulin


On 19/09/2018 20:55, Chris Wilson wrote:

As we are about to allow ourselves to slightly bump the user priority
into a few different sublevels, pack those internal priority lists
into the same i915_priolist to keep the rbtree compact and avoid having
to allocate the default user priority even after the internal bumping.
The downside to having a requests[] rather than a node per active list,
is that we then have to walk over the empty higher priority lists. To
compensate, we track the active buckets and use a small bitmap to skip
over any inactive ones.

Signed-off-by: Chris Wilson 
---
  drivers/gpu/drm/i915/intel_engine_cs.c  |  6 +-
  drivers/gpu/drm/i915/intel_guc_submission.c | 12 ++-
  drivers/gpu/drm/i915/intel_lrc.c| 87 ++---
  drivers/gpu/drm/i915/intel_ringbuffer.h | 13 ++-
  4 files changed, 80 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
b/drivers/gpu/drm/i915/intel_engine_cs.c
index 217ed3ee1cab..83f2f7774c1f 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1534,10 +1534,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
count = 0;
drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
-   struct i915_priolist *p =
-   rb_entry(rb, typeof(*p), node);
+   struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+   int i;
  
-		list_for_each_entry(rq, &p->requests, sched.link) {

+   priolist_for_each_request(rq, p, i) {
if (count++ < MAX_REQUESTS_TO_SHOW - 1)
print_request(m, rq, "\t\tQ ");
else
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c 
b/drivers/gpu/drm/i915/intel_guc_submission.c
index 6f693ef62c64..8531bd917ec3 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -726,30 +726,28 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
while ((rb = rb_first_cached(&execlists->queue))) {
struct i915_priolist *p = to_priolist(rb);
struct i915_request *rq, *rn;
+   int i;
  
-		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {

+   priolist_for_each_request_consume(rq, rn, p, i) {


Hm consumed clears the bitmask every time, but we are not certain yet we 
will consume the request.



if (last && rq->hw_context != last->hw_context) {
-   if (port == last_port) {
-   __list_del_many(&p->requests,
-   &rq->sched.link);
+   if (port == last_port)
goto done;
-   }
  
  if (submit)

port_assign(port, last);
port++;
}
  
-			INIT_LIST_HEAD(&rq->sched.link);

+   list_del_init(&rq->sched.link);
  
  			__i915_request_submit(rq);

trace_i915_request_in(rq, port_index(port, execlists));
+
last = rq;
submit = true;
}
  
  		rb_erase_cached(&p->node, &execlists->queue);

-   INIT_LIST_HEAD(&p->requests);
if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p);
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e8de250c3413..aeae82b5223c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -259,14 +259,49 @@ intel_lr_context_descriptor_update(struct 
i915_gem_context *ctx,
ce->lrc_desc = desc;
  }
  
-static struct i915_priolist *

+static void assert_priolists(struct intel_engine_execlists * const execlists,
+int queue_priority)
+{
+   struct rb_node *rb;
+   int last_prio, i;
+
+   if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+   return;
+
+   GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
+  rb_first(&execlists->queue.rb_root));
+
+   last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;

>

+   for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
+   struct i915_priolist *p = to_priolist(rb);
+
+   GEM_BUG_ON(p->priority >= last_prio);
+   last_prio = p->priority;
+
+   GEM_BUG_ON(!p->used);
+   for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
+   if (list_empty(&p->requests[i]))
+   continue;


Asserting that bitmask slot is not set if list is empty is not interesting?


+
+   GEM_BUG_ON(!(p->used & BIT(i)));
+