Re: [Intel-gfx] [PATCH 14/40] drm/i915: Combine multiple internal plists into the same i915_priolist bucket
Quoting Tvrtko Ursulin (2018-09-24 11:25:37) > > On 19/09/2018 20:55, Chris Wilson wrote: > > As we are about to allow ourselves to slightly bump the user priority > > into a few different sublevels, pack those internal priority lists > > into the same i915_priolist to keep the rbtree compact and avoid having > > to allocate the default user priority even after the internal bumping. > > The downside to having a requests[] rather than a node per active list, > > is that we then have to walk over the empty higher priority lists. To > > compensate, we track the active buckets and use a small bitmap to skip > > over any inactive ones. > > > > Signed-off-by: Chris Wilson > > --- > > drivers/gpu/drm/i915/intel_engine_cs.c | 6 +- > > drivers/gpu/drm/i915/intel_guc_submission.c | 12 ++- > > drivers/gpu/drm/i915/intel_lrc.c| 87 ++--- > > drivers/gpu/drm/i915/intel_ringbuffer.h | 13 ++- > > 4 files changed, 80 insertions(+), 38 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c > > b/drivers/gpu/drm/i915/intel_engine_cs.c > > index 217ed3ee1cab..83f2f7774c1f 100644 > > --- a/drivers/gpu/drm/i915/intel_engine_cs.c > > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c > > @@ -1534,10 +1534,10 @@ void intel_engine_dump(struct intel_engine_cs > > *engine, > > count = 0; > > drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); > > for (rb = rb_first_cached(>queue); rb; rb = rb_next(rb)) { > > - struct i915_priolist *p = > > - rb_entry(rb, typeof(*p), node); > > + struct i915_priolist *p = rb_entry(rb, typeof(*p), node); > > + int i; > > > > - list_for_each_entry(rq, >requests, sched.link) { > > + priolist_for_each_request(rq, p, i) { > > if (count++ < MAX_REQUESTS_TO_SHOW - 1) > > print_request(m, rq, "\t\tQ "); > > else > > diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c > > b/drivers/gpu/drm/i915/intel_guc_submission.c > > index 6f693ef62c64..8531bd917ec3 100644 > > --- a/drivers/gpu/drm/i915/intel_guc_submission.c > > +++ 
b/drivers/gpu/drm/i915/intel_guc_submission.c > > @@ -726,30 +726,28 @@ static bool __guc_dequeue(struct intel_engine_cs > > *engine) > > while ((rb = rb_first_cached(>queue))) { > > struct i915_priolist *p = to_priolist(rb); > > struct i915_request *rq, *rn; > > + int i; > > > > - list_for_each_entry_safe(rq, rn, >requests, sched.link) { > > + priolist_for_each_request_consume(rq, rn, p, i) { > > Hm consumed clears the bitmask every time, but we are not certain yet we > will consume the request. We clear it after the loop, so when we take the early break the bitmask is unaffected. > > if (last && rq->hw_context != last->hw_context) { > > - if (port == last_port) { > > - __list_del_many(>requests, > > - >sched.link); > > + if (port == last_port) > > goto done; > > - } > > > > if (submit) > > port_assign(port, last); > > port++; > > } > > > > - INIT_LIST_HEAD(>sched.link); > > + list_del_init(>sched.link); > > > > __i915_request_submit(rq); > > trace_i915_request_in(rq, port_index(port, > > execlists)); > > + > > last = rq; > > submit = true; > > } > > > > rb_erase_cached(>node, >queue); > > - INIT_LIST_HEAD(>requests); > > if (p->priority != I915_PRIORITY_NORMAL) > > kmem_cache_free(engine->i915->priorities, p); > > } > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c > > b/drivers/gpu/drm/i915/intel_lrc.c > > index e8de250c3413..aeae82b5223c 100644 > > --- a/drivers/gpu/drm/i915/intel_lrc.c > > +++ b/drivers/gpu/drm/i915/intel_lrc.c > > @@ -259,14 +259,49 @@ intel_lr_context_descriptor_update(struct > > i915_gem_context *ctx, > > ce->lrc_desc = desc; > > } > > > > -static struct i915_priolist * > > +static void assert_priolists(struct intel_engine_execlists * const > > execlists, > > + int queue_priority) > > +{ > > + struct rb_node *rb; > > + int last_prio, i; > > + > > + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) > > + return; > > + > > + GEM_BUG_ON(rb_first_cached(>queue) != > > +rb_first(>queue.rb_root)); > > + > > + last_prio = (queue_priority >> 
I915_USER_PRIORITY_SHIFT) + 1; > > > > + for (rb
Re: [Intel-gfx] [PATCH 14/40] drm/i915: Combine multiple internal plists into the same i915_priolist bucket
On 19/09/2018 20:55, Chris Wilson wrote: As we are about to allow ourselves to slightly bump the user priority into a few different sublevels, pack those internal priority lists into the same i915_priolist to keep the rbtree compact and avoid having to allocate the default user priority even after the internal bumping. The downside to having a requests[] rather than a node per active list, is that we then have to walk over the empty higher priority lists. To compensate, we track the active buckets and use a small bitmap to skip over any inactive ones. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_engine_cs.c | 6 +- drivers/gpu/drm/i915/intel_guc_submission.c | 12 ++- drivers/gpu/drm/i915/intel_lrc.c| 87 ++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 13 ++- 4 files changed, 80 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 217ed3ee1cab..83f2f7774c1f 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1534,10 +1534,10 @@ void intel_engine_dump(struct intel_engine_cs *engine, count = 0; drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); for (rb = rb_first_cached(>queue); rb; rb = rb_next(rb)) { - struct i915_priolist *p = - rb_entry(rb, typeof(*p), node); + struct i915_priolist *p = rb_entry(rb, typeof(*p), node); + int i; - list_for_each_entry(rq, >requests, sched.link) { + priolist_for_each_request(rq, p, i) { if (count++ < MAX_REQUESTS_TO_SHOW - 1) print_request(m, rq, "\t\tQ "); else diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 6f693ef62c64..8531bd917ec3 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -726,30 +726,28 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) while ((rb = rb_first_cached(>queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request
*rq, *rn; + int i; - list_for_each_entry_safe(rq, rn, >requests, sched.link) { + priolist_for_each_request_consume(rq, rn, p, i) { Hm consumed clears the bitmask every time, but we are not certain yet we will consume the request. if (last && rq->hw_context != last->hw_context) { - if (port == last_port) { - __list_del_many(>requests, - >sched.link); + if (port == last_port) goto done; - } if (submit) port_assign(port, last); port++; } - INIT_LIST_HEAD(>sched.link); + list_del_init(>sched.link); __i915_request_submit(rq); trace_i915_request_in(rq, port_index(port, execlists)); + last = rq; submit = true; } rb_erase_cached(>node, >queue); - INIT_LIST_HEAD(>requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e8de250c3413..aeae82b5223c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -259,14 +259,49 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, ce->lrc_desc = desc; } -static struct i915_priolist * +static void assert_priolists(struct intel_engine_execlists * const execlists, +int queue_priority) +{ + struct rb_node *rb; + int last_prio, i; + + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + return; + + GEM_BUG_ON(rb_first_cached(>queue) != + rb_first(>queue.rb_root)); + + last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1; > + for (rb = rb_first_cached(>queue); rb; rb = rb_next(rb)) { + struct i915_priolist *p = to_priolist(rb); + + GEM_BUG_ON(p->priority >= last_prio); + last_prio = p->priority; + + GEM_BUG_ON(!p->used); + for (i = 0; i < ARRAY_SIZE(p->requests); i++) { + if (list_empty(>requests[i])) + continue; Asserting that bitmask slot is not set if list is empty is not interesting? + + GEM_BUG_ON(!(p->used & BIT(i))); +