execlists: Suppress redundant preemption

Chris Wilson Tue, 29 Jan 2019 18:26:22 -0800

On unwinding the active request we give it a small (limited to internal
priority levels) boost to prevent it from being gazumped a second time.
However, this means that it can be promoted above the request that
triggered the preemption, causing a preempt-to-idle cycle for no
change. We can avoid this if we take the boost into account when
checking if the preemption request is valid.


v2: After preemption the active request will be after the preemptee if
they end up with equal priority.

v3: Tvrtko pointed out that this, the existing logic, makes
I915_PRIORITY_WAIT non-preemptible. Document this interesting quirk!

v4: Prove Tvrtko was right about WAIT being non-preemptible and test it.
v5: Except not all priorities were made equal, and WAIT is non-preemptible
only if we start off as !NEWCLIENT.

Signed-off-by: Chris Wilson <[email protected]>
Cc: Tvrtko Ursulin <[email protected]>
---
 drivers/gpu/drm/i915/intel_lrc.c             |  45 +++++-
 drivers/gpu/drm/i915/selftests/igt_spinner.c |   9 +-
 drivers/gpu/drm/i915/selftests/intel_lrc.c   | 159 +++++++++++++++++++
 3 files changed, 208 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a9eb0211ce77..2616b0b3e8d5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -164,6 +164,8 @@
 #define WA_TAIL_DWORDS 2
 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
 
+#define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT)
+
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
                                            struct intel_engine_cs *engine,
                                            struct intel_context *ce);
@@ -188,6 +190,34 @@ static inline int rq_prio(const struct i915_request *rq)
        return rq->sched.attr.priority;
 }
 
+static inline int active_prio(const struct i915_request *rq)
+{
+       int prio = rq_prio(rq);
+
+       /*
+        * On unwinding the active request, we give it a priority bump
+        * equivalent to a freshly submitted request. This protects it from
+        * being gazumped again, but it would be preferable if we didn't
+        * let it be gazumped in the first place!
+        *
+        * See __unwind_incomplete_requests()
+        */
+       if ((prio & ACTIVE_PRIORITY) != ACTIVE_PRIORITY &&
+           i915_request_started(rq)) {
+               /*
+                * After preemption, we insert the active request at the
+                * end of the new priority level. This means that we will be
+                * _lower_ priority than the preemptee all things equal (and
+                * so the preemption is valid), so adjust our comparison
+                * accordingly.
+                */
+               prio |= ACTIVE_PRIORITY;
+               prio--;
+       }
+
+       return prio;
+}
+
 static int queue_prio(const struct intel_engine_execlists *execlists)
 {
        struct i915_priolist *p;
@@ -208,7 +238,7 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
 static inline bool need_preempt(const struct intel_engine_cs *engine,
                                const struct i915_request *rq)
 {
-       const int last_prio = rq_prio(rq);
+       int last_prio;
 
        if (!intel_engine_has_preemption(engine))
                return false;
@@ -228,6 +258,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
         * preempt. If that hint is stale or we may be trying to preempt
         * ourselves, ignore the request.
         */
+       last_prio = active_prio(rq);
        if (!__execlists_need_preempt(engine->execlists.queue_priority_hint,
                                      last_prio))
                return false;
@@ -353,7 +384,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
        struct i915_request *rq, *rn, *active = NULL;
        struct list_head *uninitialized_var(pl);
-       int prio = I915_PRIORITY_INVALID | I915_PRIORITY_NEWCLIENT;
+       int prio = I915_PRIORITY_INVALID | ACTIVE_PRIORITY;
 
        lockdep_assert_held(&engine->timeline.lock);
 
@@ -384,9 +415,15 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
         * The active request is now effectively the start of a new client
         * stream, so give it the equivalent small priority bump to prevent
         * it being gazumped a second time by another peer.
+        *
+        * One consequence of this preemption boost is that we may jump
+        * over lesser priorities (such as I915_PRIORITY_WAIT), effectively
+        * making those priorities non-preemptible. They will be moved forward
+        * in the priority queue, but they will not gain immediate access to
+        * the GPU.
         */
-       if (!(prio & I915_PRIORITY_NEWCLIENT)) {
-               prio |= I915_PRIORITY_NEWCLIENT;
+       if ((prio & ACTIVE_PRIORITY) != ACTIVE_PRIORITY) {
+               prio |= ACTIVE_PRIORITY;
                active->sched.attr.priority = prio;
                list_move_tail(&active->sched.link,
                               i915_sched_lookup_priolist(engine, prio));
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
index 9ebd9225684e..86354e51bdd3 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.c
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c
@@ -142,10 +142,17 @@ igt_spinner_create_request(struct igt_spinner *spin,
        *batch++ = upper_32_bits(vma->node.start);
        *batch++ = MI_BATCH_BUFFER_END; /* not reached */
 
-       i915_gem_chipset_flush(spin->i915);
+       if (engine->emit_init_breadcrumb &&
+           rq->timeline->has_initial_breadcrumb) {
+               err = engine->emit_init_breadcrumb(rq);
+               if (err)
+                       goto cancel_rq;
+       }
 
        err = engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, 0);
 
+       i915_gem_chipset_flush(spin->i915);
+
 cancel_rq:
        if (err) {
                i915_request_skip(rq, err);
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
index fb35f53c9ce3..8774a3ca5a97 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -405,6 +405,164 @@ static int live_suppress_self_preempt(void *arg)
        goto err_client_b;
 }
 
+static int __i915_sw_fence_call
+dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+       return NOTIFY_DONE;
+}
+
+static struct i915_request *dummy_request(struct intel_engine_cs *engine)
+{
+       struct i915_request *rq;
+
+       rq = kmalloc(sizeof(*rq), GFP_KERNEL | __GFP_ZERO);
+       if (!rq)
+               return NULL;
+
+       INIT_LIST_HEAD(&rq->active_list);
+       rq->engine = engine;
+
+       i915_sched_node_init(&rq->sched);
+
+       /* mark this request as permanently incomplete */
+       rq->fence.seqno = 1;
+       rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
+
+       i915_sw_fence_init(&rq->submit, dummy_notify);
+       i915_sw_fence_commit(&rq->submit);
+
+       return rq;
+}
+
+static void dummy_request_free(struct i915_request *dummy)
+{
+       i915_request_mark_complete(dummy);
+       i915_sched_node_fini(dummy->engine->i915, &dummy->sched);
+       kfree(dummy);
+}
+
+static int live_suppress_wait_preempt(void *arg)
+{
+       struct drm_i915_private *i915 = arg;
+       struct preempt_client client[4];
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       intel_wakeref_t wakeref;
+       int err = -ENOMEM;
+       int i;
+
+       /*
+        * Waiters are given a little priority nudge, but not enough
+        * to actually cause any preemption. Double check that we do
+        * not needlessly generate preempt-to-idle cycles.
+        */
+
+       if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+               return 0;
+
+       if (USES_GUC_SUBMISSION(i915))
+               return 0; /* presume black box */
+
+       mutex_lock(&i915->drm.struct_mutex);
+       wakeref = intel_runtime_pm_get(i915);
+
+       if (preempt_client_init(i915, &client[0])) /* ELSP[0] */
+               goto err_unlock;
+       if (preempt_client_init(i915, &client[1])) /* ELSP[1] */
+               goto err_client_0;
+       if (preempt_client_init(i915, &client[2])) /* head of queue */
+               goto err_client_1;
+       if (preempt_client_init(i915, &client[3])) /* bystander */
+               goto err_client_2;
+
+       for_each_engine(engine, i915, id) {
+               int depth;
+
+               if (!engine->emit_init_breadcrumb)
+                       continue;
+
+               for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
+                       struct i915_request *rq[ARRAY_SIZE(client)];
+                       struct i915_request *dummy;
+
+                       engine->execlists.preempt_hang.count = 0;
+
+                       dummy = dummy_request(engine);
+                       if (!dummy)
+                               goto err_client_3;
+
+                       for (i = 0; i < ARRAY_SIZE(client); i++) {
+                               rq[i] = igt_spinner_create_request(&client[i].spin,
+                                                                  client[i].ctx, engine,
+                                                                  MI_NOOP);
+                               if (IS_ERR(rq[i])) {
+                                       err = PTR_ERR(rq[i]);
+                                       goto err_wedged;
+                               }
+
+                               /* Disable NEWCLIENT promotion */
+                               i915_gem_active_set(&rq[i]->timeline->last_request,
+                                                   dummy);
+                               i915_request_add(rq[i]);
+                       }
+
+                       dummy_request_free(dummy);
+
+                       GEM_BUG_ON(i915_request_completed(rq[0]));
+                       if (!igt_wait_for_spinner(&client[0].spin, rq[0])) {
+                               pr_err("First client failed to start\n");
+                               goto err_wedged;
+                       }
+                       GEM_BUG_ON(!i915_request_started(rq[0]));
+
+                       if (i915_request_wait(rq[depth],
+                                             I915_WAIT_LOCKED |
+                                             I915_WAIT_PRIORITY,
+                                             1) != -ETIME) {
+                               pr_err("Waiter depth:%d completed!\n", depth);
+                               goto err_wedged;
+                       }
+
+                       for (i = 0; i < ARRAY_SIZE(client); i++)
+                               igt_spinner_end(&client[i].spin);
+
+                       if (igt_flush_test(i915, I915_WAIT_LOCKED))
+                               goto err_wedged;
+
+                       if (engine->execlists.preempt_hang.count) {
+                               pr_err("Preemption recorded x%d, depth %d; should have been suppressed!\n",
+                                      engine->execlists.preempt_hang.count,
+                                      depth);
+                               err = -EINVAL;
+                               goto err_client_3;
+                       }
+               }
+       }
+
+       err = 0;
+err_client_3:
+       preempt_client_fini(&client[3]);
+err_client_2:
+       preempt_client_fini(&client[2]);
+err_client_1:
+       preempt_client_fini(&client[1]);
+err_client_0:
+       preempt_client_fini(&client[0]);
+err_unlock:
+       if (igt_flush_test(i915, I915_WAIT_LOCKED))
+               err = -EIO;
+       intel_runtime_pm_put(i915, wakeref);
+       mutex_unlock(&i915->drm.struct_mutex);
+       return err;
+
+err_wedged:
+       for (i = 0; i < ARRAY_SIZE(client); i++)
+               igt_spinner_end(&client[i].spin);
+       i915_gem_set_wedged(i915);
+       err = -EIO;
+       goto err_client_3;
+}
+
 static int live_preempt_hang(void *arg)
 {
        struct drm_i915_private *i915 = arg;
@@ -785,6 +943,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
                SUBTEST(live_preempt),
                SUBTEST(live_late_preempt),
                SUBTEST(live_suppress_self_preempt),
+               SUBTEST(live_suppress_wait_preempt),
                SUBTEST(live_preempt_hang),
                SUBTEST(live_preempt_smoke),
        };
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH 02/11] drm/i915/execlists: Suppress redundant preemption

Reply via email to