Since schedule-in and schedule-out are now both always under the tasklet
bitlock, we can reduce the individual atomic operations to simple
instructions and worry less.

This notably eliminates the race observed with intel_context_inflight in
__engine_unpark().

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2583
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 52 ++++++++++++++---------------
 1 file changed, 25 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 5f90ecacc22b..d5bd537de9b7 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1331,11 +1331,11 @@ __execlists_schedule_in(struct i915_request *rq)
                ce->lrc.ccid = ce->tag;
        } else {
                /* We don't need a strict matching tag, just different values */
-               unsigned int tag = ffs(READ_ONCE(engine->context_tag));
+               unsigned int tag = __ffs(engine->context_tag);
 
-               GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
-               clear_bit(tag - 1, &engine->context_tag);
-               ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32);
+               GEM_BUG_ON(tag >= BITS_PER_LONG);
+               __clear_bit(tag, &engine->context_tag);
+               ce->lrc.ccid = (1 + tag) << (GEN11_SW_CTX_ID_SHIFT - 32);
 
                BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
        }
@@ -1348,6 +1348,8 @@ __execlists_schedule_in(struct i915_request *rq)
        execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
        intel_engine_context_in(engine);
 
+       CE_TRACE(ce, "schedule-in, ccid:%x\n", ce->lrc.ccid);
+
        return engine;
 }
 
@@ -1359,13 +1361,10 @@ static inline void execlists_schedule_in(struct 
i915_request *rq, int idx)
        GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
        trace_i915_request_in(rq, idx);
 
-       old = READ_ONCE(ce->inflight);
-       do {
-               if (!old) {
-                       WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
-                       break;
-               }
-       } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
+       old = ce->inflight;
+       if (!old)
+               old = __execlists_schedule_in(rq);
+       WRITE_ONCE(ce->inflight, ptr_inc(old));
 
        GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
 }
@@ -1418,12 +1417,11 @@ static void kick_siblings(struct i915_request *rq, 
struct intel_context *ce)
                tasklet_hi_schedule(&ve->base.execlists.tasklet);
 }
 
-static inline void
-__execlists_schedule_out(struct i915_request *rq,
-                        struct intel_engine_cs * const engine,
-                        unsigned int ccid)
+static inline void __execlists_schedule_out(struct i915_request *rq)
 {
        struct intel_context * const ce = rq->context;
+       struct intel_engine_cs * const engine = rq->engine;
+       unsigned int ccid;
 
        /*
         * NB process_csb() is not under the engine->active.lock and hence
@@ -1431,6 +1429,8 @@ __execlists_schedule_out(struct i915_request *rq,
         * refrain from doing non-trivial work here.
         */
 
+       CE_TRACE(ce, "schedule-out, ccid:%x\n", ce->lrc.ccid);
+
        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                execlists_check_context(ce, engine, "after");
 
@@ -1442,12 +1442,13 @@ __execlists_schedule_out(struct i915_request *rq,
            i915_request_completed(rq))
                intel_engine_add_retire(engine, ce->timeline);
 
+       ccid = ce->lrc.ccid;
        ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
        ccid &= GEN12_MAX_CONTEXT_HW_ID;
        if (ccid < BITS_PER_LONG) {
                GEM_BUG_ON(ccid == 0);
                GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
-               set_bit(ccid - 1, &engine->context_tag);
+               __set_bit(ccid - 1, &engine->context_tag);
        }
 
        intel_context_update_runtime(ce);
@@ -1468,26 +1469,23 @@ __execlists_schedule_out(struct i915_request *rq,
         */
        if (ce->engine != engine)
                kick_siblings(rq, ce);
-
-       intel_context_put(ce);
 }
 
 static inline void
 execlists_schedule_out(struct i915_request *rq)
 {
        struct intel_context * const ce = rq->context;
-       struct intel_engine_cs *cur, *old;
-       u32 ccid;
 
        trace_i915_request_out(rq);
 
-       ccid = rq->context->lrc.ccid;
-       old = READ_ONCE(ce->inflight);
-       do
-               cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
-       while (!try_cmpxchg(&ce->inflight, &old, cur));
-       if (!cur)
-               __execlists_schedule_out(rq, old, ccid);
+       GEM_BUG_ON(!ce->inflight);
+       ce->inflight = ptr_dec(ce->inflight);
+       if (!intel_context_inflight_count(ce)) {
+               GEM_BUG_ON(ce->inflight != rq->engine);
+               __execlists_schedule_out(rq);
+               WRITE_ONCE(ce->inflight, NULL);
+               intel_context_put(ce);
+       }
 
        i915_request_put(rq);
 }
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to