On 21/09/2019 10:55, Chris Wilson wrote:
If we are asked to submit a completed request, just move it onto the
active-list without modifying its payload. If we try to emit the
modified payload of a completed request, we risk racing with the
ring->head update during retirement which may advance the head past our
breadcrumb and so we generate a warning for the emission being behind
the RING_HEAD.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
  drivers/gpu/drm/i915/gt/intel_lrc.c | 45 +++++++++++++----------------
  drivers/gpu/drm/i915/i915_request.c | 28 ++++++++++--------
  drivers/gpu/drm/i915/i915_request.h |  2 +-
  3 files changed, 37 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 53e823d36b28..53bc4308793c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -806,6 +806,9 @@ static bool can_merge_rq(const struct i915_request *prev,
        GEM_BUG_ON(prev == next);
        GEM_BUG_ON(!assert_priority_queue(prev, next));
+ if (i915_request_completed(next))
+               return true;
+

This reads as very unintuitive. Why would it always be okay to merge possibly unrelated requests? From that it follows it must be a hack of some kind — and from that it follows it needs a comment to explain it.

        if (!can_merge_ctx(prev->hw_context, next->hw_context))
                return false;
@@ -1188,21 +1191,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                                continue;
                        }
- if (i915_request_completed(rq)) {
-                               ve->request = NULL;
-                               ve->base.execlists.queue_priority_hint = 
INT_MIN;
-                               rb_erase_cached(rb, &execlists->virtual);
-                               RB_CLEAR_NODE(rb);
-
-                               rq->engine = engine;
-                               __i915_request_submit(rq);
-
-                               spin_unlock(&ve->base.active.lock);
-
-                               rb = rb_first_cached(&execlists->virtual);
-                               continue;
-                       }
-
                        if (last && !can_merge_rq(last, rq)) {
                                spin_unlock(&ve->base.active.lock);
                                return; /* leave this for another */
@@ -1256,11 +1244,16 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
                                GEM_BUG_ON(ve->siblings[0] != engine);
                        }
- __i915_request_submit(rq);
-                       if (!i915_request_completed(rq)) {
+                       if (__i915_request_submit(rq)) {
                                submit = true;
                                last = rq;
                        }
+
+                       if (!submit) {
+                               spin_unlock(&ve->base.active.lock);
+                               rb = rb_first_cached(&execlists->virtual);
+                               continue;
+                       }

This block also needs a comment, I think. It's about skipping until the first incomplete request in the queue?

                }
spin_unlock(&ve->base.active.lock);
@@ -1273,8 +1266,7 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
                int i;
priolist_for_each_request_consume(rq, rn, p, i) {
-                       if (i915_request_completed(rq))
-                               goto skip;
+                       bool merge = true;
/*
                         * Can we combine this request with the current port?
@@ -1315,14 +1307,17 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
                                    ctx_single_port_submission(rq->hw_context))
                                        goto done;
- *port = execlists_schedule_in(last, port - execlists->pending);
-                               port++;
+                               merge = false;
                        }
- last = rq;
-                       submit = true;
-skip:
-                       __i915_request_submit(rq);
+                       if (__i915_request_submit(rq)) {
+                               if (!merge) {
+                                       *port = execlists_schedule_in(last, port - 
execlists->pending);
+                                       port++;
+                               }
+                               submit = true;
+                               last = rq;
+                       }
                }
rb_erase_cached(&p->node, &execlists->queue);
diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 9bd8538b1907..db1a0048a753 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -377,9 +377,10 @@ __i915_request_await_execution(struct i915_request *rq,
        return 0;
  }
-void __i915_request_submit(struct i915_request *request)
+bool __i915_request_submit(struct i915_request *request)
  {
        struct intel_engine_cs *engine = request->engine;
+       bool result = false;
GEM_TRACE("%s fence %llx:%lld, current %d\n",
                  engine->name,
@@ -389,6 +390,9 @@ void __i915_request_submit(struct i915_request *request)
        GEM_BUG_ON(!irqs_disabled());
        lockdep_assert_held(&engine->active.lock);
+ if (i915_request_completed(request))
+               goto xfer;

A comment here as well, I think, to explain what happens next with completed requests put on the active list. Do they just get removed during retire? Do they even need to be put on that list?

+
        if (i915_gem_context_is_banned(request->gem_context))
                i915_request_skip(request, -EIO);
@@ -412,13 +416,18 @@ void __i915_request_submit(struct i915_request *request)
            i915_sw_fence_signaled(&request->semaphore))
                engine->saturated |= request->sched.semaphores;
- /* We may be recursing from the signal callback of another i915 fence */
-       spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+       engine->emit_fini_breadcrumb(request,
+                                    request->ring->vaddr + request->postfix);
- list_move_tail(&request->sched.link, &engine->active.requests);
+       trace_i915_request_execute(request);
+       engine->serial++;
+       result = true;
- GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
-       set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
+xfer:  /* We may be recursing from the signal callback of another i915 fence */
+       spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+
+       if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags))
+               list_move_tail(&request->sched.link, &engine->active.requests);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
            !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
@@ -429,12 +438,7 @@ void __i915_request_submit(struct i915_request *request)
spin_unlock(&request->lock); - engine->emit_fini_breadcrumb(request,
-                                    request->ring->vaddr + request->postfix);
-
-       engine->serial++;
-
-       trace_i915_request_execute(request);
+       return result;
  }
void i915_request_submit(struct i915_request *request)
diff --git a/drivers/gpu/drm/i915/i915_request.h 
b/drivers/gpu/drm/i915/i915_request.h
index b18f49528ded..ec5bb4c2e5ae 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -292,7 +292,7 @@ int i915_request_await_execution(struct i915_request *rq,
void i915_request_add(struct i915_request *rq); -void __i915_request_submit(struct i915_request *request);
+bool __i915_request_submit(struct i915_request *request);
  void i915_request_submit(struct i915_request *request);
void i915_request_skip(struct i915_request *request, int error);


Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to