On 30/08/16 09:18, Chris Wilson wrote:
Currently the presumption is that request construction and its
submission to the GuC both happen under the same holding of
struct_mutex. We wish to relax this and separate the request
construction from the later submission to the GuC. This requires us to
reserve some space in the GuC command queue for the future submission.
For flexibility in handling out-of-order request submission, we do not
preallocate the next slot in the GuC command queue during request
construction, but merely ensure that enough space will be available
later.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 55 ++++++++++++++----------------
 drivers/gpu/drm/i915/intel_guc.h           |  3 ++
 2 files changed, 29 insertions(+), 29 deletions(-)

Hmm ... the functional changes look mostly OK, apart from some locking questions, but there seems to be a great deal of unnecessary churn, such as combining statements that had been kept separate for clarity :(
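For anyone else following along, the intended usage pattern appears to be (a sketch only, using the names from the patch, with error handling elided):

        /* Request construction: reserve one workqueue item's worth of
         * space. i915_guc_wq_check_space() bumps gc->wq_rsvd under the
         * new per-client spinlock, or fails with -EAGAIN if the ring,
         * minus outstanding reservations, cannot hold another item.
         */
        ret = i915_guc_wq_check_space(request);
        if (ret)
                return ret;

        /* ... arbitrarily later, possibly out of order ... */

        /* Submission: guc_add_workqueue_item() consumes the
         * reservation (gc->wq_rsvd -= wqi_size) and advances wq_tail.
         */
        i915_guc_submit(request);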

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 2332f9c98bdd..a047e61adc2a 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -434,20 +434,23 @@ int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
 {
        const size_t wqi_size = sizeof(struct guc_wq_item);
        struct i915_guc_client *gc = request->i915->guc.execbuf_client;
-       struct guc_process_desc *desc;
+       struct guc_process_desc *desc = gc->client_base + gc->proc_desc_offset;
        u32 freespace;
+       int ret;

-       GEM_BUG_ON(gc == NULL);
-
-       desc = gc->client_base + gc->proc_desc_offset;
-
+       spin_lock(&gc->lock);
        freespace = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
-       if (likely(freespace >= wqi_size))
-               return 0;
-
-       gc->no_wq_space += 1;
+       freespace -= gc->wq_rsvd;
+       if (likely(freespace >= wqi_size)) {
+               gc->wq_rsvd += wqi_size;
+               ret = 0;
+       } else {
+               gc->no_wq_space++;
+               ret = -EAGAIN;
+       }
+       spin_unlock(&gc->lock);

-       return -EAGAIN;
+       return ret;
 }

 static void guc_add_workqueue_item(struct i915_guc_client *gc,
@@ -457,22 +460,9 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
        const size_t wqi_size = sizeof(struct guc_wq_item);
        const u32 wqi_len = wqi_size/sizeof(u32) - 1;
        struct intel_engine_cs *engine = rq->engine;
-       struct guc_process_desc *desc;
        struct guc_wq_item *wqi;
        void *base;
-       u32 freespace, tail, wq_off, wq_page;
-
-       desc = gc->client_base + gc->proc_desc_offset;
-
-       /* Free space is guaranteed, see i915_guc_wq_check_space() above */

This comment seems to have been lost. It still applies (mutatis mutandis), so it should be relocated to some part of the new version ...

-       freespace = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size);
-       GEM_BUG_ON(freespace < wqi_size);
-
-       /* The GuC firmware wants the tail index in QWords, not bytes */
-       tail = rq->tail;
-       GEM_BUG_ON(tail & 7);
-       tail >>= 3;
-       GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);

This *commented* sequence of statements seems clearer than the replacement below ...

+       u32 wq_off, wq_page;

        /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
         * should not have the case where structure wqi is across page, neither
@@ -482,18 +472,19 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
         * workqueue buffer dw by dw.
         */
        BUILD_BUG_ON(wqi_size != 16);

This would be a good place to note that:

/* Reserved space is guaranteed, see i915_guc_wq_check_space() above */

+       GEM_BUG_ON(gc->wq_rsvd < wqi_size);

        /* postincrement WQ tail for next time */
        wq_off = gc->wq_tail;
+       GEM_BUG_ON(wq_off & (wqi_size - 1));
        gc->wq_tail += wqi_size;
        gc->wq_tail &= gc->wq_size - 1;
-       GEM_BUG_ON(wq_off & (wqi_size - 1));
+       gc->wq_rsvd -= wqi_size;

        /* WQ starts from the page after doorbell / process_desc */
        wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT;
-       wq_off &= PAGE_SIZE - 1;
        base = kmap_atomic(i915_gem_object_get_page(gc->vma->obj, wq_page));
-       wqi = (struct guc_wq_item *)((char *)base + wq_off);
+       wqi = (struct guc_wq_item *)((char *)base + offset_in_page(wq_off));

        /* Now fill in the 4-word work queue item */
        wqi->header = WQ_TYPE_INORDER |
@@ -504,7 +495,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
        /* The GuC wants only the low-order word of the context descriptor */
        wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine);

-       wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
+       wqi->ring_tail = rq->tail >> 3 << WQ_RING_TAIL_SHIFT;

This line is particularly ugly. I think it's the >> chevron << effect.
Parenthesisation would help, but it would be nicer as separate lines.
Also, there's no explanation of the "3" here, unlike the original version above.
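Something like the original sequence, restored as separate commented steps, would read better (untested, just rearranging the patch's own code, with the u32 tail local reinstated):

        /* The GuC firmware wants the tail index in QWords, not bytes */
        tail = rq->tail;
        GEM_BUG_ON(tail & 7);
        tail >>= 3;
        GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);

        wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;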

        wqi->fence_id = rq->fence.seqno;

        kunmap_atomic(base);
@@ -591,8 +582,10 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
        struct i915_guc_client *client = guc->execbuf_client;
        int b_ret;

+       spin_lock(&client->lock);
        guc_add_workqueue_item(client, rq);
        b_ret = guc_ring_doorbell(client);
+       spin_unlock(&client->lock);

        client->submissions[engine_id] += 1;

Outside the spinlock? Do we still hold the BKL (struct_mutex) during submit(), just not during i915_guc_wq_check_space()? If so, then guc_ring_doorbell() doesn't strictly need to be inside the spinlock (or the lock could be taken inside guc_add_workqueue_item()); but if not, then the update of submissions[] should be inside it too.
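If struct_mutex is indeed no longer guaranteed here, I'd expect something like this (a sketch only; alternatively, push the lock down into guc_add_workqueue_item() and leave the doorbell outside):

        spin_lock(&client->lock);
        guc_add_workqueue_item(client, rq);
        b_ret = guc_ring_doorbell(client);
        client->submissions[engine_id] += 1;
        client->retcode = b_ret;
        spin_unlock(&client->lock);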

        client->retcode = b_ret;
@@ -770,6 +763,8 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
        if (!client)
                return NULL;

+       spin_lock_init(&client->lock);
+
        client->owner = ctx;
        client->guc = guc;
        client->engines = engines;
@@ -1019,9 +1014,11 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
                engine->submit_request = i915_guc_submit;

                /* Replay the current set of previously submitted requests */
-               list_for_each_entry(request, &engine->request_list, link)
+               list_for_each_entry(request, &engine->request_list, link) {
+                       client->wq_rsvd += sizeof(struct guc_wq_item);

Presumably this is being called in some context that ensures we don't need to hold the spinlock while updating wq_rsvd? Maybe that should be mentioned in a comment ...
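... or, since enabling submission is hardly a fast path, just take the lock anyway (sketch):

        spin_lock(&client->lock);
        client->wq_rsvd += sizeof(struct guc_wq_item);
        spin_unlock(&client->lock);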

                        if (i915_sw_fence_done(&request->submit))
                                i915_guc_submit(request);
+               }
        }

        return 0;
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index c97326269588..27a622824b54 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -68,6 +68,8 @@ struct i915_guc_client {
        struct i915_gem_context *owner;
        struct intel_guc *guc;

+       spinlock_t lock;

It would be helpful if this lock were annotated with a list of things it protects. AFAICT it might be:
        wq_tail
        wq_rsvd
        no_wq_space
        cookie

b_fail and the submissions[] statistics seem to have been left unprotected :( though of course they're not really critical
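Something along these lines in the header would do (a sketch; the exact field list needs checking against the final locking scheme):

        /* Protects wq_tail, wq_rsvd, no_wq_space (and cookie?).
         * NB: b_fail and submissions[] are currently updated outside
         * this lock and rely on the caller's serialisation.
         */
        spinlock_t lock;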

.Dave.

+
        uint32_t engines;               /* bitmap of (host) engine ids  */
        uint32_t priority;
        uint32_t ctx_index;
@@ -81,6 +83,7 @@ struct i915_guc_client {
        uint32_t wq_offset;
        uint32_t wq_size;
        uint32_t wq_tail;
+       uint32_t wq_rsvd;
        uint32_t no_wq_space;
        uint32_t b_fail;
        int retcode;
