On 05/05/2017 12:16, Chris Wilson wrote:
On Fri, May 05, 2017 at 11:49:21AM +0100, Tvrtko Ursulin wrote:

On 03/05/2017 12:37, Chris Wilson wrote:
+static void port_assign(struct execlist_port *port,
+                       struct drm_i915_gem_request *rq)
+{
+       GEM_BUG_ON(rq == port_request(port));
+
+       if (port_isset(port))
+               i915_gem_request_put(port_request(port));
+
+       port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
+}

Reason for not having one implementation of port_assign with
enable_nested_signalling outside it in the guc case?

The handling of port.count is a big difference between guc and
ordinary execlists. For execlists we count contexts, for guc we count
requests.

Bah missed it, scrolling up and down and relying on memory is not a substitute for split screen..

static void execlists_dequeue(struct intel_engine_cs *engine)
{
        struct drm_i915_gem_request *last;
@@ -397,7 +401,7 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
        struct rb_node *rb;
        bool submit = false;

-       last = port->request;
+       last = port_request(port);
        if (last)
                /* WaIdleLiteRestore:bdw,skl
                 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
@@ -407,7 +411,7 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
                 */
                last->tail = last->wa_tail;

-       GEM_BUG_ON(port[1].request);
+       GEM_BUG_ON(port_isset(&port[1]));

        /* Hardware submission is through 2 ports. Conceptually each port
         * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -464,7 +468,8 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)

                        GEM_BUG_ON(last->ctx == cursor->ctx);

-                       i915_gem_request_assign(&port->request, last);
+                       if (submit)
+                               port_assign(port, last);

Optimisation? GEM_BUG_ON(last != port_request(port))?

Kind of, it is so both paths look identical and yes so we do meet the
criteria that last != port_request(port) (because it is silly to do
the request_count dance if the goal is not to change request_count).

Ok.

@@ -479,7 +484,7 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
                submit = true;
        }
        if (submit) {
-               i915_gem_request_assign(&port->request, last);
+               port_assign(port, last);
                engine->execlist_first = rb;
        }
        spin_unlock_irq(&engine->timeline->lock);
@@ -488,16 +493,11 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
                execlists_submit_ports(engine);
}

-static bool execlists_elsp_idle(struct intel_engine_cs *engine)
-{
-       return !engine->execlist_port[0].request;
-}
-
static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
{
        const struct execlist_port *port = engine->execlist_port;

-       return port[0].count + port[1].count < 2;
+       return port_count(&port[0]) + port_count(&port[1]) < 2;
}

/*
@@ -547,7 +547,9 @@ static void intel_lrc_irq_handler(unsigned long data)
                tail = GEN8_CSB_WRITE_PTR(head);
                head = GEN8_CSB_READ_PTR(head);
                while (head != tail) {
+                       struct drm_i915_gem_request *rq;
                        unsigned int status;
+                       unsigned int count;

                        if (++head == GEN8_CSB_ENTRIES)
                                head = 0;
@@ -577,20 +579,24 @@ static void intel_lrc_irq_handler(unsigned long data)
                        GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
                                         port[0].context_id);

-                       GEM_BUG_ON(port[0].count == 0);
-                       if (--port[0].count == 0) {
+                       rq = port_unpack(&port[0], &count);
+                       GEM_BUG_ON(count == 0);
+                       if (--count == 0) {

If you changed this to be:

count--;
port_set(...);
if (count > 0)
        continue;

It would be perhaps a bit more readable, but a potentially useless
port_set on the other hand. Not sure.

We expect to overwrite port in the common path, or at least that's my
expectation. Decrementing count for lite-restore should be the
exception. Part of the intention is to do the minimal amount of work
(especially avoiding the appearance of setting port.count = 0
prematurely) and pass the later port.count == 0 assert.

I've seen the mode where we just append and append and append with no requests coming out in a while :), but I agree it is not the typical case. So as I said I am fine with this as it is.


                                GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-                               
GEM_BUG_ON(!i915_gem_request_completed(port[0].request));
-                               execlists_context_status_change(port[0].request,
-                                                               
INTEL_CONTEXT_SCHEDULE_OUT);
+                               GEM_BUG_ON(!i915_gem_request_completed(rq));
+                               execlists_context_status_change(rq, 
INTEL_CONTEXT_SCHEDULE_OUT);
+
+                               trace_i915_gem_request_out(rq);
+                               i915_gem_request_put(rq);

-                               trace_i915_gem_request_out(port[0].request);
-                               i915_gem_request_put(port[0].request);
                                port[0] = port[1];
                                memset(&port[1], 0, sizeof(port[1]));
+                       } else {
+                               port_set(&port[0], port_pack(rq, count));
                        }

-                       GEM_BUG_ON(port[0].count == 0 &&
+                       /* After the final element, the hw should be idle */
+                       GEM_BUG_ON(port_count(&port[0]) == 0 &&
                                   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
                }

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 4a599e3480a9..68765d45116b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -368,8 +368,14 @@ struct intel_engine_cs {
        /* Execlists */
        struct tasklet_struct irq_tasklet;
        struct execlist_port {
-               struct drm_i915_gem_request *request;
-               unsigned int count;
+               struct drm_i915_gem_request *request_count;
+#define EXECLIST_COUNT_BITS 2
+#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
+#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, 
EXECLIST_COUNT_BITS)
+#define port_set(p, packed) ((p)->request_count = (packed))
+#define port_isset(p) ((p)->request_count)
                GEM_DEBUG_DECL(u32 context_id);
        } execlist_port[2];
        struct rb_root execlist_queue;


Looks correct but I am still having trouble accepting the structure
shrink and code savings are worth having less readable code.

Excluding port_unpack, I think it's a reasonable tidy.

I think the tidy is separate from the packing. But ok, lets go with it and see what happens.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com>

Regards,

Tvrtko


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to