On 05/05/2017 12:16, Chris Wilson wrote:
On Fri, May 05, 2017 at 11:49:21AM +0100, Tvrtko Ursulin wrote:

On 03/05/2017 12:37, Chris Wilson wrote:
+static void port_assign(struct execlist_port *port,
+                       struct drm_i915_gem_request *rq)
+{
+       GEM_BUG_ON(rq == port_request(port));
+
+       if (port_isset(port))
+               i915_gem_request_put(port_request(port));
+
+       port_set(port, port_pack(i915_gem_request_get(rq), port_count(port)));
+}

Reason for not having one implementation of port_assign with
enable_nested_signalling outside it in the guc case?

The handling of port.count is a big difference between guc and
ordinary execlists. For execlists we count contexts, for guc we count
requests.

Bah missed it, scrolling up and down and relying on memory is not a substitute for split screen..

static void execlists_dequeue(struct intel_engine_cs *engine)
{
        struct drm_i915_gem_request *last;
@@ -397,7 +401,7 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
        struct rb_node *rb;
        bool submit = false;

-       last = port->request;
+       last = port_request(port);
        if (last)
                /* WaIdleLiteRestore:bdw,skl
                 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
@@ -407,7 +411,7 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
                 */
                last->tail = last->wa_tail;

-       GEM_BUG_ON(port[1].request);
+       GEM_BUG_ON(port_isset(&port[1]));

        /* Hardware submission is through 2 ports. Conceptually each port
         * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -464,7 +468,8 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)

                        GEM_BUG_ON(last->ctx == cursor->ctx);

-                       i915_gem_request_assign(&port->request, last);
+                       if (submit)
+                               port_assign(port, last);

Optimisation? GEM_BUG_ON(last != port_request(port))?

Kind of, it is so both paths look identical and yes so we do meet the
criteria that last != port_request(port) (because it is silly to do
the request_count dance if the goal is not to change request_count).

Ok.

@@ -479,7 +484,7 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
                submit = true;
        }
        if (submit) {
-               i915_gem_request_assign(&port->request, last);
+               port_assign(port, last);
                engine->execlist_first = rb;
        }
        spin_unlock_irq(&engine->timeline->lock);
@@ -488,16 +493,11 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
                execlists_submit_ports(engine);
}

-static bool execlists_elsp_idle(struct intel_engine_cs *engine)
-{
-       return !engine->execlist_port[0].request;
-}
-
static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
{
        const struct execlist_port *port = engine->execlist_port;

-       return port[0].count + port[1].count < 2;
+       return port_count(&port[0]) + port_count(&port[1]) < 2;
}

/*
@@ -547,7 +547,9 @@ static void intel_lrc_irq_handler(unsigned long data)
                tail = GEN8_CSB_WRITE_PTR(head);
                head = GEN8_CSB_READ_PTR(head);
                while (head != tail) {
+                       struct drm_i915_gem_request *rq;
                        unsigned int status;
+                       unsigned int count;

                        if (++head == GEN8_CSB_ENTRIES)
                                head = 0;
@@ -577,20 +579,24 @@ static void intel_lrc_irq_handler(unsigned long data)
                        GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
                                         port[0].context_id);

-                       GEM_BUG_ON(port[0].count == 0);
-                       if (--port[0].count == 0) {
+                       rq = port_unpack(&port[0], &count);
+                       GEM_BUG_ON(count == 0);
+                       if (--count == 0) {

If you changed this to be:

count--;
port_set(...);
if (count > 0)
        continue;

It would be perhaps a bit more readable, but a potentially useless
port_set on the other hand. Not sure.

We expect to overwrite port in the common path, or at least that's my
expectation. Decrementing count for lite-restore should be the
exception. Part of the intention is to do the minimal amount of work
(especially avoiding the appearance of setting port.count = 0
prematurely) and pass the later port.count == 0 assert.

I've seen the mode where we just append and append and append with no requests coming out in a while :), but I agree it is not the typical case. So as I said I am fine with this as it is.


                                GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-                               
GEM_BUG_ON(!i915_gem_request_completed(port[0].request));
-                               execlists_context_status_change(port[0].request,
-                                                               
INTEL_CONTEXT_SCHEDULE_OUT);
+                               GEM_BUG_ON(!i915_gem_request_completed(rq));
+                               execlists_context_status_change(rq, 
INTEL_CONTEXT_SCHEDULE_OUT);
+
+                               trace_i915_gem_request_out(rq);
+                               i915_gem_request_put(rq);

-                               trace_i915_gem_request_out(port[0].request);
-                               i915_gem_request_put(port[0].request);
                                port[0] = port[1];
                                memset(&port[1], 0, sizeof(port[1]));
+                       } else {
+                               port_set(&port[0], port_pack(rq, count));
                        }

-                       GEM_BUG_ON(port[0].count == 0 &&
+                       /* After the final element, the hw should be idle */
+                       GEM_BUG_ON(port_count(&port[0]) == 0 &&
                                   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
                }

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 4a599e3480a9..68765d45116b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -368,8 +368,14 @@ struct intel_engine_cs {
        /* Execlists */
        struct tasklet_struct irq_tasklet;
        struct execlist_port {
-               struct drm_i915_gem_request *request;
-               unsigned int count;
+               struct drm_i915_gem_request *request_count;
+#define EXECLIST_COUNT_BITS 2
+#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
+#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, 
EXECLIST_COUNT_BITS)
+#define port_set(p, packed) ((p)->request_count = (packed))
+#define port_isset(p) ((p)->request_count)
                GEM_DEBUG_DECL(u32 context_id);
        } execlist_port[2];
        struct rb_root execlist_queue;


Looks correct but I am still having trouble accepting the structure
shrink and code savings are worth having less readable code.

Excluding port_unpack, I think it's a reasonable tidy.

I think the tidy is separate from the packing. But ok, lets go with it and see what happens.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com>

Regards,

Tvrtko


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to