On Thu, Jul 24, 2014 at 05:04:29PM +0100, Thomas Daniel wrote:
> From: Oscar Mateo <oscar.ma...@intel.com>
> 
> On a previous iteration of this patch, I created an Execlists
> version of __i915_add_request and abstracted it away as a
> vfunc. Daniel Vetter wondered then why that was needed:
> 
> "with the clean split in command submission I expect every
> function to know whether it'll submit to an lrc (everything in
> intel_lrc.c) or whether it'll submit to a legacy ring (existing
> code), so I don't see a need for an add_request vfunc."
> 
> The honest, hairy truth is that this patch is the glue keeping
> the whole logical ring puzzle together:

Oops, I didn't spot this and it's indeed not terribly pretty.
> 
> - i915_add_request is used by intel_ring_idle, which in turn is
>   used by i915_gpu_idle, which in turn is used in several places
>   inside the eviction and gtt codes.

This should probably be folded in with the lrc specific version of
stop_rings and so should work out.

> - Also, it is used by i915_gem_check_olr, which is littered all
>   over i915_gem.c

We now always preallocate the request struct, so olr is officially dead.
Well almost, except for non-execbuf stuff that we emit through the rings.
Which is nothing for lrc/execlist mode.

Also there's the icky-bitty problem with ringbuf->ctx which makes this
patch not apply any more. I think we need to revise or at least discuss a
bit.

> - ...
> 
> If I were to duplicate all the code that directly or indirectly
> uses __i915_add_request, I'll end up creating a separate driver.
> 
> To show the differences between the existing legacy version and
> the new Execlists one, this time I have special-cased
> __i915_add_request instead of adding an add_request vfunc. I
> hope this helps to untangle this Gordian knot.
> 
> Signed-off-by: Oscar Mateo <oscar.ma...@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c  |   72 
> ++++++++++++++++++++++++++++----------
>  drivers/gpu/drm/i915/intel_lrc.c |   30 +++++++++++++---
>  drivers/gpu/drm/i915/intel_lrc.h |    1 +
>  3 files changed, 80 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 9560b40..1c83b9c 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2327,10 +2327,21 @@ int __i915_add_request(struct intel_engine_cs *ring,
>  {
>       struct drm_i915_private *dev_priv = ring->dev->dev_private;
>       struct drm_i915_gem_request *request;
> +     struct intel_ringbuffer *ringbuf;
>       u32 request_ring_position, request_start;
>       int ret;
>  
> -     request_start = intel_ring_get_tail(ring->buffer);
> +     request = ring->preallocated_lazy_request;
> +     if (WARN_ON(request == NULL))
> +             return -ENOMEM;
> +
> +     if (i915.enable_execlists) {
> +             struct intel_context *ctx = request->ctx;
> +             ringbuf = ctx->engine[ring->id].ringbuf;
> +     } else
> +             ringbuf = ring->buffer;
> +
> +     request_start = intel_ring_get_tail(ringbuf);
>       /*
>        * Emit any outstanding flushes - execbuf can fail to emit the flush
>        * after having emitted the batchbuffer command. Hence we need to fix
> @@ -2338,24 +2349,32 @@ int __i915_add_request(struct intel_engine_cs *ring,
>        * is that the flush _must_ happen before the next request, no matter
>        * what.
>        */
> -     ret = intel_ring_flush_all_caches(ring);
> -     if (ret)
> -             return ret;
> -
> -     request = ring->preallocated_lazy_request;
> -     if (WARN_ON(request == NULL))
> -             return -ENOMEM;
> +     if (i915.enable_execlists) {
> +             ret = logical_ring_flush_all_caches(ringbuf);
> +             if (ret)
> +                     return ret;
> +     } else {
> +             ret = intel_ring_flush_all_caches(ring);
> +             if (ret)
> +                     return ret;
> +     }
>  
>       /* Record the position of the start of the request so that
>        * should we detect the updated seqno part-way through the
>        * GPU processing the request, we never over-estimate the
>        * position of the head.
>        */
> -     request_ring_position = intel_ring_get_tail(ring->buffer);
> +     request_ring_position = intel_ring_get_tail(ringbuf);
>  
> -     ret = ring->add_request(ring);
> -     if (ret)
> -             return ret;
> +     if (i915.enable_execlists) {
> +             ret = ring->emit_request(ringbuf);
> +             if (ret)
> +                     return ret;
> +     } else {
> +             ret = ring->add_request(ring);
> +             if (ret)
> +                     return ret;
> +     }
>  
>       request->seqno = intel_ring_get_seqno(ring);
>       request->ring = ring;
> @@ -2370,12 +2389,14 @@ int __i915_add_request(struct intel_engine_cs *ring,
>        */
>       request->batch_obj = obj;
>  
> -     /* Hold a reference to the current context so that we can inspect
> -      * it later in case a hangcheck error event fires.
> -      */
> -     request->ctx = ring->last_context;
> -     if (request->ctx)
> -             i915_gem_context_reference(request->ctx);
> +     if (!i915.enable_execlists) {
> +             /* Hold a reference to the current context so that we can 
> inspect
> +              * it later in case a hangcheck error event fires.
> +              */
> +             request->ctx = ring->last_context;
> +             if (request->ctx)
> +                     i915_gem_context_reference(request->ctx);
> +     }
>  
>       request->emitted_jiffies = jiffies;
>       list_add_tail(&request->list, &ring->request_list);
> @@ -2630,6 +2651,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs 
> *ring)
>  
>       while (!list_empty(&ring->request_list)) {
>               struct drm_i915_gem_request *request;
> +             struct intel_ringbuffer *ringbuf;
>  
>               request = list_first_entry(&ring->request_list,
>                                          struct drm_i915_gem_request,
> @@ -2639,12 +2661,24 @@ i915_gem_retire_requests_ring(struct intel_engine_cs 
> *ring)
>                       break;
>  
>               trace_i915_gem_request_retire(ring, request->seqno);
> +
> +             /* This is one of the few common intersection points
> +              * between legacy ringbuffer submission and execlists:
> +              * we need to tell them apart in order to find the correct
> +              * ringbuffer to which the request belongs to.
> +              */
> +             if (i915.enable_execlists) {
> +                     struct intel_context *ctx = request->ctx;
> +                     ringbuf = ctx->engine[ring->id].ringbuf;
> +             } else
> +                     ringbuf = ring->buffer;
> +
>               /* We know the GPU must have read the request to have
>                * sent us the seqno + interrupt, so use the position
>                * of tail of the request to update the last known position
>                * of the GPU head.
>                */
> -             ring->buffer->last_retired_head = request->tail;
> +             ringbuf->last_retired_head = request->tail;
>  
>               i915_gem_free_request(request);
>       }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
> b/drivers/gpu/drm/i915/intel_lrc.c
> index 5dd63d6..dcf59c6 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -106,6 +106,22 @@ void intel_logical_ring_stop(struct intel_engine_cs 
> *ring)
>       /* TODO */
>  }
>  
> +int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf)
> +{
> +     struct intel_engine_cs *ring = ringbuf->ring;
> +     int ret;
> +
> +     if (!ring->gpu_caches_dirty)
> +             return 0;
> +
> +     ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS);
> +     if (ret)
> +             return ret;
> +
> +     ring->gpu_caches_dirty = false;
> +     return 0;
> +}
> +
>  void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
>  {
>       intel_logical_ring_advance(ringbuf);
> @@ -116,7 +132,8 @@ void intel_logical_ring_advance_and_submit(struct 
> intel_ringbuffer *ringbuf)
>       /* TODO: how to submit a context to the ELSP is not here yet */
>  }
>  
> -static int logical_ring_alloc_seqno(struct intel_engine_cs *ring)
> +static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
> +                                 struct intel_context *ctx)
>  {
>       if (ring->outstanding_lazy_seqno)
>               return 0;
> @@ -128,6 +145,13 @@ static int logical_ring_alloc_seqno(struct 
> intel_engine_cs *ring)
>               if (request == NULL)
>                       return -ENOMEM;
>  
> +             /* Hold a reference to the context this request belongs to
> +              * (we will need it when the time comes to emit/retire the
> +              * request).
> +              */
> +             request->ctx = ctx;
> +             i915_gem_context_reference(request->ctx);
> +
>               ring->preallocated_lazy_request = request;
>       }
>  
> @@ -165,8 +189,6 @@ static int logical_ring_wait_request(struct 
> intel_ringbuffer *ringbuf, int bytes
>       if (ret)
>               return ret;
>  
> -     /* TODO: make sure we update the right ringbuffer's last_retired_head
> -      * when retiring requests */
>       i915_gem_retire_requests_ring(ring);
>       ringbuf->head = ringbuf->last_retired_head;
>       ringbuf->last_retired_head = -1;
> @@ -291,7 +313,7 @@ int intel_logical_ring_begin(struct intel_ringbuffer 
> *ringbuf, int num_dwords)
>               return ret;
>  
>       /* Preallocate the olr before touching the ring */
> -     ret = logical_ring_alloc_seqno(ring);
> +     ret = logical_ring_alloc_seqno(ring, ringbuf->ctx);

Ok, this is hairy. Really hairy, since this uses ringbuf->ctx. Not sure we
really want this or need this.

>       if (ret)
>               return ret;
>  
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h 
> b/drivers/gpu/drm/i915/intel_lrc.h
> index 16798b6..696e09e 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -29,6 +29,7 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring);
>  void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
>  int intel_logical_rings_init(struct drm_device *dev);
>  
> +int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf);
>  void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf);
>  static inline void intel_logical_ring_advance(struct intel_ringbuffer 
> *ringbuf)
>  {
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to