Re: [Intel-gfx] [PATCH 18/43] drm/i915/bdw: New logical ring submission mechanism

2014-08-11 Thread Daniel Vetter
On Thu, Jul 24, 2014 at 05:04:26PM +0100, Thomas Daniel wrote:
 From: Oscar Mateo <oscar.ma...@intel.com>
 
 Well, new-ish: if all this code looks familiar, that's because it's
 a clone of the existing submission mechanism (with some modifications
 here and there to adapt it to LRCs and Execlists).
 
 And why did we do this instead of reusing code, one might wonder?
 Well, there are some fears that the differences are big enough that
 they will end up breaking all platforms.
 
 Also, Execlists offer several advantages, like control over when the
 GPU is done with a given workload, that can help simplify the
 submission mechanism, no doubt. I am interested in getting Execlists
 to work first and foremost, but in the future this parallel submission
 mechanism will help us to fine tune the mechanism without affecting
 old gens.
 
 v2: Pass the ringbuffer only (whenever possible).
 
 Signed-off-by: Oscar Mateo <oscar.ma...@intel.com>
 ---
  drivers/gpu/drm/i915/intel_lrc.c|  193 
 +++
  drivers/gpu/drm/i915/intel_lrc.h|   12 ++
  drivers/gpu/drm/i915/intel_ringbuffer.c |   20 ++--
  drivers/gpu/drm/i915/intel_ringbuffer.h |3 +
  4 files changed, 218 insertions(+), 10 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
 b/drivers/gpu/drm/i915/intel_lrc.c
 index f171fd5..bd37d51 100644
 --- a/drivers/gpu/drm/i915/intel_lrc.c
 +++ b/drivers/gpu/drm/i915/intel_lrc.c
 @@ -106,6 +106,199 @@ void intel_logical_ring_stop(struct intel_engine_cs 
 *ring)
   /* TODO */
  }
  
 +void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
 +{
 +	intel_logical_ring_advance(ringbuf);
 +
 +	if (intel_ring_stopped(ringbuf->ring))
 +		return;
 +
 +	/* TODO: how to submit a context to the ELSP is not here yet */
 +}
 +
 +static int logical_ring_alloc_seqno(struct intel_engine_cs *ring)
 +{
 +	if (ring->outstanding_lazy_seqno)
 +		return 0;
 +
 +	if (ring->preallocated_lazy_request == NULL) {
 +		struct drm_i915_gem_request *request;
 +
 +		request = kmalloc(sizeof(*request), GFP_KERNEL);
 +		if (request == NULL)
 +			return -ENOMEM;
 +
 +		ring->preallocated_lazy_request = request;
 +	}
 +
 +	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
 +}
 +
 +static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf, int bytes)
 +{
 +	struct intel_engine_cs *ring = ringbuf->ring;
 +	struct drm_i915_gem_request *request;
 +	u32 seqno = 0;
 +	int ret;
 +
 +	if (ringbuf->last_retired_head != -1) {
 +		ringbuf->head = ringbuf->last_retired_head;
 +		ringbuf->last_retired_head = -1;
 +
 +		ringbuf->space = intel_ring_space(ringbuf);
 +		if (ringbuf->space >= bytes)
 +			return 0;
 +	}
 +
 +	list_for_each_entry(request, &ring->request_list, list) {
 +		if (__intel_ring_space(request->tail, ringbuf->tail,
 +				       ringbuf->size) >= bytes) {
 +			seqno = request->seqno;
 +			break;
 +		}
 +	}
 +
 +	if (seqno == 0)
 +		return -ENOSPC;
 +
 +	ret = i915_wait_seqno(ring, seqno);
 +	if (ret)
 +		return ret;
 +
 +	/* TODO: make sure we update the right ringbuffer's last_retired_head
 +	 * when retiring requests */
 +	i915_gem_retire_requests_ring(ring);
 +	ringbuf->head = ringbuf->last_retired_head;
 +	ringbuf->last_retired_head = -1;
 +
 +	ringbuf->space = intel_ring_space(ringbuf);
 +	return 0;
 +}
 +
 +static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf, int bytes)
 +{
 +	struct intel_engine_cs *ring = ringbuf->ring;
 +	struct drm_device *dev = ring->dev;
 +	struct drm_i915_private *dev_priv = dev->dev_private;
 +	unsigned long end;
 +	int ret;
 +
 +	ret = logical_ring_wait_request(ringbuf, bytes);
 +	if (ret != -ENOSPC)
 +		return ret;
 +
 +	/* Force the context submission in case we have been skipping it */
 +	intel_logical_ring_advance_and_submit(ringbuf);
 +
 +	/* With GEM the hangcheck timer should kick us out of the loop,
 +	 * leaving it early runs the risk of corrupting GEM state (due
 +	 * to running on almost untested codepaths). But on resume
 +	 * timers don't work yet, so prevent a complete hang in that
 +	 * case by choosing an insanely large timeout. */
 +	end = jiffies + 60 * HZ;
 +
 +	do {
 +		ringbuf->head = I915_READ_HEAD(ring);
 +		ringbuf->space = intel_ring_space(ringbuf);
 +		if (ringbuf->space >= bytes) {
 +			ret = 0;
 +			break;
 +		}
 +
 +		if (!drm_core_check_feature(dev, DRIVER_MODESET) &&
 +		    dev->primary->master) {
 + struct drm_i915_master_private *master_priv = 
 

[Intel-gfx] [PATCH 18/43] drm/i915/bdw: New logical ring submission mechanism

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo <oscar.ma...@intel.com>

Well, new-ish: if all this code looks familiar, that's because it's
a clone of the existing submission mechanism (with some modifications
here and there to adapt it to LRCs and Execlists).

And why did we do this instead of reusing code, one might wonder?
Well, there are some fears that the differences are big enough that
they will end up breaking all platforms.

Also, Execlists offer several advantages, like control over when the
GPU is done with a given workload, that can help simplify the
submission mechanism, no doubt. I am interested in getting Execlists
to work first and foremost, but in the future this parallel submission
mechanism will help us to fine tune the mechanism without affecting
old gens.

v2: Pass the ringbuffer only (whenever possible).

Signed-off-by: Oscar Mateo <oscar.ma...@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c|  193 +++
 drivers/gpu/drm/i915/intel_lrc.h|   12 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c |   20 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.h |3 +
 4 files changed, 218 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f171fd5..bd37d51 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -106,6 +106,199 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring)
/* TODO */
 }
 
+void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
+{
+	intel_logical_ring_advance(ringbuf);
+
+	if (intel_ring_stopped(ringbuf->ring))
+		return;
+
+	/* TODO: how to submit a context to the ELSP is not here yet */
+}
+
+static int logical_ring_alloc_seqno(struct intel_engine_cs *ring)
+{
+	if (ring->outstanding_lazy_seqno)
+		return 0;
+
+	if (ring->preallocated_lazy_request == NULL) {
+		struct drm_i915_gem_request *request;
+
+		request = kmalloc(sizeof(*request), GFP_KERNEL);
+		if (request == NULL)
+			return -ENOMEM;
+
+		ring->preallocated_lazy_request = request;
+	}
+
+	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
+}
+
+static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf, int bytes)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct drm_i915_gem_request *request;
+	u32 seqno = 0;
+	int ret;
+
+	if (ringbuf->last_retired_head != -1) {
+		ringbuf->head = ringbuf->last_retired_head;
+		ringbuf->last_retired_head = -1;
+
+		ringbuf->space = intel_ring_space(ringbuf);
+		if (ringbuf->space >= bytes)
+			return 0;
+	}
+
+	list_for_each_entry(request, &ring->request_list, list) {
+		if (__intel_ring_space(request->tail, ringbuf->tail,
+				       ringbuf->size) >= bytes) {
+			seqno = request->seqno;
+			break;
+		}
+	}
+
+	if (seqno == 0)
+		return -ENOSPC;
+
+	ret = i915_wait_seqno(ring, seqno);
+	if (ret)
+		return ret;
+
+	/* TODO: make sure we update the right ringbuffer's last_retired_head
+	 * when retiring requests */
+	i915_gem_retire_requests_ring(ring);
+	ringbuf->head = ringbuf->last_retired_head;
+	ringbuf->last_retired_head = -1;
+
+	ringbuf->space = intel_ring_space(ringbuf);
+	return 0;
+}
+
+static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf, int bytes)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	unsigned long end;
+	int ret;
+
+	ret = logical_ring_wait_request(ringbuf, bytes);
+	if (ret != -ENOSPC)
+		return ret;
+
+	/* Force the context submission in case we have been skipping it */
+	intel_logical_ring_advance_and_submit(ringbuf);
+
+	/* With GEM the hangcheck timer should kick us out of the loop,
+	 * leaving it early runs the risk of corrupting GEM state (due
+	 * to running on almost untested codepaths). But on resume
+	 * timers don't work yet, so prevent a complete hang in that
+	 * case by choosing an insanely large timeout. */
+	end = jiffies + 60 * HZ;
+
+	do {
+		ringbuf->head = I915_READ_HEAD(ring);
+		ringbuf->space = intel_ring_space(ringbuf);
+		if (ringbuf->space >= bytes) {
+			ret = 0;
+			break;
+		}
+
+		if (!drm_core_check_feature(dev, DRIVER_MODESET) &&
+		    dev->primary->master) {
+			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
+			if (master_priv->sarea_priv)
+