[Intel-gfx] [PATCH] drm/i915: Use the correct size of the GTT for placing the per-process entries

2012-08-24 Thread Chris Wilson
The current layout is to place the per-process tables at the end of the
GTT. However, this is currently using a hardcoded maximum size for the GTT
and not taking in account limitations imposed by the BIOS. Use the value
for the total number of entries allocated in the table as provided by
the configuration registers.

Reported-by: Matthew Garrett m...@redhat.com
Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
Cc: Daniel Vetter daniel.vet...@ffwll.ch
Cc: Ben Widawsky b...@bwidawsk.net
Cc: Matthew Garret m...@redhat.com
---
 drivers/gpu/drm/i915/i915_gem_gtt.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 528fd43..4c03544 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -72,7 +72,7 @@ int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
/* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
 * entries. For aliasing ppgtt support we just steal them at the end for
 * now. */
-   first_pd_entry_in_global_pt = 512*1024 - I915_PPGTT_PD_ENTRIES;
+   first_pd_entry_in_global_pt = dev_priv-mm.gtt-gtt_total_entries - 
I915_PPGTT_PD_ENTRIES;
 
ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
if (!ppgtt)
-- 
1.7.10.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/2] drm/i915: Use a non-blocking wait for set-to-domain ioctl

2012-08-24 Thread Chris Wilson
The principal use for set-to-domain is for userspace to serialise
operations with a particular buffer, for example to maintain coherency
with a CPU map or to ratelimit its rendering by waiting on all previous
operations before continuing. As such we tend to hold the struct_mutex
for long periods during the synchronisation and so cause contention
issues with other users of the graphics device, even for independent
operations as memory management. An example is the contention between
compiz and X which causes jitter in the display and a drop in peak
throughput.

The ultimate solution would be a set of fine grained locks and lockless
operations, but an intermediate step is to first attempt the
synchronisation for set-to-domain without holding the mutex. This
introduces a number of race conditions, so we limit it use to the ioctl
periphery where we have no dependent state and can safely complete with
a locked synchronisation afterwards.

Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem.c |   55 +++
 1 file changed, 55 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0b236ea..719a933 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1133,6 +1133,52 @@ i915_gem_object_wait_rendering(struct 
drm_i915_gem_object *obj,
return 0;
 }
 
+/* A nonblocking variant of the above wait. This is a highly dangerous routine
+ * as the object state may change during this call.
+ */
+static __must_check int
+i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
+   bool readonly)
+{
+   struct drm_device *dev = obj-base.dev;
+   struct drm_i915_private *dev_priv = dev-dev_private;
+   struct intel_ring_buffer *ring = obj-ring;
+   u32 seqno;
+   int ret;
+
+   BUG_ON(!mutex_is_locked(dev-struct_mutex));
+   BUG_ON(!dev_priv-mm.interruptible);
+
+   seqno = readonly ? obj-last_write_seqno : obj-last_read_seqno;
+   if (seqno == 0)
+   return 0;
+
+   ret = i915_gem_check_wedge(dev_priv, true);
+   if (ret)
+   return ret;
+
+   ret = i915_gem_check_olr(ring, seqno);
+   if (ret)
+   return ret;
+
+   mutex_unlock(dev-struct_mutex);
+   ret = __wait_seqno(ring, seqno, true, NULL);
+   mutex_lock(dev-struct_mutex);
+
+   i915_gem_retire_requests_ring(ring);
+
+   /* Manually manage the write flush as we may have not yet
+* retired the buffer.
+*/
+   if (obj-last_write_seqno 
+   i915_seqno_passed(seqno, obj-last_write_seqno)) {
+   obj-last_write_seqno = 0;
+   obj-base.write_domain = ~I915_GEM_GPU_DOMAINS;
+   }
+
+   return ret;
+}
+
 /**
  * Called when user space prepares to use an object with the CPU, either
  * through the mmap ioctl's mapping or a GTT mapping.
@@ -1170,6 +1216,14 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void 
*data,
goto unlock;
}
 
+   /* Try to flush the object off the GPU without holding the lock.
+* We will repeat the flush holding the lock in the normal manner
+* to catch cases where we are gazumped.
+*/
+   ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain);
+   if (ret)
+   goto unref;
+
if (read_domains  I915_GEM_DOMAIN_GTT) {
ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
 
@@ -1183,6 +1237,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void 
*data,
ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
}
 
+unref:
drm_gem_object_unreference(obj-base);
 unlock:
mutex_unlock(dev-struct_mutex);
-- 
1.7.10.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/2] drm/i915: Juggle code order to ease flow of the next patch

2012-08-24 Thread Chris Wilson
Move the wait-for-rendering logic around in the file so that we can
group it together with the subsequent variations. The general goal is to
have the lower level routines clustered together and then the higher
level logic building upon those low level routines that came before.

Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem.c |  379 +++
 1 file changed, 188 insertions(+), 191 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5531758..0b236ea 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -945,6 +945,194 @@ unlock:
return ret;
 }
 
+int
+i915_gem_check_wedge(struct drm_i915_private *dev_priv,
+bool interruptible)
+{
+   if (atomic_read(dev_priv-mm.wedged)) {
+   struct completion *x = dev_priv-error_completion;
+   bool recovery_complete;
+   unsigned long flags;
+
+   /* Give the error handler a chance to run. */
+   spin_lock_irqsave(x-wait.lock, flags);
+   recovery_complete = x-done  0;
+   spin_unlock_irqrestore(x-wait.lock, flags);
+
+   /* Non-interruptible callers can't handle -EAGAIN, hence return
+* -EIO unconditionally for these. */
+   if (!interruptible)
+   return -EIO;
+
+   /* Recovery complete, but still wedged means reset failure. */
+   if (recovery_complete)
+   return -EIO;
+
+   return -EAGAIN;
+   }
+
+   return 0;
+}
+
+/*
+ * Compare seqno against outstanding lazy request. Emit a request if they are
+ * equal.
+ */
+static int
+i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
+{
+   int ret;
+
+   BUG_ON(!mutex_is_locked(ring-dev-struct_mutex));
+
+   ret = 0;
+   if (seqno == ring-outstanding_lazy_request)
+   ret = i915_add_request(ring, NULL, NULL);
+
+   return ret;
+}
+
+/**
+ * __wait_seqno - wait until execution of seqno has finished
+ * @ring: the ring expected to report seqno
+ * @seqno: duh!
+ * @interruptible: do an interruptible wait (normally yes)
+ * @timeout: in - how long to wait (NULL forever); out - how much time 
remaining
+ *
+ * Returns 0 if the seqno was found within the alloted time. Else returns the
+ * errno with remaining time filled in timeout argument.
+ */
+static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
+   bool interruptible, struct timespec *timeout)
+{
+   drm_i915_private_t *dev_priv = ring-dev-dev_private;
+   struct timespec before, now, wait_time={1,0};
+   unsigned long timeout_jiffies;
+   long end;
+   bool wait_forever = true;
+   int ret;
+
+   if (i915_seqno_passed(ring-get_seqno(ring, true), seqno))
+   return 0;
+
+   trace_i915_gem_request_wait_begin(ring, seqno);
+
+   if (timeout != NULL) {
+   wait_time = *timeout;
+   wait_forever = false;
+   }
+
+   timeout_jiffies = timespec_to_jiffies(wait_time);
+
+   if (WARN_ON(!ring-irq_get(ring)))
+   return -ENODEV;
+
+   /* Record current time in case interrupted by signal, or wedged * */
+   getrawmonotonic(before);
+
+#define EXIT_COND \
+   (i915_seqno_passed(ring-get_seqno(ring, false), seqno) || \
+   atomic_read(dev_priv-mm.wedged))
+   do {
+   if (interruptible)
+   end = wait_event_interruptible_timeout(ring-irq_queue,
+  EXIT_COND,
+  timeout_jiffies);
+   else
+   end = wait_event_timeout(ring-irq_queue, EXIT_COND,
+timeout_jiffies);
+
+   ret = i915_gem_check_wedge(dev_priv, interruptible);
+   if (ret)
+   end = ret;
+   } while (end == 0  wait_forever);
+
+   getrawmonotonic(now);
+
+   ring-irq_put(ring);
+   trace_i915_gem_request_wait_end(ring, seqno);
+#undef EXIT_COND
+
+   if (timeout) {
+   struct timespec sleep_time = timespec_sub(now, before);
+   *timeout = timespec_sub(*timeout, sleep_time);
+   }
+
+   switch (end) {
+   case -EIO:
+   case -EAGAIN: /* Wedged */
+   case -ERESTARTSYS: /* Signal */
+   return (int)end;
+   case 0: /* Timeout */
+   if (timeout)
+   set_normalized_timespec(timeout, 0, 0);
+   return -ETIME;
+   default: /* Completed */
+   WARN_ON(end  0); /* We're not aware of other errors */
+   return 0;
+   }
+}
+
+/**
+ * Waits for a sequence number to be signaled, and cleans up the
+ * request and object lists appropriately 

Re: [Intel-gfx] [PATCH] drm/i915: Use the correct size of the GTT for placing the per-process entries

2012-08-24 Thread Daniel Vetter
On Fri, Aug 24, 2012 at 09:12:22AM +0100, Chris Wilson wrote:
 The current layout is to place the per-process tables at the end of the
 GTT. However, this is currently using a hardcoded maximum size for the GTT
 and not taking in account limitations imposed by the BIOS. Use the value
 for the total number of entries allocated in the table as provided by
 the configuration registers.
 
 Reported-by: Matthew Garrett m...@redhat.com
 Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
 Cc: Daniel Vetter daniel.vet...@ffwll.ch
 Cc: Ben Widawsky b...@bwidawsk.net
 Cc: Matthew Garret m...@redhat.com
Picked up for -fixes, thanks for the patch.
-Daniel
-- 
Daniel Vetter
Mail: dan...@ffwll.ch
Mobile: +41 (0)79 365 57 48
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/3 v4] drm/i915: Extract reading INSTDONE

2012-08-24 Thread Jani Nikula
On Fri, 24 Aug 2012, Ben Widawsky b...@bwidawsk.net wrote:
 INSTDONE is used in many places, and it varies from generation to
 generation. This provides a good reason for us to extract the logic to
 read the relevant information.

 The patch has no functional change. It's prep for some new stuff.

 v2: move the memset inside of i915_get_extra_instdone (Jani)
 v3,4: bugs caught by (Jani)

On the series,

Reviewed-by: Jani Nikula jani.nik...@intel.com


 Cc: Jani Nikula jani.nik...@intel.com
 Signed-off-by: Ben Widawsky b...@bwidawsk.net
 ---
  drivers/gpu/drm/i915/i915_irq.c | 50 
 +
  drivers/gpu/drm/i915/i915_reg.h |  1 +
  2 files changed, 32 insertions(+), 19 deletions(-)

 diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
 index 3633029..b7ec34e 100644
 --- a/drivers/gpu/drm/i915/i915_irq.c
 +++ b/drivers/gpu/drm/i915/i915_irq.c
 @@ -1070,6 +1070,23 @@ i915_error_first_batchbuffer(struct drm_i915_private 
 *dev_priv,
   return NULL;
  }
  
 +/* NB: please notice the memset */
 +static void i915_get_extra_instdone(struct drm_device *dev,
 + uint32_t *instdone)
 +{
 + struct drm_i915_private *dev_priv = dev-dev_private;
 + memset(instdone, 0, sizeof(*instdone) * I915_NUM_INSTDONE_REG);
 +
 + if (INTEL_INFO(dev)-gen  4) {
 + instdone[0] = I915_READ(INSTDONE);
 + instdone[1] = 0;
 + } else {
 + instdone[0] = I915_READ(INSTDONE_I965);
 + instdone[1] = I915_READ(INSTDONE1);
 + }
 +}
 +
 +
  static void i915_record_ring_state(struct drm_device *dev,
  struct drm_i915_error_state *error,
  struct intel_ring_buffer *ring)
 @@ -1288,6 +1305,7 @@ void i915_destroy_error_state(struct drm_device *dev)
  static void i915_report_and_clear_eir(struct drm_device *dev)
  {
   struct drm_i915_private *dev_priv = dev-dev_private;
 + uint32_t instdone[I915_NUM_INSTDONE_REG];
   u32 eir = I915_READ(EIR);
   int pipe;
  
 @@ -1296,16 +1314,17 @@ static void i915_report_and_clear_eir(struct 
 drm_device *dev)
  
   pr_err(render error detected, EIR: 0x%08x\n, eir);
  
 + i915_get_extra_instdone(dev, instdone);
 +
   if (IS_G4X(dev)) {
   if (eir  (GM45_ERROR_MEM_PRIV | GM45_ERROR_CP_PRIV)) {
   u32 ipeir = I915_READ(IPEIR_I965);
  
   pr_err(  IPEIR: 0x%08x\n, I915_READ(IPEIR_I965));
   pr_err(  IPEHR: 0x%08x\n, I915_READ(IPEHR_I965));
 - pr_err(  INSTDONE: 0x%08x\n,
 -I915_READ(INSTDONE_I965));
 + pr_err(  INSTDONE: 0x%08x\n, instdone[0]);
   pr_err(  INSTPS: 0x%08x\n, I915_READ(INSTPS));
 - pr_err(  INSTDONE1: 0x%08x\n, I915_READ(INSTDONE1));
 + pr_err(  INSTDONE1: 0x%08x\n, instdone[1]);
   pr_err(  ACTHD: 0x%08x\n, I915_READ(ACTHD_I965));
   I915_WRITE(IPEIR_I965, ipeir);
   POSTING_READ(IPEIR_I965);
 @@ -1344,7 +1363,7 @@ static void i915_report_and_clear_eir(struct drm_device 
 *dev)
  
   pr_err(  IPEIR: 0x%08x\n, I915_READ(IPEIR));
   pr_err(  IPEHR: 0x%08x\n, I915_READ(IPEHR));
 - pr_err(  INSTDONE: 0x%08x\n, I915_READ(INSTDONE));
 + pr_err(  INSTDONE: 0x%08x\n, instdone[0]);
   pr_err(  ACTHD: 0x%08x\n, I915_READ(ACTHD));
   I915_WRITE(IPEIR, ipeir);
   POSTING_READ(IPEIR);
 @@ -1353,10 +1372,9 @@ static void i915_report_and_clear_eir(struct 
 drm_device *dev)
  
   pr_err(  IPEIR: 0x%08x\n, I915_READ(IPEIR_I965));
   pr_err(  IPEHR: 0x%08x\n, I915_READ(IPEHR_I965));
 - pr_err(  INSTDONE: 0x%08x\n,
 -I915_READ(INSTDONE_I965));
 + pr_err(  INSTDONE: 0x%08x\n, instdone[0]);
   pr_err(  INSTPS: 0x%08x\n, I915_READ(INSTPS));
 - pr_err(  INSTDONE1: 0x%08x\n, I915_READ(INSTDONE1));
 + pr_err(  INSTDONE1: 0x%08x\n, instdone[1]);
   pr_err(  ACTHD: 0x%08x\n, I915_READ(ACTHD_I965));
   I915_WRITE(IPEIR_I965, ipeir);
   POSTING_READ(IPEIR_I965);
 @@ -1671,7 +1689,7 @@ void i915_hangcheck_elapsed(unsigned long data)
  {
   struct drm_device *dev = (struct drm_device *)data;
   drm_i915_private_t *dev_priv = dev-dev_private;
 - uint32_t acthd[I915_NUM_RINGS], instdone, instdone1;
 + uint32_t acthd[I915_NUM_RINGS], instdone[I915_NUM_INSTDONE_REG];
   struct intel_ring_buffer *ring;
   bool err = false, idle;
   int i;
 @@ -1699,25 +1717,19 @@ void i915_hangcheck_elapsed(unsigned long data)
   return;
   }
  
 

Re: [Intel-gfx] [PATCH 3/3] drm/i915: Avoid unbinding due to an interrupted pin_and_fence during execbuffer

2012-08-24 Thread Daniel Vetter
On Thu, Aug 23, 2012 at 01:12:53PM +0100, Chris Wilson wrote:
 If we need to stall in order to complete the pin_and_fence operation
 during execbuffer reservation, there is a high likelihood that the
 operation will be interrupted by a signal (thanks X!). In order to
 simplify the cleanup along that error path, the object was
 unconditionally unbound and the error propagated. However, being
 interrupted here is far more common than I would like and so we can
 strive to avoid the extra work by eliminating the forced unbind.
 
 Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk

I've merged the resend of patch 1/3, thanks a lot. But for this one here
I've found a few tiny things to bitch about. Comments inline below.
-Daniel

 ---
  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   93 
 
  1 file changed, 38 insertions(+), 55 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
 b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
 index 23aa324..0c5a433 100644
 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
 +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
 @@ -331,7 +331,8 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
   return ret;
  }
  
 -#define  __EXEC_OBJECT_HAS_FENCE (131)
 +#define  __EXEC_OBJECT_HAS_PIN (131)
 +#define  __EXEC_OBJECT_HAS_FENCE (130)
  
  static int
  need_reloc_mappable(struct drm_i915_gem_object *obj)
 @@ -344,6 +345,7 @@ static int
  pin_and_fence_object(struct drm_i915_gem_object *obj,
struct intel_ring_buffer *ring)
  {
 + struct drm_i915_private *dev_priv = obj-base.dev-dev_private;
   struct drm_i915_gem_exec_object2 *entry = obj-exec_entry;
   bool has_fenced_gpu_access = INTEL_INFO(ring-dev)-gen  4;
   bool need_fence, need_mappable;
 @@ -359,11 +361,13 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
   if (ret)
   return ret;
  
 + entry-flags |= __EXEC_OBJECT_HAS_PIN;
 +
   if (has_fenced_gpu_access) {
   if (entry-flags  EXEC_OBJECT_NEEDS_FENCE) {
   ret = i915_gem_object_get_fence(obj);
   if (ret)
 - goto err_unpin;
 + return ret;
  
   if (i915_gem_object_pin_fence(obj))
   entry-flags |= __EXEC_OBJECT_HAS_FENCE;
 @@ -372,12 +376,35 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
   }
   }
  
 + /* ... and ensure ppgtt mapping exist if needed. */

Nitpick: the ... and in this move comment looks a bit stale with the
previously preceeding comment no longer in the same scope ;-)

 + if (dev_priv-mm.aliasing_ppgtt  !obj-has_aliasing_ppgtt_mapping) {
 + i915_ppgtt_bind_object(dev_priv-mm.aliasing_ppgtt,
 +obj, obj-cache_level);
 +
 + obj-has_aliasing_ppgtt_mapping = 1;
 + }
 +
   entry-offset = obj-gtt_offset;
   return 0;
 +}
  
 -err_unpin:
 - i915_gem_object_unpin(obj);
 - return ret;
 +static void
 +unpin_and_fence_object(struct drm_i915_gem_object *obj)
 +{
 + struct drm_i915_gem_exec_object2 *entry;
 +
 + if (!obj-gtt_space)
 + return;
 +
 + entry = obj-exec_entry;
 +
 + if (entry-flags  __EXEC_OBJECT_HAS_FENCE)
 + i915_gem_object_unpin_fence(obj);
 +
 + if (entry-flags  __EXEC_OBJECT_HAS_PIN)
 + i915_gem_object_unpin(obj);
 +
 + entry-flags = ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
  }
  
  static int
 @@ -385,7 +412,6 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer 
 *ring,
   struct drm_file *file,
   struct list_head *objects)
  {
 - drm_i915_private_t *dev_priv = ring-dev-dev_private;
   struct drm_i915_gem_object *obj;
   int ret, retry;
   bool has_fenced_gpu_access = INTEL_INFO(ring-dev)-gen  4;
 @@ -463,45 +489,13 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer 
 *ring,
   continue;
  
   ret = pin_and_fence_object(obj, ring);
 - if (ret) {
 - int ret_ignore;
 -
 - /* This can potentially raise a harmless
 -  * -EINVAL if we failed to bind in the above
 -  * call. It cannot raise -EINTR since we know
 -  * that the bo is freshly bound and so will
 -  * not need to be flushed or waited upon.
 -  */
 - ret_ignore = i915_gem_object_unbind(obj);
 - (void)ret_ignore;
 - WARN_ON(obj-gtt_space);
 + if (ret)
   break;
 - }
   }
  
   /* Decrement pin count for bound objects */
 - 

[Intel-gfx] [PATCH] drm/i915: align vlv forcewake with common lore

2012-08-24 Thread Daniel Vetter
For some odd reasons, the vlv forcewake code is rather different from
all other platforms, with no clear justifaction. Adjust things:

- don't check whether the gt is awak already (and bail out early), we
  need to grab a forcewake anyway. Otherwise the chip might go to
  sleep too early, and this would also screw up our forcewake
  accounting.
- Like all other platforms, check whether the gt has cleared the
  forcewake bit in the _ACK register before setting it again.
- Use _MASKED_BIT_ENABLE/DISABLE macros
- Only use bit0 of the fw reg, not all 16 bits.
- check the gtfifodb reg like on all other platforms in _put.
- Drop the POSTING_READs for consitency.

v2: Failure to git add ... again.

Tested-by: Purushothaman, Vijay A vijay.a.purushotha...@intel.com
Cc: Widawsky, Benjamin benjamin.widaw...@intel.com
Signed-Off-by: Daniel Vetter daniel.vet...@ffwll.ch
---
 drivers/gpu/drm/i915/intel_pm.c |   14 ++
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 94aabca..839581a 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4064,12 +4064,10 @@ int __gen6_gt_wait_for_fifo(struct drm_i915_private 
*dev_priv)
 
 static void vlv_force_wake_get(struct drm_i915_private *dev_priv)
 {
-   /* Already awake? */
-   if ((I915_READ(0x130094)  0xa1) == 0xa1)
-   return;
+   if (wait_for_atomic_us((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV)  1) == 0, 
500))
+   DRM_ERROR(Force wake wait timed out\n);
 
-   I915_WRITE_NOTRACE(FORCEWAKE_VLV, 0x);
-   POSTING_READ(FORCEWAKE_VLV);
+   I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_ENABLE(1));
 
if (wait_for_atomic_us((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV)  1), 500))
DRM_ERROR(Force wake wait timed out\n);
@@ -4079,9 +4077,9 @@ static void vlv_force_wake_get(struct drm_i915_private 
*dev_priv)
 
 static void vlv_force_wake_put(struct drm_i915_private *dev_priv)
 {
-   I915_WRITE_NOTRACE(FORCEWAKE_VLV, 0x);
-   /* FIXME: confirm VLV behavior with Punit folks */
-   POSTING_READ(FORCEWAKE_VLV);
+   I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_DISABLE(1));
+   /* The below doubles as a POSTING_READ */
+   gen6_gt_check_fifodbg(dev_priv);
 }
 
 void intel_gt_init(struct drm_device *dev)
-- 
1.7.10.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/3 v4] drm/i915: Extract reading INSTDONE

2012-08-24 Thread Daniel Vetter
On Fri, Aug 24, 2012 at 12:26:05PM +0300, Jani Nikula wrote:
 On Fri, 24 Aug 2012, Ben Widawsky b...@bwidawsk.net wrote:
  INSTDONE is used in many places, and it varies from generation to
  generation. This provides a good reason for us to extract the logic to
  read the relevant information.
 
  The patch has no functional change. It's prep for some new stuff.
 
  v2: move the memset inside of i915_get_extra_instdone (Jani)
  v3,4: bugs caught by (Jani)
 
 On the series,
 
 Reviewed-by: Jani Nikula jani.nik...@intel.com
All 3 queued for -next, thanks for the patches and review.
-Daniel
 
 
  Cc: Jani Nikula jani.nik...@intel.com
  Signed-off-by: Ben Widawsky b...@bwidawsk.net
  ---
   drivers/gpu/drm/i915/i915_irq.c | 50 
  +
   drivers/gpu/drm/i915/i915_reg.h |  1 +
   2 files changed, 32 insertions(+), 19 deletions(-)
 
  diff --git a/drivers/gpu/drm/i915/i915_irq.c 
  b/drivers/gpu/drm/i915/i915_irq.c
  index 3633029..b7ec34e 100644
  --- a/drivers/gpu/drm/i915/i915_irq.c
  +++ b/drivers/gpu/drm/i915/i915_irq.c
  @@ -1070,6 +1070,23 @@ i915_error_first_batchbuffer(struct drm_i915_private 
  *dev_priv,
  return NULL;
   }
   
  +/* NB: please notice the memset */
  +static void i915_get_extra_instdone(struct drm_device *dev,
  +   uint32_t *instdone)
  +{
  +   struct drm_i915_private *dev_priv = dev-dev_private;
  +   memset(instdone, 0, sizeof(*instdone) * I915_NUM_INSTDONE_REG);
  +
  +   if (INTEL_INFO(dev)-gen  4) {
  +   instdone[0] = I915_READ(INSTDONE);
  +   instdone[1] = 0;
  +   } else {
  +   instdone[0] = I915_READ(INSTDONE_I965);
  +   instdone[1] = I915_READ(INSTDONE1);
  +   }
  +}
  +
  +
   static void i915_record_ring_state(struct drm_device *dev,
 struct drm_i915_error_state *error,
 struct intel_ring_buffer *ring)
  @@ -1288,6 +1305,7 @@ void i915_destroy_error_state(struct drm_device *dev)
   static void i915_report_and_clear_eir(struct drm_device *dev)
   {
  struct drm_i915_private *dev_priv = dev-dev_private;
  +   uint32_t instdone[I915_NUM_INSTDONE_REG];
  u32 eir = I915_READ(EIR);
  int pipe;
   
  @@ -1296,16 +1314,17 @@ static void i915_report_and_clear_eir(struct 
  drm_device *dev)
   
  pr_err(render error detected, EIR: 0x%08x\n, eir);
   
  +   i915_get_extra_instdone(dev, instdone);
  +
  if (IS_G4X(dev)) {
  if (eir  (GM45_ERROR_MEM_PRIV | GM45_ERROR_CP_PRIV)) {
  u32 ipeir = I915_READ(IPEIR_I965);
   
  pr_err(  IPEIR: 0x%08x\n, I915_READ(IPEIR_I965));
  pr_err(  IPEHR: 0x%08x\n, I915_READ(IPEHR_I965));
  -   pr_err(  INSTDONE: 0x%08x\n,
  -  I915_READ(INSTDONE_I965));
  +   pr_err(  INSTDONE: 0x%08x\n, instdone[0]);
  pr_err(  INSTPS: 0x%08x\n, I915_READ(INSTPS));
  -   pr_err(  INSTDONE1: 0x%08x\n, I915_READ(INSTDONE1));
  +   pr_err(  INSTDONE1: 0x%08x\n, instdone[1]);
  pr_err(  ACTHD: 0x%08x\n, I915_READ(ACTHD_I965));
  I915_WRITE(IPEIR_I965, ipeir);
  POSTING_READ(IPEIR_I965);
  @@ -1344,7 +1363,7 @@ static void i915_report_and_clear_eir(struct 
  drm_device *dev)
   
  pr_err(  IPEIR: 0x%08x\n, I915_READ(IPEIR));
  pr_err(  IPEHR: 0x%08x\n, I915_READ(IPEHR));
  -   pr_err(  INSTDONE: 0x%08x\n, I915_READ(INSTDONE));
  +   pr_err(  INSTDONE: 0x%08x\n, instdone[0]);
  pr_err(  ACTHD: 0x%08x\n, I915_READ(ACTHD));
  I915_WRITE(IPEIR, ipeir);
  POSTING_READ(IPEIR);
  @@ -1353,10 +1372,9 @@ static void i915_report_and_clear_eir(struct 
  drm_device *dev)
   
  pr_err(  IPEIR: 0x%08x\n, I915_READ(IPEIR_I965));
  pr_err(  IPEHR: 0x%08x\n, I915_READ(IPEHR_I965));
  -   pr_err(  INSTDONE: 0x%08x\n,
  -  I915_READ(INSTDONE_I965));
  +   pr_err(  INSTDONE: 0x%08x\n, instdone[0]);
  pr_err(  INSTPS: 0x%08x\n, I915_READ(INSTPS));
  -   pr_err(  INSTDONE1: 0x%08x\n, I915_READ(INSTDONE1));
  +   pr_err(  INSTDONE1: 0x%08x\n, instdone[1]);
  pr_err(  ACTHD: 0x%08x\n, I915_READ(ACTHD_I965));
  I915_WRITE(IPEIR_I965, ipeir);
  POSTING_READ(IPEIR_I965);
  @@ -1671,7 +1689,7 @@ void i915_hangcheck_elapsed(unsigned long data)
   {
  struct drm_device *dev = (struct drm_device *)data;
  drm_i915_private_t *dev_priv = dev-dev_private;
  -   uint32_t acthd[I915_NUM_RINGS], instdone, instdone1;
  +   uint32_t acthd[I915_NUM_RINGS], instdone[I915_NUM_INSTDONE_REG];
  struct intel_ring_buffer *ring;
  bool err = 

[Intel-gfx] [PATCH 1/2] drm/i915: align vlv forcewake with common lore

2012-08-24 Thread Daniel Vetter
For some odd reasons, the vlv forcewake code is rather different from
all other platforms, with no clear justification. Adjust things:

- Don't check whether the gt is awake already (and bail out early), we
  need to grab a forcewake anyway. Otherwise the chip might go to
  sleep too early. And this would also screw up our forcewake
  accounting.
- Like all other platforms, check whether the gt has cleared the
  forcewake bit in the _ACK register before setting it again.
- Use _MASKED_BIT_ENABLE/DISABLE macros
- Only use bit0 of the forcewake reg, not all 16 bits.
- check the gtfifodb reg like on all other platforms in _put.
- Drop the POSTING_READs for consistency.

v2: Failure to git add ... again.

v3: Fixup the spelling fail a bit.

Tested-by: Purushothaman, Vijay A vijay.a.purushotha...@intel.com
Cc: Widawsky, Benjamin benjamin.widaw...@intel.com
Signed-Off-by: Daniel Vetter daniel.vet...@ffwll.ch
---
 drivers/gpu/drm/i915/intel_pm.c |   14 ++
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index c0721ff..1a197da 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4075,12 +4075,10 @@ int __gen6_gt_wait_for_fifo(struct drm_i915_private 
*dev_priv)
 
 static void vlv_force_wake_get(struct drm_i915_private *dev_priv)
 {
-   /* Already awake? */
-   if ((I915_READ(0x130094)  0xa1) == 0xa1)
-   return;
+   if (wait_for_atomic_us((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV)  1) == 0, 
500))
+   DRM_ERROR(Force wake wait timed out\n);
 
-   I915_WRITE_NOTRACE(FORCEWAKE_VLV, 0x);
-   POSTING_READ(FORCEWAKE_VLV);
+   I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_ENABLE(1));
 
if (wait_for_atomic_us((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV)  1), 500))
DRM_ERROR(Force wake wait timed out\n);
@@ -4090,9 +4088,9 @@ static void vlv_force_wake_get(struct drm_i915_private 
*dev_priv)
 
 static void vlv_force_wake_put(struct drm_i915_private *dev_priv)
 {
-   I915_WRITE_NOTRACE(FORCEWAKE_VLV, 0x);
-   /* FIXME: confirm VLV behavior with Punit folks */
-   POSTING_READ(FORCEWAKE_VLV);
+   I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_DISABLE(1));
+   /* The below doubles as a POSTING_READ */
+   gen6_gt_check_fifodbg(dev_priv);
 }
 
 void intel_gt_init(struct drm_device *dev)
-- 
1.7.10.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/2] drm/i915: differe error message between forcwake timeouts

2012-08-24 Thread Daniel Vetter
ickle danvet: in the force wake, both DRM_ERRORs have the same string.
ickle useful for .txt shrinkage, horrible for debugging

Signed-off-by: Daniel Vetter daniel.vet...@ffwll.ch
---
 drivers/gpu/drm/i915/intel_pm.c |   12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 1a197da..1e4d25e 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3969,13 +3969,13 @@ static void __gen6_gt_force_wake_get(struct 
drm_i915_private *dev_priv)
forcewake_ack = FORCEWAKE_ACK;
 
if (wait_for_atomic_us((I915_READ_NOTRACE(forcewake_ack)  1) == 0, 
500))
-   DRM_ERROR(Force wake wait timed out\n);
+   DRM_ERROR(Timed out waiting for forcewake old ack to 
clear.\n);
 
I915_WRITE_NOTRACE(FORCEWAKE, 1);
POSTING_READ(FORCEWAKE);
 
if (wait_for_atomic_us((I915_READ_NOTRACE(forcewake_ack)  1), 500))
-   DRM_ERROR(Force wake wait timed out\n);
+   DRM_ERROR(Timed out waiting for forcewake to ack request.\n);
 
__gen6_gt_wait_for_thread_c0(dev_priv);
 }
@@ -3990,13 +3990,13 @@ static void __gen6_gt_force_wake_mt_get(struct 
drm_i915_private *dev_priv)
forcewake_ack = FORCEWAKE_MT_ACK;
 
if (wait_for_atomic_us((I915_READ_NOTRACE(forcewake_ack)  1) == 0, 
500))
-   DRM_ERROR(Force wake wait timed out\n);
+   DRM_ERROR(Timed out waiting for forcewake old ack to 
clear.\n);
 
I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_ENABLE(1));
POSTING_READ(FORCEWAKE_MT);
 
if (wait_for_atomic_us((I915_READ_NOTRACE(forcewake_ack)  1), 500))
-   DRM_ERROR(Force wake wait timed out\n);
+   DRM_ERROR(Timed out waiting for forcewake to ack request.\n);
 
__gen6_gt_wait_for_thread_c0(dev_priv);
 }
@@ -4076,12 +4076,12 @@ int __gen6_gt_wait_for_fifo(struct drm_i915_private 
*dev_priv)
 static void vlv_force_wake_get(struct drm_i915_private *dev_priv)
 {
if (wait_for_atomic_us((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV)  1) == 0, 
500))
-   DRM_ERROR(Force wake wait timed out\n);
+   DRM_ERROR(Timed out waiting for forcewake old ack to 
clear.\n);
 
I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_ENABLE(1));
 
if (wait_for_atomic_us((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV)  1), 500))
-   DRM_ERROR(Force wake wait timed out\n);
+   DRM_ERROR(Timed out waiting for forcewake to ack request.\n);
 
__gen6_gt_wait_for_thread_c0(dev_priv);
 }
-- 
1.7.10.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Use the correct size of the GTT for placing the per-process entries

2012-08-24 Thread Ben Widawsky

On 2012-08-24 01:12, Chris Wilson wrote:
The current layout is to place the per-process tables at the end of 
the
GTT. However, this is currently using a hardcoded maximum size for 
the GTT
and not taking in account limitations imposed by the BIOS. Use the 
value

for the total number of entries allocated in the table as provided by
the configuration registers.

Reported-by: Matthew Garrett m...@redhat.com
Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
Cc: Daniel Vetter daniel.vet...@ffwll.ch
Cc: Ben Widawsky b...@bwidawsk.net
Cc: Matthew Garret m...@redhat.com


details... Can someone remind me why we didn't put it at the bottom?
Reviewed-by: Ben Widawsky b...@bwidawsk.net


---
 drivers/gpu/drm/i915/i915_gem_gtt.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 528fd43..4c03544 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -72,7 +72,7 @@ int i915_gem_init_aliasing_ppgtt(struct drm_device 
*dev)
 	/* ppgtt PDEs reside in the global gtt pagetable, which has 
512*1024
 	 * entries. For aliasing ppgtt support we just steal them at the 
end for

 * now. */
-   first_pd_entry_in_global_pt = 512*1024 - I915_PPGTT_PD_ENTRIES;
+   first_pd_entry_in_global_pt = dev_priv-mm.gtt-gtt_total_entries -
I915_PPGTT_PD_ENTRIES;

ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
if (!ppgtt)


--
Ben Widawsky, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Use the correct size of the GTT for placing the per-process entries

2012-08-24 Thread Daniel Vetter
On Fri, Aug 24, 2012 at 10:34:13AM -0700, Ben Widawsky wrote:
 On 2012-08-24 01:12, Chris Wilson wrote:
 The current layout is to place the per-process tables at the end
 of the
 GTT. However, this is currently using a hardcoded maximum size for
 the GTT
 and not taking in account limitations imposed by the BIOS. Use the
 value
 for the total number of entries allocated in the table as provided by
 the configuration registers.
 
 Reported-by: Matthew Garrett m...@redhat.com
 Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
 Cc: Daniel Vetter daniel.vet...@ffwll.ch
 Cc: Ben Widawsky b...@bwidawsk.net
 Cc: Matthew Garret m...@redhat.com
 
 details... Can someone remind me why we didn't put it at the bottom?
 Reviewed-by: Ben Widawsky b...@bwidawsk.net

Becaus the bottom is mappable, which is a contended resources (compared to
the entire gtt). Or so was my thinking at least.
-Daniel
-- 
Daniel Vetter
Mail: dan...@ffwll.ch
Mobile: +41 (0)79 365 57 48
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Avoid unbinding due to an interrupted pin_and_fence during execbuffer

2012-08-24 Thread Chris Wilson
If we need to stall in order to complete the pin_and_fence operation
during execbuffer reservation, there is a high likelihood that the
operation will be interrupted by a signal (thanks X!). In order to
simplify the cleanup along that error path, the object was
unconditionally unbound and the error propagated. However, being
interrupted here is far more common than I would like and so we can
strive to avoid the extra work by eliminating the forced unbind.

v2: In discussion over the indecent colour of the new functions and
unwind path, we realised that we can use the new unreserve function to
clean up the code even further.

Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  114 +++-
 1 file changed, 45 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index dc87563..e6b2205 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -331,7 +331,8 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
return ret;
 }
 
-#define  __EXEC_OBJECT_HAS_FENCE (131)
+#define  __EXEC_OBJECT_HAS_PIN (131)
+#define  __EXEC_OBJECT_HAS_FENCE (130)
 
 static int
 need_reloc_mappable(struct drm_i915_gem_object *obj)
@@ -341,9 +342,10 @@ need_reloc_mappable(struct drm_i915_gem_object *obj)
 }
 
 static int
-pin_and_fence_object(struct drm_i915_gem_object *obj,
-struct intel_ring_buffer *ring)
+i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
+  struct intel_ring_buffer *ring)
 {
+   struct drm_i915_private *dev_priv = obj-base.dev-dev_private;
struct drm_i915_gem_exec_object2 *entry = obj-exec_entry;
bool has_fenced_gpu_access = INTEL_INFO(ring-dev)-gen  4;
bool need_fence, need_mappable;
@@ -359,11 +361,13 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
if (ret)
return ret;
 
+   entry-flags |= __EXEC_OBJECT_HAS_PIN;
+
if (has_fenced_gpu_access) {
if (entry-flags  EXEC_OBJECT_NEEDS_FENCE) {
ret = i915_gem_object_get_fence(obj);
if (ret)
-   goto err_unpin;
+   return ret;
 
if (i915_gem_object_pin_fence(obj))
entry-flags |= __EXEC_OBJECT_HAS_FENCE;
@@ -372,12 +376,35 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
}
}
 
+   /* Ensure ppgtt mapping exists if needed */
+   if (dev_priv-mm.aliasing_ppgtt  !obj-has_aliasing_ppgtt_mapping) {
+   i915_ppgtt_bind_object(dev_priv-mm.aliasing_ppgtt,
+  obj, obj-cache_level);
+
+   obj-has_aliasing_ppgtt_mapping = 1;
+   }
+
entry-offset = obj-gtt_offset;
return 0;
+}
 
-err_unpin:
-   i915_gem_object_unpin(obj);
-   return ret;
+static void
+i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
+{
+   struct drm_i915_gem_exec_object2 *entry;
+
+   if (!obj-gtt_space)
+   return;
+
+   entry = obj-exec_entry;
+
+   if (entry-flags  __EXEC_OBJECT_HAS_FENCE)
+   i915_gem_object_unpin_fence(obj);
+
+   if (entry-flags  __EXEC_OBJECT_HAS_PIN)
+   i915_gem_object_unpin(obj);
+
+   entry-flags = ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
 }
 
 static int
@@ -385,11 +412,10 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer 
*ring,
struct drm_file *file,
struct list_head *objects)
 {
-   drm_i915_private_t *dev_priv = ring-dev-dev_private;
struct drm_i915_gem_object *obj;
-   int ret, retry;
-   bool has_fenced_gpu_access = INTEL_INFO(ring-dev)-gen  4;
struct list_head ordered_objects;
+   bool has_fenced_gpu_access = INTEL_INFO(ring-dev)-gen  4;
+   int retry;
 
INIT_LIST_HEAD(ordered_objects);
while (!list_empty(objects)) {
@@ -427,12 +453,12 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer 
*ring,
 * 2.  Bind new objects.
 * 3.  Decrement pin count.
 *
-* This avoid unnecessary unbinding of later objects in order to makr
+* This avoid unnecessary unbinding of later objects in order to make
 * room for the earlier objects *unless* we need to defragment.
 */
retry = 0;
do {
-   ret = 0;
+   int ret = 0;
 
/* Unbind any ill-fitting objects or pin. */
list_for_each_entry(obj, objects, exec_list) {
@@ -452,7 +478,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
(need_mappable  !obj-map_and_fenceable))
ret = 

Re: [Intel-gfx] [PATCH] drm/i915: Avoid unbinding due to an interrupted pin_and_fence during execbuffer

2012-08-24 Thread Daniel Vetter
On Fri, Aug 24, 2012 at 07:18:18PM +0100, Chris Wilson wrote:
 If we need to stall in order to complete the pin_and_fence operation
 during execbuffer reservation, there is a high likelihood that the
 operation will be interrupted by a signal (thanks X!). In order to
 simplify the cleanup along that error path, the object was
 unconditionally unbound and the error propagated. However, being
 interrupted here is far more common than I would like and so we can
 strive to avoid the extra work by eliminating the forced unbind.
 
 v2: In discussion over the indecent colour of the new functions and
 unwind path, we realised that we can use the new unreserve function to
 clean up the code even further.
 
 Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk

Nice colours, merged to dinq.
-Daniel

 ---
  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  114 
 +++-
  1 file changed, 45 insertions(+), 69 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
 b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
 index dc87563..e6b2205 100644
 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
 +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
 @@ -331,7 +331,8 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
   return ret;
  }
  
 -#define  __EXEC_OBJECT_HAS_FENCE (131)
 +#define  __EXEC_OBJECT_HAS_PIN (131)
 +#define  __EXEC_OBJECT_HAS_FENCE (130)
  
  static int
  need_reloc_mappable(struct drm_i915_gem_object *obj)
 @@ -341,9 +342,10 @@ need_reloc_mappable(struct drm_i915_gem_object *obj)
  }
  
  static int
 -pin_and_fence_object(struct drm_i915_gem_object *obj,
 -  struct intel_ring_buffer *ring)
 +i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
 +struct intel_ring_buffer *ring)
  {
 + struct drm_i915_private *dev_priv = obj-base.dev-dev_private;
   struct drm_i915_gem_exec_object2 *entry = obj-exec_entry;
   bool has_fenced_gpu_access = INTEL_INFO(ring-dev)-gen  4;
   bool need_fence, need_mappable;
 @@ -359,11 +361,13 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
   if (ret)
   return ret;
  
 + entry-flags |= __EXEC_OBJECT_HAS_PIN;
 +
   if (has_fenced_gpu_access) {
   if (entry-flags  EXEC_OBJECT_NEEDS_FENCE) {
   ret = i915_gem_object_get_fence(obj);
   if (ret)
 - goto err_unpin;
 + return ret;
  
   if (i915_gem_object_pin_fence(obj))
   entry-flags |= __EXEC_OBJECT_HAS_FENCE;
 @@ -372,12 +376,35 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
   }
   }
  
 + /* Ensure ppgtt mapping exists if needed */
 + if (dev_priv-mm.aliasing_ppgtt  !obj-has_aliasing_ppgtt_mapping) {
 + i915_ppgtt_bind_object(dev_priv-mm.aliasing_ppgtt,
 +obj, obj-cache_level);
 +
 + obj-has_aliasing_ppgtt_mapping = 1;
 + }
 +
   entry-offset = obj-gtt_offset;
   return 0;
 +}
  
 -err_unpin:
 - i915_gem_object_unpin(obj);
 - return ret;
 +static void
 +i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
 +{
 + struct drm_i915_gem_exec_object2 *entry;
 +
 + if (!obj-gtt_space)
 + return;
 +
 + entry = obj-exec_entry;
 +
 + if (entry-flags  __EXEC_OBJECT_HAS_FENCE)
 + i915_gem_object_unpin_fence(obj);
 +
 + if (entry-flags  __EXEC_OBJECT_HAS_PIN)
 + i915_gem_object_unpin(obj);
 +
 + entry-flags = ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
  }
  
  static int
 @@ -385,11 +412,10 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer 
 *ring,
   struct drm_file *file,
   struct list_head *objects)
  {
 - drm_i915_private_t *dev_priv = ring-dev-dev_private;
   struct drm_i915_gem_object *obj;
 - int ret, retry;
 - bool has_fenced_gpu_access = INTEL_INFO(ring-dev)-gen  4;
   struct list_head ordered_objects;
 + bool has_fenced_gpu_access = INTEL_INFO(ring-dev)-gen  4;
 + int retry;
  
   INIT_LIST_HEAD(ordered_objects);
   while (!list_empty(objects)) {
 @@ -427,12 +453,12 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer 
 *ring,
* 2.  Bind new objects.
* 3.  Decrement pin count.
*
 -  * This avoid unnecessary unbinding of later objects in order to makr
 +  * This avoid unnecessary unbinding of later objects in order to make
* room for the earlier objects *unless* we need to defragment.
*/
   retry = 0;
   do {
 - ret = 0;
 + int ret = 0;
  
   /* Unbind any ill-fitting objects or pin. */
   list_for_each_entry(obj, objects, exec_list) {
 @@ -452,7 +478,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer 
 *ring,

[Intel-gfx] [PATCH 0/3] Some forcewake fixes

2012-08-24 Thread Ben Widawsky
I have no proof that any of these are relevant other than a designer in the
know told me we should be doing this. Furthermore if Mathew's hang is truly on
the very first read to the FORCEWAKE_ACK, these patches almost surely will
*not* help :-(

Ben Widawsky (3):
  drm/i915: Extract forcewake ack timeout
  drm/i915: Change forcewake timeout to 2ms
  drm/i915: Never read FORCEWAKE

 drivers/gpu/drm/i915/intel_pm.c | 23 ++-
 1 file changed, 14 insertions(+), 9 deletions(-)

-- 
1.7.12

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/3] drm/i915: Extract forcewake ack timeout

2012-08-24 Thread Ben Widawsky
It's used all over the place, and we want to be able to play around with
the value, apparently. Note that it doesn't touch other timeouts of the
same value (like gtfifo, and thread C0 wait).

Signed-off-by: Ben Widawsky b...@bwidawsk.net
---
 drivers/gpu/drm/i915/intel_pm.c | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index c0721ff..f42c142 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -31,6 +31,8 @@
 #include ../../../platform/x86/intel_ips.h
 #include linux/module.h
 
+#define FORCEWAKE_ACK_TIMEOUT_US 500
+
 /* FBC, or Frame Buffer Compression, is a technique employed to compress the
  * framebuffer contents in-memory, aiming at reducing the required bandwidth
  * during in-memory transfers and, therefore, reduce the power packet.
@@ -3968,13 +3970,15 @@ static void __gen6_gt_force_wake_get(struct 
drm_i915_private *dev_priv)
else
forcewake_ack = FORCEWAKE_ACK;
 
-   if (wait_for_atomic_us((I915_READ_NOTRACE(forcewake_ack)  1) == 0, 
500))
+   if (wait_for_atomic_us((I915_READ_NOTRACE(forcewake_ack)  1) == 0,
+  FORCEWAKE_ACK_TIMEOUT_US))
DRM_ERROR(Force wake wait timed out\n);
 
I915_WRITE_NOTRACE(FORCEWAKE, 1);
POSTING_READ(FORCEWAKE);
 
-   if (wait_for_atomic_us((I915_READ_NOTRACE(forcewake_ack)  1), 500))
+   if (wait_for_atomic_us((I915_READ_NOTRACE(forcewake_ack)  1),
+  FORCEWAKE_ACK_TIMEOUT_US))
DRM_ERROR(Force wake wait timed out\n);
 
__gen6_gt_wait_for_thread_c0(dev_priv);
@@ -3989,13 +3993,15 @@ static void __gen6_gt_force_wake_mt_get(struct 
drm_i915_private *dev_priv)
else
forcewake_ack = FORCEWAKE_MT_ACK;
 
-   if (wait_for_atomic_us((I915_READ_NOTRACE(forcewake_ack)  1) == 0, 
500))
+   if (wait_for_atomic_us((I915_READ_NOTRACE(forcewake_ack)  1) == 0,
+  FORCEWAKE_ACK_TIMEOUT_US))
DRM_ERROR(Force wake wait timed out\n);
 
I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_ENABLE(1));
POSTING_READ(FORCEWAKE_MT);
 
-   if (wait_for_atomic_us((I915_READ_NOTRACE(forcewake_ack)  1), 500))
+   if (wait_for_atomic_us((I915_READ_NOTRACE(forcewake_ack)  1),
+  FORCEWAKE_ACK_TIMEOUT_US))
DRM_ERROR(Force wake wait timed out\n);
 
__gen6_gt_wait_for_thread_c0(dev_priv);
@@ -4082,7 +4088,8 @@ static void vlv_force_wake_get(struct drm_i915_private 
*dev_priv)
I915_WRITE_NOTRACE(FORCEWAKE_VLV, 0x);
POSTING_READ(FORCEWAKE_VLV);
 
-   if (wait_for_atomic_us((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV)  1), 500))
+   if (wait_for_atomic_us((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV)  1),
+  FORCEWAKE_ACK_TIMEOUT_US))
DRM_ERROR(Force wake wait timed out\n);
 
__gen6_gt_wait_for_thread_c0(dev_priv);
-- 
1.7.12

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/3] drm/i915: Change forcewake timeout to 2ms

2012-08-24 Thread Ben Widawsky
A designer familiar with the hardware has stated that the forcewake
timeout can theoretically be as high as a little over 1ms. Therefore we
modify our code to use 2ms (appropriate fudge and because we don't want
to round down).

Hopefully this can't prevent spurious timeouts.

Signed-off-by: Ben Widawsky b...@bwidawsk.net
---
 drivers/gpu/drm/i915/intel_pm.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index f42c142..2a8468d 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -31,7 +31,7 @@
 #include ../../../platform/x86/intel_ips.h
 #include linux/module.h
 
-#define FORCEWAKE_ACK_TIMEOUT_US 500
+#define FORCEWAKE_ACK_TIMEOUT_MS 2
 
 /* FBC, or Frame Buffer Compression, is a technique employed to compress the
  * framebuffer contents in-memory, aiming at reducing the required bandwidth
@@ -3970,15 +3970,15 @@ static void __gen6_gt_force_wake_get(struct 
drm_i915_private *dev_priv)
else
forcewake_ack = FORCEWAKE_ACK;
 
-   if (wait_for_atomic_us((I915_READ_NOTRACE(forcewake_ack)  1) == 0,
-  FORCEWAKE_ACK_TIMEOUT_US))
+   if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack)  1) == 0,
+   FORCEWAKE_ACK_TIMEOUT_MS))
DRM_ERROR(Force wake wait timed out\n);
 
I915_WRITE_NOTRACE(FORCEWAKE, 1);
POSTING_READ(FORCEWAKE);
 
-   if (wait_for_atomic_us((I915_READ_NOTRACE(forcewake_ack)  1),
-  FORCEWAKE_ACK_TIMEOUT_US))
+   if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack)  1),
+   FORCEWAKE_ACK_TIMEOUT_MS))
DRM_ERROR(Force wake wait timed out\n);
 
__gen6_gt_wait_for_thread_c0(dev_priv);
@@ -3993,15 +3993,15 @@ static void __gen6_gt_force_wake_mt_get(struct 
drm_i915_private *dev_priv)
else
forcewake_ack = FORCEWAKE_MT_ACK;
 
-   if (wait_for_atomic_us((I915_READ_NOTRACE(forcewake_ack)  1) == 0,
-  FORCEWAKE_ACK_TIMEOUT_US))
+   if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack)  1) == 0,
+   FORCEWAKE_ACK_TIMEOUT_MS))
DRM_ERROR(Force wake wait timed out\n);
 
I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_ENABLE(1));
POSTING_READ(FORCEWAKE_MT);
 
-   if (wait_for_atomic_us((I915_READ_NOTRACE(forcewake_ack)  1),
-  FORCEWAKE_ACK_TIMEOUT_US))
+   if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack)  1),
+   FORCEWAKE_ACK_TIMEOUT_MS))
DRM_ERROR(Force wake wait timed out\n);
 
__gen6_gt_wait_for_thread_c0(dev_priv);
@@ -4088,8 +4088,8 @@ static void vlv_force_wake_get(struct drm_i915_private 
*dev_priv)
I915_WRITE_NOTRACE(FORCEWAKE_VLV, 0x);
POSTING_READ(FORCEWAKE_VLV);
 
-   if (wait_for_atomic_us((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV)  1),
-  FORCEWAKE_ACK_TIMEOUT_US))
+   if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV)  1),
+   FORCEWAKE_ACK_TIMEOUT_MS))
DRM_ERROR(Force wake wait timed out\n);
 
__gen6_gt_wait_for_thread_c0(dev_priv);
-- 
1.7.12

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 3/3] drm/i915: Never read FORCEWAKE

2012-08-24 Thread Ben Widawsky
The same designer from the previous patch has told us to never read
FORCEWAKE. We only do this for the POSTING_READ(), so simply change that
to something within the same cacheline (for no reason in particular
other than it sounds nice). In the _mt case we can leverage
the gtfifodbg check for the POSTING_READ.

Signed-off-by: Ben Widawsky b...@bwidawsk.net
---
 drivers/gpu/drm/i915/intel_pm.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 2a8468d..83ec02c 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3975,7 +3975,7 @@ static void __gen6_gt_force_wake_get(struct 
drm_i915_private *dev_priv)
DRM_ERROR(Force wake wait timed out\n);
 
I915_WRITE_NOTRACE(FORCEWAKE, 1);
-   POSTING_READ(FORCEWAKE);
+   POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE 
*/
 
if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack)  1),
FORCEWAKE_ACK_TIMEOUT_MS))
@@ -3998,7 +3998,7 @@ static void __gen6_gt_force_wake_mt_get(struct 
drm_i915_private *dev_priv)
DRM_ERROR(Force wake wait timed out\n);
 
I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_ENABLE(1));
-   POSTING_READ(FORCEWAKE_MT);
+   POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE 
*/
 
if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack)  1),
FORCEWAKE_ACK_TIMEOUT_MS))
@@ -4035,14 +4035,12 @@ void gen6_gt_check_fifodbg(struct drm_i915_private 
*dev_priv)
 static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
 {
I915_WRITE_NOTRACE(FORCEWAKE, 0);
-   POSTING_READ(FORCEWAKE);
gen6_gt_check_fifodbg(dev_priv);
 }
 
 static void __gen6_gt_force_wake_mt_put(struct drm_i915_private *dev_priv)
 {
I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(1));
-   POSTING_READ(FORCEWAKE_MT);
gen6_gt_check_fifodbg(dev_priv);
 }
 
-- 
1.7.12

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 3/3] drm/i915: Never read FORCEWAKE

2012-08-24 Thread Daniel Vetter
On Fri, Aug 24, 2012 at 12:31:11PM -0700, Ben Widawsky wrote:
 The same designer from the previous patch has told us to never read
 FORCEWAKE. We only do this for the POSTING_READ(), so simply change that
 to something within the same cacheline (for no reason in particular
 other than it sounds nice). In the _mt case we can leverage
 the gtfifodbg check for the POSTING_READ.
 
 Signed-off-by: Ben Widawsky b...@bwidawsk.net

This partially reverts

commit 6af2d180f82151cf3d58952e35a4f96e45bc453a
Author: Daniel Vetter daniel.vet...@ffwll.ch
Date:   Thu Jul 26 16:24:50 2012 +0200

drm/i915: fix forcewake related hangs on snb

I guess the commit message should mention that. Also, please add a comment
it the _put functions that the check_fifodbg serves as a posting read (and
that we have experimental evidence suggesting that we should readback
something nearby).

btw, can you please ask your nice designer whether he has any clue whether
that nearby posting_read has an effect?

Thanks, Daniel
 ---
  drivers/gpu/drm/i915/intel_pm.c | 6 ++
  1 file changed, 2 insertions(+), 4 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
 index 2a8468d..83ec02c 100644
 --- a/drivers/gpu/drm/i915/intel_pm.c
 +++ b/drivers/gpu/drm/i915/intel_pm.c
 @@ -3975,7 +3975,7 @@ static void __gen6_gt_force_wake_get(struct 
 drm_i915_private *dev_priv)
   DRM_ERROR(Force wake wait timed out\n);
  
   I915_WRITE_NOTRACE(FORCEWAKE, 1);
 - POSTING_READ(FORCEWAKE);
 + POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE 
 */
  
   if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack)  1),
   FORCEWAKE_ACK_TIMEOUT_MS))
 @@ -3998,7 +3998,7 @@ static void __gen6_gt_force_wake_mt_get(struct 
 drm_i915_private *dev_priv)
   DRM_ERROR(Force wake wait timed out\n);
  
   I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_ENABLE(1));
 - POSTING_READ(FORCEWAKE_MT);
 + POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE 
 */
  
   if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack)  1),
   FORCEWAKE_ACK_TIMEOUT_MS))
 @@ -4035,14 +4035,12 @@ void gen6_gt_check_fifodbg(struct drm_i915_private 
 *dev_priv)
  static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
  {
   I915_WRITE_NOTRACE(FORCEWAKE, 0);
 - POSTING_READ(FORCEWAKE);
   gen6_gt_check_fifodbg(dev_priv);
  }
  
  static void __gen6_gt_force_wake_mt_put(struct drm_i915_private *dev_priv)
  {
   I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(1));
 - POSTING_READ(FORCEWAKE_MT);
   gen6_gt_check_fifodbg(dev_priv);
  }
  
 -- 
 1.7.12
 
 ___
 Intel-gfx mailing list
 Intel-gfx@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Mail: dan...@ffwll.ch
Mobile: +41 (0)79 365 57 48
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/2] drm/i915: differe error message between forcwake timeouts

2012-08-24 Thread Paul Menzel
Am Freitag, den 24.08.2012, 17:26 +0200 schrieb Daniel Vetter:

In the commit summary: s,differe,different,

 ickle danvet: in the force wake, both DRM_ERRORs have the same string.
 ickle useful for .txt shrinkage, horrible for debugging
 
 Signed-off-by: Daniel Vetter daniel.vet...@ffwll.ch

 ---
  drivers/gpu/drm/i915/intel_pm.c |   12 ++--
  1 file changed, 6 insertions(+), 6 deletions(-)

[…]

Acked-by: Paul Menzel paulepan...@users.sourceforge.net


Thanks,

Paul


signature.asc
Description: This is a digitally signed message part
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx