So don't return -EAGAIN, even in the case of a gpu hang. Remap it to -EIO
instead.

This is a bit ugly because intel_ring_begin is all non-interruptible
and hence only returns -EIO. But as the comment in there says,
auditing all the callsites would be a pain.

To avoid duplicating code, reuse i915_gem_check_wedge in __wait_seqno.
Use the opportunity to clarify the different cases a bit with
comments.

v2: Don't access dev_priv->mm.interruptible from check_wedge - we
might not hold dev->struct_mutex, making this racy. Instead pass
interruptible in as a parameter. I've noticed this because I've hit a
BUG_ON(!mutex_is_locked) at the top of check_wedge. This has been
added in

commit b4aca0106c466b5a0329318203f65bac2d91b682
Author: Ben Widawsky <[email protected]>
Date:   Wed Apr 25 20:50:12 2012 -0700

    drm/i915: extract some common olr+wedge code

although that commit is missing any justification for it. I guess
it's just copy&paste, because the same commit adds the same BUG_ON
check to check_olr, where it indeed makes sense.

But in check_wedge everything we access is protected by other means,
so this is superfluous. And because it now gets in the way (we add a
new caller in __wait_seqno, which can be called without
dev->struct_mutex) let's just remove it.

Signed-off-by: Daniel Vetter <[email protected]>
---
 drivers/gpu/drm/i915/i915_drv.h         |    3 ++-
 drivers/gpu/drm/i915/i915_gem.c         |   24 +++++++++++++++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.c |    2 +-
 3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 86ac9ff..ab9ade0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1330,7 +1330,8 @@ i915_gem_object_unpin_fence(struct drm_i915_gem_object 
*obj)
 
 void i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring);
-int __must_check i915_gem_check_wedge(struct drm_i915_private *dev_priv);
+int __must_check i915_gem_check_wedge(struct drm_i915_private *dev_priv,
+                                     bool interruptible);
 
 void i915_gem_reset(struct drm_device *dev);
 void i915_gem_clflush_object(struct drm_i915_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1214850..af6a510 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1864,10 +1864,9 @@ i915_gem_retire_work_handler(struct work_struct *work)
 }
 
 int
-i915_gem_check_wedge(struct drm_i915_private *dev_priv)
+i915_gem_check_wedge(struct drm_i915_private *dev_priv,
+                    bool interruptible)
 {
-       BUG_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
-
        if (atomic_read(&dev_priv->mm.wedged)) {
                struct completion *x = &dev_priv->error_completion;
                bool recovery_complete;
@@ -1878,7 +1877,16 @@ i915_gem_check_wedge(struct drm_i915_private *dev_priv)
                recovery_complete = x->done > 0;
                spin_unlock_irqrestore(&x->wait.lock, flags);
 
-               return recovery_complete ? -EIO : -EAGAIN;
+               /* Non-interruptible callers can't handle -EAGAIN, hence return
+                * -EIO unconditionally for these. */
+               if (!interruptible)
+                       return -EIO;
+
+               /* Recovery complete, but still wedged means reset failure. */
+               if (recovery_complete)
+                       return -EIO;
+
+               return -EAGAIN;
        }
 
        return 0;
@@ -1932,6 +1940,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, 
u32 seqno,
        unsigned long timeout_jiffies;
        long end;
        bool wait_forever = true;
+       int ret;
 
        if (i915_seqno_passed(ring->get_seqno(ring), seqno))
                return 0;
@@ -1963,8 +1972,9 @@ static int __wait_seqno(struct intel_ring_buffer *ring, 
u32 seqno,
                        end = wait_event_timeout(ring->irq_queue, EXIT_COND,
                                                 timeout_jiffies);
 
-               if (atomic_read(&dev_priv->mm.wedged))
-                       end = -EAGAIN;
+               ret = i915_gem_check_wedge(dev_priv, interruptible);
+               if (ret)
+                       end = ret;
        } while (end == 0 && wait_forever);
 
        getrawmonotonic(&now);
@@ -2004,7 +2014,7 @@ i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t 
seqno)
 
        BUG_ON(seqno == 0);
 
-       ret = i915_gem_check_wedge(dev_priv);
+       ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
        if (ret)
                return ret;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 1e86894..6c024d4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1221,7 +1221,7 @@ int intel_wait_ring_buffer(struct intel_ring_buffer 
*ring, int n)
 
                msleep(1);
 
-               ret = i915_gem_check_wedge(dev_priv);
+               ret = i915_gem_check_wedge(dev_priv, 
dev_priv->mm.interruptible);
                if (ret)
                        return ret;
        } while (!time_after(jiffies, end));
-- 
1.7.10

_______________________________________________
Intel-gfx mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to