Running the updated xf86-video-intel driver uncovered a bug in the
kernel drm code.  The page fault handler wasn't handling some of the
possible errors correctly.  This made the X server die with a SIGSEGV.
The diff below brings things closer to what Linux does, and seems to
fix the crashes I was seeing.  A bit more testing would be welcome
though.  Note that this needs the commit to drmP.h I just made, which
might not have made it to all the anoncvs servers yet.


Index: i915_gem.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/i915_gem.c,v
retrieving revision 1.68
diff -u -p -r1.68 i915_gem.c
--- i915_gem.c  2 Feb 2014 10:54:10 -0000       1.68
+++ i915_gem.c  4 Feb 2014 22:47:53 -0000
@@ -1522,6 +1522,7 @@ i915_gem_fault(struct drm_gem_object *ge
                            NULL, NULL);
                        DRM_UNLOCK();
                        dev_priv->entries--;
+                       pmap_update(ufi->orig_map->pmap);
                        uvm_wait("intelflt");
                        return (VM_PAGER_REFAULT);
                }
@@ -1533,18 +1534,42 @@ unlock:
        DRM_UNLOCK();
        dev_priv->entries--;
        pmap_update(ufi->orig_map->pmap);
-       if (ret == -EIO) {
+
+       switch (ret) {
+       case -EIO:
+               /* If this -EIO is due to a gpu hang, give the reset code a
+                * chance to clean up the mess. Otherwise return the proper
+                * SIGBUS. */
+               if (!atomic_read(&dev_priv->mm.wedged))
+                       return VM_PAGER_ERROR;
+       case -EAGAIN:
+               /* Give the error handler a chance to run and move the
+                * objects off the GPU active list. Next time we service the
+                * fault, we should be able to transition the page into the
+                * GTT without touching the GPU (and so avoid further
+                * EIO/EGAIN). If the GPU is wedged, then there is no issue
+                * with coherency, just lost writes.
+                */
+#if 0
+               set_need_resched();
+#endif
+       case 0:
+       case -ERESTART:
+       case -EINTR:
+       case -EBUSY:
                /*
-                * EIO means we're wedged, so upon resetting the gpu we'll
-                * be alright and can refault. XXX only on resettable chips.
+                * EBUSY is ok: this just means that another thread
+                * already did the job.
                 */
-               ret = VM_PAGER_REFAULT;
-       } else if (ret) {
-               ret = VM_PAGER_ERROR;
-       } else {
-               ret = VM_PAGER_OK;
+               return VM_PAGER_OK;
+       case -ENOMEM:
+               return VM_PAGER_ERROR;
+       case -ENOSPC:
+               return VM_PAGER_ERROR;
+       default:
+               WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
+               return VM_PAGER_ERROR;
        }
-       return ret;
 }
 
 /**

Reply via email to