Running the updated xf86-video-intel driver uncovered a bug in the
kernel drm code. The page fault handler wasn't handling some of the
possible errors correctly. This made the X server die with a SIGSEGV.
The diff below brings things closer to what Linux does, and seems to
fix the crashes I was seeing. A bit more testing would be welcome
though. Note that this needs the commit to drmP.h I just made, which
might not have made it to all the anoncvs servers yet.
Index: i915_gem.c
===
RCS file: /cvs/src/sys/dev/pci/drm/i915/i915_gem.c,v
retrieving revision 1.68
diff -u -p -r1.68 i915_gem.c
--- i915_gem.c2 Feb 2014 10:54:10 - 1.68
+++ i915_gem.c4 Feb 2014 22:47:53 -
@@ -1522,6 +1522,7 @@ i915_gem_fault(struct drm_gem_object *ge
NULL, NULL);
DRM_UNLOCK();
dev_priv-entries--;
+ pmap_update(ufi-orig_map-pmap);
uvm_wait(intelflt);
return (VM_PAGER_REFAULT);
}
@@ -1533,18 +1534,42 @@ unlock:
DRM_UNLOCK();
dev_priv-entries--;
pmap_update(ufi-orig_map-pmap);
- if (ret == -EIO) {
+
+ switch (ret) {
+ case -EIO:
+ /* If this -EIO is due to a gpu hang, give the reset code a
+ * chance to clean up the mess. Otherwise return the proper
+ * SIGBUS. */
+ if (!atomic_read(dev_priv-mm.wedged))
+ return VM_PAGER_ERROR;
+ case -EAGAIN:
+ /* Give the error handler a chance to run and move the
+ * objects off the GPU active list. Next time we service the
+ * fault, we should be able to transition the page into the
+ * GTT without touching the GPU (and so avoid further
+ * EIO/EGAIN). If the GPU is wedged, then there is no issue
+ * with coherency, just lost writes.
+ */
+#if 0
+ set_need_resched();
+#endif
+ case 0:
+ case -ERESTART:
+ case -EINTR:
+ case -EBUSY:
/*
- * EIO means we're wedged, so upon resetting the gpu we'll
- * be alright and can refault. XXX only on resettable chips.
+ * EBUSY is ok: this just means that another thread
+ * already did the job.
*/
- ret = VM_PAGER_REFAULT;
- } else if (ret) {
- ret = VM_PAGER_ERROR;
- } else {
- ret = VM_PAGER_OK;
+ return VM_PAGER_OK;
+ case -ENOMEM:
+ return VM_PAGER_ERROR;
+ case -ENOSPC:
+ return VM_PAGER_ERROR;
+ default:
+ WARN_ONCE(ret, unhandled error in i915_gem_fault: %i\n, ret);
+ return VM_PAGER_ERROR;
}
- return ret;
}
/**
Thanks, this works good for me too -- no X segfaults anymore.