Module Name:    xsrc
Committed By:   snj
Date:           Fri Jan 16 21:32:11 UTC 2015

Modified Files:
        xsrc/external/mit/xf86-video-intel/dist/src: backlight.c
        xsrc/external/mit/xf86-video-intel/dist/src/sna: kgem.c sna_display.c
        xsrc/external/mit/xf86-video-intel/dist/src/uxa: intel.h
Removed Files:
        xsrc/external/mit/xf86-video-intel/dist/src/uxa: intel_glamor.c
            intel_glamor.h uxa-glamor.h

Log Message:
Resolve merge conflicts.


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 \
    xsrc/external/mit/xf86-video-intel/dist/src/backlight.c
cvs rdiff -u -r1.3 -r1.4 \
    xsrc/external/mit/xf86-video-intel/dist/src/sna/kgem.c
cvs rdiff -u -r1.2 -r1.3 \
    xsrc/external/mit/xf86-video-intel/dist/src/sna/sna_display.c
cvs rdiff -u -r1.2 -r1.3 \
    xsrc/external/mit/xf86-video-intel/dist/src/uxa/intel.h
cvs rdiff -u -r1.1.1.2 -r0 \
    xsrc/external/mit/xf86-video-intel/dist/src/uxa/intel_glamor.c \
    xsrc/external/mit/xf86-video-intel/dist/src/uxa/intel_glamor.h
cvs rdiff -u -r1.1.1.1 -r0 \
    xsrc/external/mit/xf86-video-intel/dist/src/uxa/uxa-glamor.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: xsrc/external/mit/xf86-video-intel/dist/src/backlight.c
diff -u xsrc/external/mit/xf86-video-intel/dist/src/backlight.c:1.2 xsrc/external/mit/xf86-video-intel/dist/src/backlight.c:1.3
--- xsrc/external/mit/xf86-video-intel/dist/src/backlight.c:1.2	Wed Nov  5 17:58:59 2014
+++ xsrc/external/mit/xf86-video-intel/dist/src/backlight.c	Fri Jan 16 21:32:11 2015
@@ -81,6 +81,7 @@ void backlight_init(struct backlight *b)
 	b->fd = -1;
 	b->pid = -1;
 	b->max = -1;
+	b->has_power = 0;
 }
 
 #if defined(__OpenBSD__) || defined(__NetBSD__)
@@ -153,6 +154,15 @@ enum backlight_type backlight_exists(con
 	return BL_PLATFORM;
 }
 
+int backlight_on(struct backlight *b)
+{
+	return 0;
+}
+
+int backlight_off(struct backlight *b)
+{
+	return 0;
+}
 #else
 
 static int
@@ -202,6 +212,21 @@ __backlight_read(const char *iface, cons
 	return val;
 }
 
+static int
+__backlight_write(const char *iface, const char *file, const char *value)
+{
+	int fd, ret;
+
+	fd = __backlight_open(iface, file, O_WRONLY);
+	if (fd < 0)
+		return -1;
+
+	ret = write(fd, value, strlen(value)+1);
+	close(fd);
+
+	return ret;
+}
+
 /* List of available kernel interfaces in priority order */
 static const char *known_interfaces[] = {
 	"dell_backlight",
@@ -284,6 +309,9 @@ static int __backlight_direct_init(struc
 	if (fd < 0)
 		return 0;
 
+	if (__backlight_read(iface, "bl_power") != -1)
+		b->has_power = 1;
+
 	return __backlight_init(b, iface, fd);
 }
 
@@ -448,6 +476,30 @@ int backlight_get(struct backlight *b)
 		level = -1;
 	return level;
 }
+
+int backlight_off(struct backlight *b)
+{
+	if (b->iface == NULL)
+		return 0;
+
+	if (!b->has_power)
+		return 0;
+
+	/* 4 -> FB_BLANK_POWERDOWN */
+	return __backlight_write(b->iface, "bl_power", "4");
+}
+
+int backlight_on(struct backlight *b)
+{
+	if (b->iface == NULL)
+		return 0;
+
+	if (!b->has_power)
+		return 0;
+
+	/* 0 -> FB_BLANK_UNBLANK */
+	return __backlight_write(b->iface, "bl_power", "0");
+}
 #endif
 
 void backlight_disable(struct backlight *b)

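The backlight.c change above adds backlight_on()/backlight_off() helpers that flip the
kernel's bl_power attribute (0 = FB_BLANK_UNBLANK, 4 = FB_BLANK_POWERDOWN) whenever the
selected interface exposes it. For reference, a minimal standalone sketch of the same idea
follows; it assumes a Linux-style /sys/class/backlight/<iface>/bl_power node and uses a
made-up interface name, whereas the driver itself goes through __backlight_open() and
__backlight_write():

/* Sketch only: toggle a backlight power node the way backlight_on()/off() do.
 * The sysfs path layout and the "intel_backlight" name are assumptions for
 * illustration; the real driver resolves the interface itself. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int bl_power_write(const char *iface, const char *value)
{
	char path[256];
	int fd, ret;

	snprintf(path, sizeof(path),
		 "/sys/class/backlight/%s/bl_power", iface);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;

	/* Mirror __backlight_write(): write the string including its NUL. */
	ret = write(fd, value, strlen(value) + 1);
	close(fd);
	return ret < 0 ? -1 : 0;
}

int main(void)
{
	bl_power_write("intel_backlight", "4");	/* 4 -> FB_BLANK_POWERDOWN (off) */
	bl_power_write("intel_backlight", "0");	/* 0 -> FB_BLANK_UNBLANK   (on)  */
	return 0;
}
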
Index: xsrc/external/mit/xf86-video-intel/dist/src/sna/kgem.c
diff -u xsrc/external/mit/xf86-video-intel/dist/src/sna/kgem.c:1.3 xsrc/external/mit/xf86-video-intel/dist/src/sna/kgem.c:1.4
--- xsrc/external/mit/xf86-video-intel/dist/src/sna/kgem.c:1.3	Wed Nov  5 17:58:59 2014
+++ xsrc/external/mit/xf86-video-intel/dist/src/sna/kgem.c	Fri Jan 16 21:32:11 2015
@@ -79,9 +79,11 @@ search_snoop_cache(struct kgem *kgem, un
 #define DBG_NO_RELAXED_FENCING 0
 #define DBG_NO_SECURE_BATCHES 0
 #define DBG_NO_PINNED_BATCHES 0
+#define DBG_NO_SHRINK_BATCHES 0
 #define DBG_NO_FAST_RELOC 0
 #define DBG_NO_HANDLE_LUT 0
 #define DBG_NO_WT 0
+#define DBG_NO_WC_MMAP 0
 #define DBG_DUMP 0
 #define DBG_NO_MALLOC_CACHE 0
 
@@ -94,6 +96,11 @@ search_snoop_cache(struct kgem *kgem, un
 #define SHOW_BATCH_BEFORE 0
 #define SHOW_BATCH_AFTER 0
 
+#if !USE_WC_MMAP
+#undef DBG_NO_WC_MMAP
+#define DBG_NO_WC_MMAP 1
+#endif
+
 #if 0
 #define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__))
 #define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__))
@@ -126,12 +133,14 @@ search_snoop_cache(struct kgem *kgem, un
 #define LOCAL_I915_PARAM_HAS_BLT		11
 #define LOCAL_I915_PARAM_HAS_RELAXED_FENCING	12
 #define LOCAL_I915_PARAM_HAS_RELAXED_DELTA	15
+#define LOCAL_I915_PARAM_HAS_LLC		17
 #define LOCAL_I915_PARAM_HAS_SEMAPHORES		20
 #define LOCAL_I915_PARAM_HAS_SECURE_BATCHES	23
 #define LOCAL_I915_PARAM_HAS_PINNED_BATCHES	24
 #define LOCAL_I915_PARAM_HAS_NO_RELOC		25
 #define LOCAL_I915_PARAM_HAS_HANDLE_LUT		26
 #define LOCAL_I915_PARAM_HAS_WT			27
+#define LOCAL_I915_PARAM_MMAP_VERSION		30
 
 #define LOCAL_I915_EXEC_IS_PINNED		(1<<10)
 #define LOCAL_I915_EXEC_NO_RELOC		(1<<11)
@@ -178,6 +187,17 @@ struct local_i915_gem_caching {
 #define LOCAL_IOCTL_I915_GEM_SET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_SET_CACHING, struct local_i915_gem_caching)
 #define LOCAL_IOCTL_I915_GEM_GET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_GET_CACHING, struct local_i915_gem_caching)
 
+struct local_i915_gem_mmap2 {
+	uint32_t handle;
+	uint32_t pad;
+	uint64_t offset;
+	uint64_t size;
+	uint64_t addr_ptr;
+	uint64_t flags;
+#define I915_MMAP_WC 0x1
+};
+#define LOCAL_IOCTL_I915_GEM_MMAP_v2 DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct local_i915_gem_mmap2)
+
 struct kgem_buffer {
 	struct kgem_bo base;
 	void *mem;
@@ -282,6 +302,13 @@ static void assert_bo_retired(struct kge
 #define assert_bo_retired(bo)
 #endif
 
+static void
+__kgem_set_wedged(struct kgem *kgem)
+{
+	kgem->wedged = true;
+	sna_render_mark_wedged(container_of(kgem, struct sna, kgem));
+}
+
 static void kgem_sna_reset(struct kgem *kgem)
 {
 	struct sna *sna = container_of(kgem, struct sna, kgem);
@@ -403,24 +430,21 @@ static bool __kgem_throttle_retire(struc
 
 static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
 {
-	struct drm_i915_gem_mmap_gtt mmap_arg;
+	struct drm_i915_gem_mmap_gtt gtt;
 	void *ptr;
 	int err;
 
 	DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__,
 	     bo->handle, bytes(bo)));
-	assert(bo->proxy == NULL);
-	assert(!bo->snoop);
-	assert(num_pages(bo) <= kgem->aperture_mappable / 4);
 
+	VG_CLEAR(gtt);
 retry_gtt:
-	VG_CLEAR(mmap_arg);
-	mmap_arg.handle = bo->handle;
+	gtt.handle = bo->handle;
 #ifdef __NetBSD__
-	if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg)) {
+	if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &gtt)) {
 		err = errno;
 #else
-	if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg))) {
+	if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &gtt))) {
 #endif
 		assert(err != EINVAL);
 
@@ -438,11 +462,11 @@ retry_gtt:
 
 retry_mmap:
 #ifdef __NetBSD__
-	err = -drmMap(kgem->fd, mmap_arg.offset, bytes(bo), &ptr);
+	err = -drmMap(kgem->fd, gtt.offset, bytes(bo), &ptr);
 	if (err) {
 #else
 	ptr = mmap(0, bytes(bo), PROT_READ | PROT_WRITE, MAP_SHARED,
-		   kgem->fd, mmap_arg.offset);
+		   kgem->fd, gtt.offset);
 	if (ptr == MAP_FAILED) {
 		err = errno;
 #endif
@@ -459,7 +483,78 @@ retry_mmap:
 		ptr = NULL;
 	}
 
-	return ptr;
+	/* Cache this mapping to avoid the overhead of an
+	 * excruciatingly slow GTT pagefault. This is more an
+	 * issue with compositing managers which need to
+	 * frequently flush CPU damage to their GPU bo.
+	 */
+	return bo->map__gtt = ptr;
+}
+
+static void *__kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo)
+{
+	struct local_i915_gem_mmap2 wc;
+	int err;
+
+	DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__,
+	     bo->handle, bytes(bo)));
+	assert(kgem->has_wc_mmap);
+
+	VG_CLEAR(wc);
+
+retry_wc:
+	wc.handle = bo->handle;
+	wc.offset = 0;
+	wc.size = bytes(bo);
+	wc.flags = I915_MMAP_WC;
+	if ((err = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc))) {
+		assert(err != EINVAL);
+
+		if (__kgem_throttle_retire(kgem, 0))
+			goto retry_wc;
+
+		if (kgem_cleanup_cache(kgem))
+			goto retry_wc;
+
+		ERR(("%s: failed to mmap handle=%d, %d bytes, into CPU(wc) domain: %d\n",
+		     __FUNCTION__, bo->handle, bytes(bo), -err));
+		return NULL;
+	}
+
+	VG(VALGRIND_MAKE_MEM_DEFINED(wc.addr_ptr, bytes(bo)));
+
+	DBG(("%s: caching CPU(wc) vma for %d\n", __FUNCTION__, bo->handle));
+	return bo->map__wc = (void *)(uintptr_t)wc.addr_ptr;
+}
+
+static void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
+{
+	struct drm_i915_gem_mmap mmap_arg;
+	int err;
+
+retry:
+	VG_CLEAR(mmap_arg);
+	mmap_arg.handle = bo->handle;
+	mmap_arg.offset = 0;
+	mmap_arg.size = bytes(bo);
+	if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg))) {
+		assert(err != EINVAL);
+
+		if (__kgem_throttle_retire(kgem, 0))
+			goto retry;
+
+		if (kgem_cleanup_cache(kgem))
+			goto retry;
+
+		ERR(("%s: failed to mmap handle=%d, %d bytes, into CPU domain: %d\n",
+		     __FUNCTION__, bo->handle, bytes(bo), -err));
+		return NULL;
+	}
+
+	VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo)));
+
+	DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle));
+	return bo->map__cpu = (void *)(uintptr_t)mmap_arg.addr_ptr;
 }
 
 static int gem_write(int fd, uint32_t handle,
@@ -583,6 +678,7 @@ static void kgem_bo_maybe_retire(struct 
 bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
 		   const void *data, int length)
 {
+	void *ptr;
 	int err;
 
 	assert(bo->refcnt);
@@ -591,6 +687,22 @@ bool kgem_bo_write(struct kgem *kgem, st
 
 	assert(length <= bytes(bo));
 retry:
+	ptr = NULL;
+	if (bo->domain == DOMAIN_CPU || (kgem->has_llc && !bo->scanout)) {
+		ptr = bo->map__cpu;
+		if (ptr == NULL)
+			ptr = __kgem_bo_map__cpu(kgem, bo);
+	} else if (kgem->has_wc_mmap) {
+		ptr = bo->map__wc;
+		if (ptr == NULL)
+			ptr = __kgem_bo_map__wc(kgem, bo);
+	}
+	if (ptr) {
+		/* XXX unsynchronized? */
+		memcpy(ptr, data, length);
+		return true;
+	}
+
 	if ((err = gem_write(kgem->fd, bo->handle, 0, length, data))) {
 		assert(err != EINVAL);
 
@@ -1031,9 +1143,7 @@ static bool test_has_llc(struct kgem *kg
 	if (DBG_NO_LLC)
 		return false;
 
-#if defined(I915_PARAM_HAS_LLC) /* Expected in libdrm-2.4.31 */
-	has_llc = gem_param(kgem, I915_PARAM_HAS_LLC);
-#endif
+	has_llc = gem_param(kgem, LOCAL_I915_PARAM_HAS_LLC);
 	if (has_llc == -1) {
 		DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__));
 		has_llc = kgem->gen >= 060;
@@ -1042,6 +1152,28 @@ static bool test_has_llc(struct kgem *kg
 	return has_llc;
 }
 
+static bool test_has_wc_mmap(struct kgem *kgem)
+{
+	struct local_i915_gem_mmap2 wc;
+	bool ret;
+
+	if (DBG_NO_WC_MMAP)
+		return false;
+
+	if (gem_param(kgem, LOCAL_I915_PARAM_MMAP_VERSION) < 1)
+		return false;
+
+	VG_CLEAR(wc);
+	wc.handle = gem_create(kgem->fd, 1);
+	wc.offset = 0;
+	wc.size = 4096;
+	wc.flags = I915_MMAP_WC;
+	ret = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc) == 0;
+	gem_close(kgem->fd, wc.handle);
+
+	return ret;
+}
+
 static bool test_has_caching(struct kgem *kgem)
 {
 	uint32_t handle;
@@ -1075,9 +1207,6 @@ static bool test_has_userptr(struct kgem
 	if (kgem->gen == 040)
 		return false;
 
-	if (kgem->gen >= 0100)
-		return false; /* FIXME https://bugs.freedesktop.org/show_bug.cgi?id=79053 */
-
 	if (posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE))
 		return false;
 
@@ -1237,10 +1366,13 @@ err:
 
 static void kgem_init_swizzling(struct kgem *kgem)
 {
-	struct drm_i915_gem_get_tiling tiling;
-
-	if (kgem->gen < 050) /* bit17 swizzling :( */
-		return;
+	struct local_i915_gem_get_tiling_v2 {
+		uint32_t handle;
+		uint32_t tiling_mode;
+		uint32_t swizzle_mode;
+		uint32_t phys_swizzle_mode;
+	} tiling;
+#define LOCAL_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct local_i915_gem_get_tiling_v2)
 
 	VG_CLEAR(tiling);
 	tiling.handle = gem_create(kgem->fd, 1);
@@ -1250,7 +1382,10 @@ static void kgem_init_swizzling(struct k
 	if (!gem_set_tiling(kgem->fd, tiling.handle, I915_TILING_X, 512))
 		goto out;
 
-	if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling))
+	if (do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_GET_TILING, &tiling))
+		goto out;
+
+	if (kgem->gen < 50 && tiling.phys_swizzle_mode != tiling.swizzle_mode)
 		goto out;
 
 	choose_memcpy_tiled_x(kgem, tiling.swizzle_mode);
@@ -1258,6 +1393,117 @@ out:
 	gem_close(kgem->fd, tiling.handle);
 }
 
+static void kgem_fixup_relocs(struct kgem *kgem, struct kgem_bo *bo, int shrink)
+{
+	int n;
+
+	bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;
+
+	assert(kgem->nreloc__self <= 256);
+	if (kgem->nreloc__self == 0)
+		return;
+
+	DBG(("%s: fixing up %d%s self-relocations to handle=%p, presumed-offset=%llx\n",
+	     __FUNCTION__, kgem->nreloc__self,
+	     kgem->nreloc__self == 256 ? "+" : "",
+	     bo->handle, (long long)bo->presumed_offset));
+	for (n = 0; n < kgem->nreloc__self; n++) {
+		int i = kgem->reloc__self[n];
+
+		assert(kgem->reloc[i].target_handle == ~0U);
+		kgem->reloc[i].target_handle = bo->target_handle;
+		kgem->reloc[i].presumed_offset = bo->presumed_offset;
+
+		if (kgem->reloc[i].read_domains == I915_GEM_DOMAIN_INSTRUCTION) {
+			DBG(("%s: moving base of self-reloc[%d:%d] %d -> %d\n",
+			     __FUNCTION__, n, i,
+			     kgem->reloc[i].delta,
+			     kgem->reloc[i].delta - shrink));
+
+			kgem->reloc[i].delta -= shrink;
+		}
+		kgem->batch[kgem->reloc[i].offset/sizeof(uint32_t)] =
+			kgem->reloc[i].delta + bo->presumed_offset;
+	}
+
+	if (n == 256) {
+		for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) {
+			if (kgem->reloc[n].target_handle == ~0U) {
+				kgem->reloc[n].target_handle = bo->target_handle;
+				kgem->reloc[n].presumed_offset = bo->presumed_offset;
+
+				if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION) {
+					DBG(("%s: moving base of reloc[%d] %d -> %d\n",
+					     __FUNCTION__, n,
+					     kgem->reloc[n].delta,
+					     kgem->reloc[n].delta - shrink));
+					kgem->reloc[n].delta -= shrink;
+				}
+				kgem->batch[kgem->reloc[n].offset/sizeof(uint32_t)] =
+					kgem->reloc[n].delta + bo->presumed_offset;
+			}
+		}
+	}
+
+	if (shrink) {
+		DBG(("%s: shrinking by %d\n", __FUNCTION__, shrink));
+		for (n = 0; n < kgem->nreloc; n++) {
+			if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch)
+				kgem->reloc[n].offset -= shrink;
+		}
+	}
+}
+
+static struct kgem_bo *kgem_new_batch(struct kgem *kgem)
+{
+	struct kgem_bo *last;
+	unsigned flags;
+
+	last = kgem->batch_bo;
+	if (last) {
+		kgem_fixup_relocs(kgem, last, 0);
+		kgem->batch = NULL;
+	}
+
+	if (kgem->batch) {
+		assert(last == NULL);
+		return NULL;
+	}
+
+	flags = CREATE_CPU_MAP | CREATE_NO_THROTTLE;
+	if (!kgem->has_llc)
+		flags |= CREATE_UNCACHED;
+
+	kgem->batch_bo = kgem_create_linear(kgem,
+					    sizeof(uint32_t)*kgem->batch_size,
+					    flags);
+	if (kgem->batch_bo)
+		kgem->batch = kgem_bo_map__cpu(kgem, kgem->batch_bo);
+	if (kgem->batch == NULL) {
+		DBG(("%s: unable to map batch bo, mallocing(size=%d)\n",
+		     __FUNCTION__,
+		     sizeof(uint32_t)*kgem->batch_size));
+		if (kgem->batch_bo) {
+			kgem_bo_destroy(kgem, kgem->batch_bo);
+			kgem->batch_bo = NULL;
+		}
+
+		if (posix_memalign((void **)&kgem->batch, PAGE_SIZE,
+				   ALIGN(sizeof(uint32_t) * kgem->batch_size, PAGE_SIZE))) {
+			ERR(("%s: batch allocation failed, disabling acceleration\n", __FUNCTION__));
+			__kgem_set_wedged(kgem);
+		}
+	} else {
+		DBG(("%s: allocated and mapped batch handle=%d [size=%d]\n",
+		     __FUNCTION__, kgem->batch_bo->handle,
+		     sizeof(uint32_t)*kgem->batch_size));
+		kgem_bo_sync__cpu(kgem, kgem->batch_bo);
+	}
+
+	DBG(("%s: using last batch handle=%d\n",
+	     __FUNCTION__, last ? last->handle : 0));
+	return last;
+}
 
 void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
 {
@@ -1316,6 +1562,10 @@ void kgem_init(struct kgem *kgem, int fd
 	DBG(("%s: has write-through caching for scanouts? %d\n", __FUNCTION__,
 	     kgem->has_wt));
 
+	kgem->has_wc_mmap = test_has_wc_mmap(kgem);
+	DBG(("%s: has wc-mmapping? %d\n", __FUNCTION__,
+	     kgem->has_wc_mmap));
+
 	kgem->has_caching = test_has_caching(kgem);
 	DBG(("%s: has set-cache-level? %d\n", __FUNCTION__,
 	     kgem->has_caching));
@@ -1361,14 +1611,14 @@ void kgem_init(struct kgem *kgem, int fd
 	if (!is_hw_supported(kgem, dev)) {
 		xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
 			   "Detected unsupported/dysfunctional hardware, disabling acceleration.\n");
-		kgem->wedged = 1;
+		__kgem_set_wedged(kgem);
 	} else if (__kgem_throttle(kgem, false)) {
 		xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
 			   "Detected a hung GPU, disabling acceleration.\n");
-		kgem->wedged = 1;
+		__kgem_set_wedged(kgem);
 	}
 
-	kgem->batch_size = ARRAY_SIZE(kgem->batch);
+	kgem->batch_size = UINT16_MAX & ~7;
 	if (gen == 020 && !kgem->has_pinned_batches)
 		/* Limited to what we can pin */
 		kgem->batch_size = 4*1024;
@@ -1383,11 +1633,12 @@ void kgem_init(struct kgem *kgem, int fd
 	if (!kgem_init_pinned_batches(kgem) && gen == 020) {
 		xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
 			   "Unable to reserve memory for GPU, disabling acceleration.\n");
-		kgem->wedged = 1;
+		__kgem_set_wedged(kgem);
 	}
 
 	DBG(("%s: maximum batch size? %d\n", __FUNCTION__,
 	     kgem->batch_size));
+	kgem_new_batch(kgem);
 
 	kgem->half_cpu_cache_pages = cpu_cache_size() >> 13;
 	DBG(("%s: last-level cache size: %d bytes, threshold in pages: %d\n",
@@ -1556,6 +1807,8 @@ inline static uint32_t kgem_pitch_alignm
 		return 256;
 	if (flags & CREATE_SCANOUT)
 		return 64;
+	if (kgem->gen >= 0100)
+		return 32;
 	return 8;
 }
 
@@ -1809,36 +2062,6 @@ static uint32_t kgem_end_batch(struct kg
 	return kgem->nbatch;
 }
 
-static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo)
-{
-	int n;
-
-	assert(kgem->nreloc__self <= 256);
-	if (kgem->nreloc__self == 0)
-		return;
-
-	for (n = 0; n < kgem->nreloc__self; n++) {
-		int i = kgem->reloc__self[n];
-		assert(kgem->reloc[i].target_handle == ~0U);
-		kgem->reloc[i].target_handle = bo->target_handle;
-		kgem->reloc[i].presumed_offset = bo->presumed_offset;
-		kgem->batch[kgem->reloc[i].offset/sizeof(kgem->batch[0])] =
-			kgem->reloc[i].delta + bo->presumed_offset;
-	}
-
-	if (n == 256) {
-		for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) {
-			if (kgem->reloc[n].target_handle == ~0U) {
-				kgem->reloc[n].target_handle = bo->target_handle;
-				kgem->reloc[n].presumed_offset = bo->presumed_offset;
-				kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
-					kgem->reloc[n].delta + bo->presumed_offset;
-			}
-		}
-
-	}
-}
-
 static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo)
 {
 	struct kgem_bo_binding *b;
@@ -1892,17 +2115,23 @@ static void kgem_bo_free(struct kgem *kg
 
 	DBG(("%s: releasing %p:%p vma for handle=%d, count=%d\n",
 	     __FUNCTION__, bo->map__gtt, bo->map__cpu,
-	     bo->handle, list_is_empty(&bo->vma) ? 0 : kgem->vma[bo->map__gtt == NULL].count));
+	     bo->handle, list_is_empty(&bo->vma) ? 0 : kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count));
 
 	if (!list_is_empty(&bo->vma)) {
 		_list_del(&bo->vma);
-		kgem->vma[bo->map__gtt == NULL].count--;
+		kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count--;
 	}
 
 	if (bo->map__gtt)
-		munmap(MAP(bo->map__gtt), bytes(bo));
-	if (bo->map__cpu)
+		munmap(bo->map__gtt, bytes(bo));
+	if (bo->map__wc) {
+		VG(VALGRIND_MAKE_MEM_NOACCESS(bo->map__wc, bytes(bo)));
+		munmap(bo->map__wc, bytes(bo));
+	}
+	if (bo->map__cpu) {
+		VG(VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map__cpu), bytes(bo)));
 		munmap(MAP(bo->map__cpu), bytes(bo));
+	}
 
 	_list_del(&bo->list);
 	_list_del(&bo->request);
@@ -1938,25 +2167,24 @@ inline static void kgem_bo_move_to_inact
 
 	if (bucket(bo) >= NUM_CACHE_BUCKETS) {
 		if (bo->map__gtt) {
-			munmap(MAP(bo->map__gtt), bytes(bo));
+			munmap(bo->map__gtt, bytes(bo));
 			bo->map__gtt = NULL;
 		}
 
 		list_move(&bo->list, &kgem->large_inactive);
 	} else {
 		assert(bo->flush == false);
+		assert(list_is_empty(&bo->vma));
 		list_move(&bo->list, &kgem->inactive[bucket(bo)]);
-		if (bo->map__gtt) {
-			if (!kgem_bo_can_map(kgem, bo)) {
-				munmap(MAP(bo->map__gtt), bytes(bo));
-				bo->map__gtt = NULL;
-			}
-			if (bo->map__gtt) {
-				list_add(&bo->vma, &kgem->vma[0].inactive[bucket(bo)]);
-				kgem->vma[0].count++;
-			}
+		if (bo->map__gtt && !kgem_bo_can_map(kgem, bo)) {
+			munmap(bo->map__gtt, bytes(bo));
+			bo->map__gtt = NULL;
+		}
+		if (bo->map__gtt || (bo->map__wc && !bo->tiling)) {
+			list_add(&bo->vma, &kgem->vma[0].inactive[bucket(bo)]);
+			kgem->vma[0].count++;
 		}
-		if (bo->map__cpu && !bo->map__gtt) {
+		if (bo->map__cpu && list_is_empty(&bo->vma)) {
 			list_add(&bo->vma, &kgem->vma[1].inactive[bucket(bo)]);
 			kgem->vma[1].count++;
 		}
@@ -2004,9 +2232,9 @@ inline static void kgem_bo_remove_from_i
 	assert(bo->rq == NULL);
 	assert(bo->exec == NULL);
 	if (!list_is_empty(&bo->vma)) {
-		assert(bo->map__gtt || bo->map__cpu);
+		assert(bo->map__gtt || bo->map__wc || bo->map__cpu);
 		list_del(&bo->vma);
-		kgem->vma[bo->map__gtt == NULL].count--;
+		kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count--;
 	}
 }
 
@@ -2446,6 +2674,9 @@ static bool __kgem_retire_rq(struct kgem
 	     __FUNCTION__, rq->bo->handle));
 	assert(RQ(rq->bo->rq) == rq);
 
+	if (rq == kgem->fence[rq->ring])
+		kgem->fence[rq->ring] = NULL;
+
 	while (!list_is_empty(&rq->buffers)) {
 		struct kgem_bo *bo;
 
@@ -2471,6 +2702,7 @@ static bool __kgem_retire_rq(struct kgem
 		}
 
 		bo->domain = DOMAIN_NONE;
+		bo->gtt_dirty = false;
 		bo->rq = NULL;
 		if (bo->refcnt)
 			continue;
@@ -2576,24 +2808,72 @@ bool __kgem_ring_is_idle(struct kgem *kg
 	assert(ring < ARRAY_SIZE(kgem->requests));
 	assert(!list_is_empty(&kgem->requests[ring]));
 
+	rq = kgem->fence[ring];
+	if (rq) {
+		struct kgem_request *tmp;
+
+		if (__kgem_busy(kgem, rq->bo->handle)) {
+			DBG(("%s: last fence handle=%d still busy\n",
+			     __FUNCTION__, rq->bo->handle));
+			return false;
+		}
+
+		do {
+			tmp = list_first_entry(&kgem->requests[ring],
+					       struct kgem_request,
+					       list);
+			assert(tmp->ring == ring);
+			__kgem_retire_rq(kgem, tmp);
+		} while (tmp != rq);
+
+		assert(kgem->fence[ring] == NULL);
+		if (list_is_empty(&kgem->requests[ring]))
+			return true;
+	}
+
 	rq = list_last_entry(&kgem->requests[ring],
 			     struct kgem_request, list);
 	assert(rq->ring == ring);
 	if (__kgem_busy(kgem, rq->bo->handle)) {
 		DBG(("%s: last requests handle=%d still busy\n",
 		     __FUNCTION__, rq->bo->handle));
+		kgem->fence[ring] = rq;
 		return false;
 	}
 
 	DBG(("%s: ring=%d idle (handle=%d)\n",
 	     __FUNCTION__, ring, rq->bo->handle));
 
-	kgem_retire__requests_ring(kgem, ring);
+	while (!list_is_empty(&kgem->requests[ring])) {
+		rq = list_first_entry(&kgem->requests[ring],
+				      struct kgem_request,
+				      list);
+		assert(rq->ring == ring);
+		__kgem_retire_rq(kgem, rq);
+	}
 
-	assert(list_is_empty(&kgem->requests[ring]));
 	return true;
 }
 
+void __kgem_retire_requests_upto(struct kgem *kgem, struct kgem_bo *bo)
+{
+	struct kgem_request *rq = bo->rq, *tmp;
+	struct list *requests = &kgem->requests[RQ_RING(rq) == I915_EXEC_BLT];
+
+	rq = RQ(rq);
+	assert(rq != &kgem->static_request);
+	if (rq == (struct kgem_request *)kgem) {
+		__kgem_bo_clear_busy(bo);
+		return;
+	}
+
+	do {
+		tmp = list_first_entry(requests, struct kgem_request, list);
+		assert(tmp->ring == rq->ring);
+		__kgem_retire_rq(kgem, tmp);
+	} while (tmp != rq);
+}
+
 #if 0
 static void kgem_commit__check_reloc(struct kgem *kgem)
 {
@@ -2689,6 +2969,7 @@ static void kgem_commit(struct kgem *kge
 		assert(list_is_empty(&rq->buffers));
 
 		assert(rq->bo->map__gtt == NULL);
+		assert(rq->bo->map__wc == NULL);
 		assert(rq->bo->map__cpu == NULL);
 		gem_close(kgem->fd, rq->bo->handle);
 		kgem_cleanup_cache(kgem);
@@ -2696,6 +2977,10 @@ static void kgem_commit(struct kgem *kge
 		assert(rq->ring < ARRAY_SIZE(kgem->requests));
 		list_add_tail(&rq->list, &kgem->requests[rq->ring]);
 		kgem->need_throttle = kgem->need_retire = 1;
+
+		if (kgem->fence[rq->ring] == NULL &&
+		    __kgem_busy(kgem, rq->bo->handle))
+			kgem->fence[rq->ring] = rq;
 	}
 
 	kgem->next_request = NULL;
@@ -2927,24 +3212,50 @@ static void kgem_cleanup(struct kgem *kg
 	kgem_close_inactive(kgem);
 }
 
-static int kgem_batch_write(struct kgem *kgem, uint32_t handle, uint32_t size)
+static int
+kgem_batch_write(struct kgem *kgem,
+		 struct kgem_bo *bo,
+		 uint32_t size)
 {
+	char *ptr;
 	int ret;
 
-	ASSERT_IDLE(kgem, handle);
+	ASSERT_IDLE(kgem, bo->handle);
 
 #if DBG_NO_EXEC
 	{
 		uint32_t batch[] = { MI_BATCH_BUFFER_END, 0};
-		return gem_write(kgem->fd, handle, 0, sizeof(batch), batch);
+		return gem_write(kgem->fd, bo->handle, 0, sizeof(batch), batch);
 	}
 #endif
 
-
+	assert(!bo->scanout);
 retry:
+	ptr = NULL;
+	if (bo->domain == DOMAIN_CPU || kgem->has_llc) {
+		ptr = bo->map__cpu;
+		if (ptr == NULL)
+			ptr = __kgem_bo_map__cpu(kgem, bo);
+	} else if (kgem->has_wc_mmap) {
+		ptr = bo->map__wc;
+		if (ptr == NULL)
+			ptr = __kgem_bo_map__wc(kgem, bo);
+	}
+	if (ptr) {
+		memcpy(ptr, kgem->batch, sizeof(uint32_t)*kgem->nbatch);
+		if (kgem->surface != kgem->batch_size) {
+			ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size);
+			ret -= sizeof(uint32_t) * kgem->surface;
+			ptr += size - ret;
+			memcpy(ptr, kgem->batch + kgem->surface,
+			       (kgem->batch_size - kgem->surface)*sizeof(uint32_t));
+		}
+		return 0;
+	}
+
 	/* If there is no surface data, just upload the batch */
 	if (kgem->surface == kgem->batch_size) {
-		if ((ret = gem_write__cachealigned(kgem->fd, handle,
+		if ((ret = gem_write__cachealigned(kgem->fd, bo->handle,
 						   0, sizeof(uint32_t)*kgem->nbatch,
 						   kgem->batch)) == 0)
 			return 0;
@@ -2955,7 +3266,7 @@ retry:
 	/* Are the batch pages conjoint with the surface pages? */
 	if (kgem->surface < kgem->nbatch + PAGE_SIZE/sizeof(uint32_t)) {
 		assert(size == PAGE_ALIGN(kgem->batch_size*sizeof(uint32_t)));
-		if ((ret = gem_write__cachealigned(kgem->fd, handle,
+		if ((ret = gem_write__cachealigned(kgem->fd, bo->handle,
 						   0, kgem->batch_size*sizeof(uint32_t),
 						   kgem->batch)) == 0)
 			return 0;
@@ -2964,7 +3275,7 @@ retry:
 	}
 
 	/* Disjoint surface/batch, upload separately */
-	if ((ret = gem_write__cachealigned(kgem->fd, handle,
+	if ((ret = gem_write__cachealigned(kgem->fd, bo->handle,
 					   0, sizeof(uint32_t)*kgem->nbatch,
 					   kgem->batch)))
 		goto expire;
@@ -2972,7 +3283,7 @@ retry:
 	ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size);
 	ret -= sizeof(uint32_t) * kgem->surface;
 	assert(size-ret >= kgem->nbatch*sizeof(uint32_t));
-	if (gem_write(kgem->fd, handle,
+	if (gem_write(kgem->fd, bo->handle,
 		      size - ret, (kgem->batch_size - kgem->surface)*sizeof(uint32_t),
 		      kgem->batch + kgem->surface))
 		goto expire;
@@ -2990,7 +3301,7 @@ expire:
 		goto retry;
 
 	ERR(("%s: failed to write batch (handle=%d): %d\n",
-	     __FUNCTION__, handle, -ret));
+	     __FUNCTION__, bo->handle, -ret));
 	return ret;
 }
 
@@ -3047,15 +3358,16 @@ void kgem_reset(struct kgem *kgem)
 	kgem->needs_reservation = false;
 	kgem->flush = 0;
 	kgem->batch_flags = kgem->batch_flags_base;
+	assert(kgem->batch);
 
 	kgem->next_request = __kgem_request_alloc(kgem);
 
 	kgem_sna_reset(kgem);
 }
 
-static int compact_batch_surface(struct kgem *kgem)
+static int compact_batch_surface(struct kgem *kgem, int *shrink)
 {
-	int size, shrink, n;
+	int size, n;
 
 	if (!kgem->has_relaxed_delta)
 		return kgem->batch_size * sizeof(uint32_t);
@@ -3065,29 +3377,23 @@ static int compact_batch_surface(struct 
 	size = n - kgem->surface + kgem->nbatch;
 	size = ALIGN(size, 1024);
 
-	shrink = n - size;
-	if (shrink) {
-		DBG(("shrinking from %d to %d\n", kgem->batch_size, size));
-
-		shrink *= sizeof(uint32_t);
-		for (n = 0; n < kgem->nreloc; n++) {
-			if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION &&
-			    kgem->reloc[n].target_handle == ~0U)
-				kgem->reloc[n].delta -= shrink;
-
-			if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch)
-				kgem->reloc[n].offset -= shrink;
-		}
-	}
-
+	*shrink = (n - size) * sizeof(uint32_t);
 	return size * sizeof(uint32_t);
 }
 
 static struct kgem_bo *
-kgem_create_batch(struct kgem *kgem, int size)
+kgem_create_batch(struct kgem *kgem)
 {
+#if !DBG_NO_SHRINK_BATCHES
 	struct drm_i915_gem_set_domain set_domain;
 	struct kgem_bo *bo;
+	int shrink = 0;
+	int size;
+
+	if (kgem->surface != kgem->batch_size)
+		size = compact_batch_surface(kgem, &shrink);
+	else
+		size = kgem->nbatch * sizeof(uint32_t);
 
 	if (size <= 4096) {
 		bo = list_first_entry(&kgem->pinned_batches[0],
@@ -3097,10 +3403,12 @@ kgem_create_batch(struct kgem *kgem, int
 out_4096:
 			assert(bo->refcnt > 0);
 			list_move_tail(&bo->list, &kgem->pinned_batches[0]);
-			return kgem_bo_reference(bo);
+			bo = kgem_bo_reference(bo);
+			goto write;
 		}
 
 		if (!__kgem_busy(kgem, bo->handle)) {
+			assert(RQ(bo->rq)->bo == bo);
 			__kgem_retire_rq(kgem, RQ(bo->rq));
 			goto out_4096;
 		}
@@ -3114,7 +3422,8 @@ out_4096:
 out_16384:
 			assert(bo->refcnt > 0);
 			list_move_tail(&bo->list, &kgem->pinned_batches[1]);
-			return kgem_bo_reference(bo);
+			bo = kgem_bo_reference(bo);
+			goto write;
 		}
 
 		if (!__kgem_busy(kgem, bo->handle)) {
@@ -3126,14 +3435,14 @@ out_16384:
 	if (kgem->gen == 020) {
 		bo = kgem_create_linear(kgem, size, CREATE_CACHED | CREATE_TEMPORARY);
 		if (bo)
-			return bo;
+			goto write;
 
 		/* Nothing available for reuse, rely on the kernel wa */
 		if (kgem->has_pinned_batches) {
 			bo = kgem_create_linear(kgem, size, CREATE_CACHED | CREATE_TEMPORARY);
 			if (bo) {
 				kgem->batch_flags &= ~LOCAL_I915_EXEC_IS_PINNED;
-				return bo;
+				goto write;
 			}
 		}
 
@@ -3157,11 +3466,29 @@ out_16384:
 
 			kgem_retire(kgem);
 			assert(bo->rq == NULL);
-			return kgem_bo_reference(bo);
+			bo = kgem_bo_reference(bo);
+			goto write;
 		}
 	}
 
-	return kgem_create_linear(kgem, size, CREATE_NO_THROTTLE);
+	bo = NULL;
+	if (!kgem->has_llc) {
+		bo = kgem_create_linear(kgem, size, CREATE_NO_THROTTLE);
+		if (bo) {
+write:
+			kgem_fixup_relocs(kgem, bo, shrink);
+			if (kgem_batch_write(kgem, bo, size)) {
+				kgem_bo_destroy(kgem, bo);
+				return NULL;
+			}
+		}
+	}
+	if (bo == NULL)
+		bo = kgem_new_batch(kgem);
+	return bo;
+#else
+	return kgem_new_batch(kgem);
+#endif
 }
 
 #if !NDEBUG
@@ -3213,7 +3540,7 @@ static void dump_fence_regs(struct kgem 
 
 static int do_execbuf(struct kgem *kgem, struct drm_i915_gem_execbuffer2 *execbuf)
 {
-	int ret;
+	int ret, err;
 
 retry:
 	ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
@@ -3229,14 +3556,35 @@ retry:
 		goto retry;
 
 	/* last gasp */
-	return do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
+	ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
+	if (ret == 0)
+		return 0;
+
+	xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
+		   "Failed to submit rendering commands, trying again with outputs disabled.\n");
+
+	/* One last trick up our sleeve for when we run out of space.
+	 * We turn everything off to free up our pinned framebuffers,
+	 * sprites and cursors, and try one last time.
+	 */
+	err = errno;
+	if (sna_mode_disable(container_of(kgem, struct sna, kgem))) {
+		kgem_cleanup_cache(kgem);
+		ret = do_ioctl(kgem->fd,
+			       DRM_IOCTL_I915_GEM_EXECBUFFER2,
+			       execbuf);
+		DBG(("%s: last_gasp ret=%d\n", __FUNCTION__, ret));
+		sna_mode_enable(container_of(kgem, struct sna, kgem));
+	}
+	errno = err;
+
+	return ret;
 }
 
 void _kgem_submit(struct kgem *kgem)
 {
 	struct kgem_request *rq;
 	uint32_t batch_end;
-	int size;
 
 	assert(!DBG_NO_HW);
 	assert(!kgem->wedged);
@@ -3266,19 +3614,17 @@ void _kgem_submit(struct kgem *kgem)
 #endif
 
 	rq = kgem->next_request;
-	if (kgem->surface != kgem->batch_size)
-		size = compact_batch_surface(kgem);
-	else
-		size = kgem->nbatch * sizeof(kgem->batch[0]);
-	rq->bo = kgem_create_batch(kgem, size);
+	assert(rq->bo == NULL);
+
+	rq->bo = kgem_create_batch(kgem);
 	if (rq->bo) {
-		uint32_t handle = rq->bo->handle;
-		int i;
+		struct drm_i915_gem_execbuffer2 execbuf;
+		int i, ret;
 
 		assert(!rq->bo->needs_flush);
 
 		i = kgem->nexec++;
-		kgem->exec[i].handle = handle;
+		kgem->exec[i].handle = rq->bo->handle;
 		kgem->exec[i].relocation_count = kgem->nreloc;
 		kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc;
 		kgem->exec[i].alignment = 0;
@@ -3287,117 +3633,109 @@ void _kgem_submit(struct kgem *kgem)
 		kgem->exec[i].rsvd1 = 0;
 		kgem->exec[i].rsvd2 = 0;
 
-		rq->bo->target_handle = kgem->has_handle_lut ? i : handle;
 		rq->bo->exec = &kgem->exec[i];
 		rq->bo->rq = MAKE_REQUEST(rq, kgem->ring); /* useful sanity check */
 		list_add(&rq->bo->request, &rq->buffers);
 		rq->ring = kgem->ring == KGEM_BLT;
 
-		kgem_fixup_self_relocs(kgem, rq->bo);
-
-		if (kgem_batch_write(kgem, handle, size) == 0) {
-			struct drm_i915_gem_execbuffer2 execbuf;
-			int ret;
-
-			memset(&execbuf, 0, sizeof(execbuf));
-			execbuf.buffers_ptr = (uintptr_t)kgem->exec;
-			execbuf.buffer_count = kgem->nexec;
-			execbuf.batch_len = batch_end*sizeof(uint32_t);
-			execbuf.flags = kgem->ring | kgem->batch_flags;
-
-			if (DBG_DUMP) {
-				int fd = open("/tmp/i915-batchbuffers.dump",
-					      O_WRONLY | O_CREAT | O_APPEND,
-					      0666);
-				if (fd != -1) {
-					ret = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
-					fd = close(fd);
-				}
+		memset(&execbuf, 0, sizeof(execbuf));
+		execbuf.buffers_ptr = (uintptr_t)kgem->exec;
+		execbuf.buffer_count = kgem->nexec;
+		execbuf.batch_len = batch_end*sizeof(uint32_t);
+		execbuf.flags = kgem->ring | kgem->batch_flags;
+
+		if (DBG_DUMP) {
+			int fd = open("/tmp/i915-batchbuffers.dump",
+				      O_WRONLY | O_CREAT | O_APPEND,
+				      0666);
+			if (fd != -1) {
+				ret = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
+				fd = close(fd);
 			}
+		}
 
-			ret = do_execbuf(kgem, &execbuf);
-			if (DEBUG_SYNC && ret == 0) {
-				struct drm_i915_gem_set_domain set_domain;
-
-				VG_CLEAR(set_domain);
-				set_domain.handle = handle;
-				set_domain.read_domains = I915_GEM_DOMAIN_GTT;
-				set_domain.write_domain = I915_GEM_DOMAIN_GTT;
+		ret = do_execbuf(kgem, &execbuf);
+		if (DEBUG_SYNC && ret == 0) {
+			struct drm_i915_gem_set_domain set_domain;
+
+			VG_CLEAR(set_domain);
+			set_domain.handle = rq->bo->handle;
+			set_domain.read_domains = I915_GEM_DOMAIN_GTT;
+			set_domain.write_domain = I915_GEM_DOMAIN_GTT;
 
-				ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
+			ret = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
+		}
+		if (ret < 0) {
+			kgem_throttle(kgem);
+			if (!kgem->wedged) {
+				xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
+					   "Failed to submit rendering commands, disabling acceleration.\n");
+				__kgem_set_wedged(kgem);
 			}
-			if (ret < 0) {
-				kgem_throttle(kgem);
-				if (!kgem->wedged) {
-					xf86DrvMsg(kgem_get_screen_index(kgem), X_ERROR,
-						   "Failed to submit rendering commands, disabling acceleration.\n");
-					kgem->wedged = true;
-				}
 
 #if !NDEBUG
-				ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d, fenced=%d, high=%d,%d: errno=%d\n",
-				       kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
-				       kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced, kgem->aperture_high, kgem->aperture_total, -ret);
-
-				for (i = 0; i < kgem->nexec; i++) {
-					struct kgem_bo *bo, *found = NULL;
-
-					list_for_each_entry(bo, &kgem->next_request->buffers, request) {
-						if (bo->handle == kgem->exec[i].handle) {
-							found = bo;
-							break;
-						}
+			ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d, fenced=%d, high=%d,%d: errno=%d\n",
+			       kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
+			       kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced, kgem->aperture_high, kgem->aperture_total, -ret);
+
+			for (i = 0; i < kgem->nexec; i++) {
+				struct kgem_bo *bo, *found = NULL;
+
+				list_for_each_entry(bo, &kgem->next_request->buffers, request) {
+					if (bo->handle == kgem->exec[i].handle) {
+						found = bo;
+						break;
 					}
-					ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n",
-					       i,
-					       kgem->exec[i].handle,
-					       (int)kgem->exec[i].offset,
-					       found ? kgem_bo_size(found) : -1,
-					       found ? found->tiling : -1,
-					       (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE),
-					       found ? found->snoop : -1,
-					       found ? found->purged : -1);
-				}
-				for (i = 0; i < kgem->nreloc; i++) {
-					ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n",
-					       i,
-					       (int)kgem->reloc[i].offset,
-					       kgem->reloc[i].target_handle,
-					       kgem->reloc[i].delta,
-					       kgem->reloc[i].read_domains,
-					       kgem->reloc[i].write_domain,
-					       (int)kgem->reloc[i].presumed_offset);
 				}
+				ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n",
+				       i,
+				       kgem->exec[i].handle,
+				       (int)kgem->exec[i].offset,
+				       found ? kgem_bo_size(found) : -1,
+				       found ? found->tiling : -1,
+				       (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE),
+				       found ? found->snoop : -1,
+				       found ? found->purged : -1);
+			}
+			for (i = 0; i < kgem->nreloc; i++) {
+				ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n",
+				       i,
+				       (int)kgem->reloc[i].offset,
+				       kgem->reloc[i].target_handle,
+				       kgem->reloc[i].delta,
+				       kgem->reloc[i].read_domains,
+				       kgem->reloc[i].write_domain,
+				       (int)kgem->reloc[i].presumed_offset);
+			}
+
+			{
+				struct drm_i915_gem_get_aperture aperture;
+				if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) == 0)
+					ErrorF("Aperture size %lld, available %lld\n",
+					       (long long)aperture.aper_size,
+					       (long long)aperture.aper_available_size);
+			}
+
+			if (ret == -ENOSPC)
+				dump_gtt_info(kgem);
+			if (ret == -EDEADLK)
+				dump_fence_regs(kgem);
 
-				{
-					struct drm_i915_gem_get_aperture aperture;
-					if (do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) == 0)
-						ErrorF("Aperture size %lld, available %lld\n",
-						       (long long)aperture.aper_size,
-						       (long long)aperture.aper_available_size);
+			if (DEBUG_SYNC) {
+				int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
+				if (fd != -1) {
+					int ignored = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
+					assert(ignored == batch_end*sizeof(uint32_t));
+					close(fd);
 				}
 
-				if (ret == -ENOSPC)
-					dump_gtt_info(kgem);
-				if (ret == -EDEADLK)
-					dump_fence_regs(kgem);
-
-				if (DEBUG_SYNC) {
-					int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
-					if (fd != -1) {
-						int ignored = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
-						assert(ignored == batch_end*sizeof(uint32_t));
-						close(fd);
-					}
-
-					FatalError("SNA: failed to submit batchbuffer, errno=%d\n", -ret);
-				}
-#endif
+				FatalError("SNA: failed to submit batchbuffer, errno=%d\n", -ret);
 			}
+#endif
 		}
 	}
 #if SHOW_BATCH_AFTER
-	if (gem_read(kgem->fd, rq->bo->handle, kgem->batch, 0, batch_end*sizeof(uint32_t) == 0))
+	if (gem_read(kgem->fd, rq->bo->handle, kgem->batch, 0, batch_end*sizeof(uint32_t)) == 0)
 		__kgem_batch_debug(kgem, batch_end);
 #endif
 	kgem_commit(kgem);
@@ -3439,8 +3777,7 @@ void kgem_throttle(struct kgem *kgem)
 	if (kgem->wedged)
 		return;
 
-	kgem->wedged = __kgem_throttle(kgem, true);
-	if (kgem->wedged) {
+	if (__kgem_throttle(kgem, true)) {
 		static int once;
 		char path[128];
 
@@ -3453,6 +3790,7 @@ void kgem_throttle(struct kgem *kgem)
 			once = 1;
 		}
 
+		__kgem_set_wedged(kgem);
 		kgem->need_throttle = false;
 	}
 }
@@ -3823,7 +4161,7 @@ discard:
 		     __FUNCTION__, for_cpu ? "cpu" : "gtt"));
 		cache = &kgem->vma[for_cpu].inactive[cache_bucket(num_pages)];
 		list_for_each_entry(bo, cache, vma) {
-			assert(for_cpu ? bo->map__cpu : bo->map__gtt);
+			assert(for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc));
 			assert(bucket(bo) == cache_bucket(num_pages));
 			assert(bo->proxy == NULL);
 			assert(bo->rq == NULL);
@@ -3905,10 +4243,10 @@ discard:
 			bo->pitch = 0;
 		}
 
-		if (bo->map__gtt || bo->map__cpu) {
+		if (bo->map__gtt || bo->map__wc || bo->map__cpu) {
 			if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
 				int for_cpu = !!(flags & CREATE_CPU_MAP);
-				if (for_cpu ? bo->map__cpu : bo->map__gtt){
+				if (for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc)){
 					if (first != NULL)
 						break;
 
@@ -4148,16 +4486,18 @@ struct kgem_bo *kgem_create_linear(struc
 	}
 
 	size = NUM_PAGES(size);
-	bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags);
-	if (bo) {
-		assert(bo->domain != DOMAIN_GPU);
-		ASSERT_IDLE(kgem, bo->handle);
-		bo->refcnt = 1;
-		return bo;
-	}
+	if ((flags & CREATE_UNCACHED) == 0) {
+		bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags);
+		if (bo) {
+			assert(bo->domain != DOMAIN_GPU);
+			ASSERT_IDLE(kgem, bo->handle);
+			bo->refcnt = 1;
+			return bo;
+		}
 
-	if (flags & CREATE_CACHED)
-		return NULL;
+		if (flags & CREATE_CACHED)
+			return NULL;
+	}
 
 	handle = gem_create(kgem->fd, size);
 	if (handle == 0)
@@ -4313,7 +4653,7 @@ unsigned kgem_can_create_2d(struct kgem 
 			flags |= KGEM_CAN_CREATE_CPU;
 		if (size > 4096 && size <= kgem->max_gpu_size)
 			flags |= KGEM_CAN_CREATE_GPU;
-		if (size <= PAGE_SIZE*kgem->aperture_mappable/4)
+		if (size <= PAGE_SIZE*kgem->aperture_mappable/4 || kgem->has_wc_mmap)
 			flags |= KGEM_CAN_CREATE_GTT;
 		if (size > kgem->large_object_size)
 			flags |= KGEM_CAN_CREATE_LARGE;
@@ -4465,7 +4805,10 @@ static void __kgem_bo_make_scanout(struc
 	if (bo->map__gtt == NULL)
 		bo->map__gtt = __kgem_bo_map__gtt(kgem, bo);
 	if (bo->map__gtt) {
-		*(uint32_t *)bo->map__gtt = 0;
+		if (sigtrap_get() == 0) {
+			*(uint32_t *)bo->map__gtt = 0;
+			sigtrap_put();
+		}
 		bo->domain = DOMAIN_GTT;
 	}
 
@@ -4736,7 +5079,7 @@ large_inactive:
 				assert(bucket(bo) == bucket);
 				assert(bo->refcnt == 0);
 				assert(!bo->scanout);
-				assert(for_cpu ? bo->map__cpu : bo->map__gtt);
+				assert(for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc));
 				assert(bo->rq == NULL);
 				assert(bo->exec == NULL);
 				assert(list_is_empty(&bo->request));
@@ -5192,6 +5535,7 @@ struct kgem_bo *kgem_create_cpu_2d(struc
 		assert(bo->tiling == I915_TILING_NONE);
 		assert_tiling(kgem, bo);
 
+		assert(!__kgem_busy(kgem, bo->handle));
 		if (!gem_set_caching(kgem->fd, bo->handle, SNOOPED)) {
 			kgem_bo_destroy(kgem, bo);
 			return NULL;
@@ -5828,7 +6172,6 @@ static void kgem_trim_vma_cache(struct k
 	i = 0;
 	while (kgem->vma[type].count > 0) {
 		struct kgem_bo *bo = NULL;
-		void **ptr;
 
 		for (j = 0;
 		     bo == NULL && j < ARRAY_SIZE(kgem->vma[type].inactive);
@@ -5843,12 +6186,23 @@ static void kgem_trim_vma_cache(struct k
 		DBG(("%s: discarding inactive %s vma cache for %d\n",
 		     __FUNCTION__, type ? "CPU" : "GTT", bo->handle));
 
-		ptr = type ? &bo->map__cpu : &bo->map__gtt;
 		assert(bo->rq == NULL);
+		if (type) {
+			VG(VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map__cpu), bytes(bo)));
+			munmap(MAP(bo->map__cpu), bytes(bo));
+			bo->map__cpu = NULL;
+		} else {
+			if (bo->map__wc) {
+				VG(VALGRIND_MAKE_MEM_NOACCESS(bo->map__wc, bytes(bo)));
+				munmap(bo->map__wc, bytes(bo));
+				bo->map__wc = NULL;
+			}
+			if (bo->map__gtt) {
+				munmap(bo->map__gtt, bytes(bo));
+				bo->map__gtt = NULL;
+			}
+		}
 
-		VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(*ptr), bytes(bo)));
-		munmap(MAP(*ptr), bytes(bo));
-		*ptr = NULL;
 		list_del(&bo->vma);
 		kgem->vma[type].count--;
 
@@ -5860,10 +6214,35 @@ static void kgem_trim_vma_cache(struct k
 	}
 }
 
-void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo)
+static void *__kgem_bo_map__gtt_or_wc(struct kgem *kgem, struct kgem_bo *bo)
 {
 	void *ptr;
 
+	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
+
+	assert(bo->proxy == NULL);
+	assert(!bo->snoop);
+
+	kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
+
+	if (bo->tiling || !kgem->has_wc_mmap) {
+		assert(num_pages(bo) <= kgem->aperture_mappable / 2);
+		assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y);
+
+		ptr = bo->map__gtt;
+		if (ptr == NULL)
+			ptr = __kgem_bo_map__gtt(kgem, bo);
+	} else {
+		ptr = bo->map__wc;
+		if (ptr == NULL)
+			ptr = __kgem_bo_map__wc(kgem, bo);
+	}
+
+	return ptr;
+}
+
+void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo)
+{
 	DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
 	     bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
 
@@ -5878,26 +6257,7 @@ void *kgem_bo_map__async(struct kgem *kg
 		return kgem_bo_map__cpu(kgem, bo);
 	}
 
-	ptr = MAP(bo->map__gtt);
-	if (ptr == NULL) {
-		assert(num_pages(bo) <= kgem->aperture_mappable / 2);
-
-		kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
-
-		ptr = __kgem_bo_map__gtt(kgem, bo);
-		if (ptr == NULL)
-			return NULL;
-
-		/* Cache this mapping to avoid the overhead of an
-		 * excruciatingly slow GTT pagefault. This is more an
-		 * issue with compositing managers which need to frequently
-		 * flush CPU damage to their GPU bo.
-		 */
-		bo->map__gtt = ptr;
-		DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
-	}
-
-	return ptr;
+	return __kgem_bo_map__gtt_or_wc(kgem, bo);
 }
 
 void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
@@ -5923,25 +6283,7 @@ void *kgem_bo_map(struct kgem *kgem, str
 		return ptr;
 	}
 
-	ptr = MAP(bo->map__gtt);
-	if (ptr == NULL) {
-		assert(num_pages(bo) <= kgem->aperture_mappable / 2);
-		assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y);
-
-		kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
-
-		ptr = __kgem_bo_map__gtt(kgem, bo);
-		if (ptr == NULL)
-			return NULL;
-
-		/* Cache this mapping to avoid the overhead of an
-		 * excruciatingly slow GTT pagefault. This is more an
-		 * issue with compositing managers which need to frequently
-		 * flush CPU damage to their GPU bo.
-		 */
-		bo->map__gtt = ptr;
-		DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
-	}
+	ptr = __kgem_bo_map__gtt_or_wc(kgem, bo);
 
 	if (bo->domain != DOMAIN_GTT || FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) {
 		struct drm_i915_gem_set_domain set_domain;
@@ -5969,8 +6311,6 @@ void *kgem_bo_map(struct kgem *kgem, str
 
 void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
 {
-	void *ptr;
-
 	DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
 	     bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
 
@@ -5980,38 +6320,28 @@ void *kgem_bo_map__gtt(struct kgem *kgem
 	assert_tiling(kgem, bo);
 	assert(!bo->purged || bo->reusable);
 
-	ptr = MAP(bo->map__gtt);
-	if (ptr == NULL) {
-		assert(num_pages(bo) <= kgem->aperture_mappable / 4);
-
-		kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
+	return __kgem_bo_map__gtt_or_wc(kgem, bo);
+}
 
-		ptr = __kgem_bo_map__gtt(kgem, bo);
-		if (ptr == NULL)
-			return NULL;
+void *kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo)
+{
+	DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
+	     bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
 
-		/* Cache this mapping to avoid the overhead of an
-		 * excruciatingly slow GTT pagefault. This is more an
-		 * issue with compositing managers which need to frequently
-		 * flush CPU damage to their GPU bo.
-		 */
-		bo->map__gtt = ptr;
-		DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
-	}
+	assert(bo->proxy == NULL);
+	assert(bo->exec == NULL);
+	assert(list_is_empty(&bo->list));
+	assert_tiling(kgem, bo);
+	assert(!bo->purged || bo->reusable);
 
-	return ptr;
-}
+	if (bo->map__wc)
+		return bo->map__wc;
 
-void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo)
-{
-	return kgem_bo_map__async(kgem, bo);
+	return __kgem_bo_map__wc(kgem, bo);
 }
 
 void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
 {
-	struct drm_i915_gem_mmap mmap_arg;
-	int err;
-
 	DBG(("%s(handle=%d, size=%d, map=%p:%p)\n",
 	     __FUNCTION__, bo->handle, bytes(bo), bo->map__gtt, bo->map__cpu));
 	assert(!bo->purged);
@@ -6024,31 +6354,31 @@ void *kgem_bo_map__cpu(struct kgem *kgem
 
 	kgem_trim_vma_cache(kgem, MAP_CPU, bucket(bo));
 
-retry:
-	VG_CLEAR(mmap_arg);
-	mmap_arg.handle = bo->handle;
-	mmap_arg.offset = 0;
-	mmap_arg.size = bytes(bo);
-	if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg))) {
-		assert(err != EINVAL);
-
-		if (__kgem_throttle_retire(kgem, 0))
-			goto retry;
+	return __kgem_bo_map__cpu(kgem, bo);
+}
 
-		if (kgem_cleanup_cache(kgem))
-			goto retry;
+void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo)
+{
+	void *ptr;
 
-		ERR(("%s: failed to mmap handle=%d, %d bytes, into CPU domain: %d\n",
-		     __FUNCTION__, bo->handle, bytes(bo), -err));
-		return NULL;
+	if (bo->tiling == I915_TILING_NONE && kgem->has_llc) {
+		ptr = MAP(bo->map__cpu);
+		if (ptr == NULL)
+			ptr = __kgem_bo_map__cpu(kgem, bo);
+	} else if (bo->tiling || !kgem->has_wc_mmap) {
+		ptr = bo->map__gtt;
+		if (ptr == NULL)
+			ptr = __kgem_bo_map__gtt(kgem, bo);
+	} else {
+		ptr = bo->map__wc;
+		if (ptr == NULL)
+			ptr = __kgem_bo_map__wc(kgem, bo);
 	}
 
-	VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo)));
-
-	DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle));
-	return bo->map__cpu = (void *)(uintptr_t)mmap_arg.addr_ptr;
+	return ptr;
 }
 
+
 uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo)
 {
 	struct drm_gem_flink flink;
@@ -6344,6 +6674,7 @@ init_buffer_from_bo(struct kgem_buffer *
 	     __FUNCTION__, old->handle));
 
 	assert(old->proxy == NULL);
+	assert(list_is_empty(&old->list));
 
 	memcpy(&bo->base, old, sizeof(*old));
 	if (old->rq)
@@ -6469,6 +6800,7 @@ create_snoopable_buffer(struct kgem *kge
 		assert(bo->base.refcnt == 1);
 		assert(bo->mmapped == MMAPPED_CPU);
 		assert(bo->need_io == false);
+		assert(!__kgem_busy(kgem, bo->base.handle));
 
 		if (!gem_set_caching(kgem->fd, bo->base.handle, SNOOPED))
 			goto free_caching;
@@ -6648,7 +6980,7 @@ struct kgem_bo *kgem_create_buffer(struc
 	assert(alloc);
 
 	alloc /= PAGE_SIZE;
-	if (alloc > kgem->aperture_mappable / 4)
+	if (alloc > kgem->aperture_mappable / 4 && !kgem->has_wc_mmap)
 		flags &= ~KGEM_BUFFER_INPLACE;
 
 	if (kgem->has_llc &&
@@ -6880,7 +7212,7 @@ init:
 	assert(!bo->need_io || !bo->base.needs_flush);
 	assert(!bo->need_io || bo->base.domain != DOMAIN_GPU);
 	assert(bo->mem);
-	assert(bo->mmapped != MMAPPED_GTT || MAP(bo->base.map__gtt) == bo->mem);
+	assert(bo->mmapped != MMAPPED_GTT || bo->base.map__gtt == bo->mem || bo->base.map__wc == bo->mem);
 	assert(bo->mmapped != MMAPPED_CPU || MAP(bo->base.map__cpu) == bo->mem);
 
 	bo->used = size;
@@ -6918,7 +7250,7 @@ struct kgem_bo *kgem_create_buffer_2d(st
 	assert(width > 0 && height > 0);
 	assert(ret != NULL);
 	stride = ALIGN(width, 2) * bpp >> 3;
-	stride = ALIGN(stride, 4);
+	stride = ALIGN(stride, kgem->gen >= 0100 ? 32 : 4);
 
 	DBG(("%s: %dx%d, %d bpp, stride=%d\n",
 	     __FUNCTION__, width, height, bpp, stride));
@@ -7227,7 +7559,8 @@ bool kgem_bo_convert_to_gpu(struct kgem 
 			    struct kgem_bo *bo,
 			    unsigned flags)
 {
-	DBG(("%s: converting handle=%d from CPU to GPU, flags=%x\n", __FUNCTION__, bo->handle));
+	DBG(("%s: converting handle=%d from CPU to GPU, flags=%x, busy?=%d\n",
+	     __FUNCTION__, bo->handle, flags, __kgem_bo_is_busy(kgem, bo)));
 	assert(bo->tiling == I915_TILING_NONE);
 
 	if (kgem->has_llc)

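Much of the kgem.c change above is about using the kernel's write-combining CPU mmap (the
GEM MMAP ioctl with an I915_MMAP_WC flag, advertised via I915_PARAM_MMAP_VERSION >= 1) in
place of GTT mappings, plus moving the batch into a bo via kgem_new_batch(). As a rough
illustration of the feature probe, here is a sketch modelled on test_has_wc_mmap(); the
LOCAL_* definitions simply mirror the ones in the diff, the include paths depend on the
libdrm installation, and fd is assumed to be an already-open i915 DRM device:

/* Sketch: probe for write-combining GEM mmap support on an i915 DRM fd.
 * Not a drop-in driver function; local definitions mirror the diff. */
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

struct local_i915_gem_mmap2 {
	uint32_t handle;
	uint32_t pad;
	uint64_t offset;
	uint64_t size;
	uint64_t addr_ptr;
	uint64_t flags;
};
#define LOCAL_I915_MMAP_WC 0x1
#define LOCAL_I915_PARAM_MMAP_VERSION 30
#define LOCAL_IOCTL_I915_GEM_MMAP_v2 \
	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct local_i915_gem_mmap2)

static int has_wc_mmap(int fd)
{
	drm_i915_getparam_t gp;
	struct drm_i915_gem_create create;
	struct drm_gem_close close_arg;
	struct local_i915_gem_mmap2 wc;
	int version = 0, ret;

	/* The kernel reports wc-mmap support as MMAP_VERSION >= 1. */
	memset(&gp, 0, sizeof(gp));
	gp.param = LOCAL_I915_PARAM_MMAP_VERSION;
	gp.value = &version;
	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) || version < 1)
		return 0;

	/* One-page scratch bo, like test_has_wc_mmap(). */
	memset(&create, 0, sizeof(create));
	create.size = 4096;
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create))
		return 0;

	memset(&wc, 0, sizeof(wc));
	wc.handle = create.handle;
	wc.size = 4096;
	wc.flags = LOCAL_I915_MMAP_WC;
	ret = drmIoctl(fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc) == 0;

	/* As in the driver's probe, the tiny test mapping is simply ignored. */
	memset(&close_arg, 0, sizeof(close_arg));
	close_arg.handle = create.handle;
	drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close_arg);

	return ret;
}
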
Index: xsrc/external/mit/xf86-video-intel/dist/src/sna/sna_display.c
diff -u xsrc/external/mit/xf86-video-intel/dist/src/sna/sna_display.c:1.2 xsrc/external/mit/xf86-video-intel/dist/src/sna/sna_display.c:1.3
--- xsrc/external/mit/xf86-video-intel/dist/src/sna/sna_display.c:1.2	Wed Nov  5 19:54:00 2014
+++ xsrc/external/mit/xf86-video-intel/dist/src/sna/sna_display.c	Fri Jan 16 21:32:11 2015
@@ -120,16 +120,21 @@ struct sna_crtc {
 	int dpms_mode;
 	PixmapPtr slave_pixmap;
 	DamagePtr slave_damage;
-	struct kgem_bo *bo, *shadow_bo;
+	struct kgem_bo *bo, *shadow_bo, *client_bo;
 	struct sna_cursor *cursor;
 	unsigned int last_cursor_size;
 	uint32_t offset;
 	bool shadow;
 	bool fallback_shadow;
 	bool transform;
+	bool flip_pending;
 	uint8_t id;
 	uint8_t pipe;
 
+	RegionRec client_damage; /* XXX overlap with shadow damage? */
+
+	uint16_t shadow_bo_width, shadow_bo_height;
+
 	uint32_t rotation;
 	struct plane {
 		uint32_t id;
@@ -215,6 +220,11 @@ enum { /* XXX copied from hw/xfree86/mod
 
 static void sna_crtc_disable_cursor(struct sna *sna, struct sna_crtc *crtc);
 
+static bool is_zaphod(ScrnInfoPtr scrn)
+{
+	return xf86IsEntityShared(scrn->entityList[0]);
+}
+
 inline static unsigned count_to_mask(int x)
 {
 	return (1 << x) - 1;
@@ -245,15 +255,6 @@ static inline bool event_pending(int fd)
 	return poll(&pfd, 1, 0) == 1;
 }
 
-static bool sna_mode_has_pending_events(struct sna *sna)
-{
-	/* In order to workaround a kernel bug in not honouring O_NONBLOCK,
-	 * check that the fd is readable before attempting to read the next
-	 * event from drm.
-	 */
-	return event_pending(sna->kgem.fd);
-}
-
 static bool sna_mode_wait_for_event(struct sna *sna)
 {
 	struct pollfd pfd;
@@ -603,24 +604,31 @@ static void sna_backlight_drain_uevents(
 static void sna_backlight_close(struct sna *sna) { }
 #endif
 
+static void
+sna_output_backlight_disable(struct sna_output *sna_output)
+{
+	xf86OutputPtr output = sna_output->base;
+
+	xf86DrvMsg(output->scrn->scrnIndex, X_ERROR,
+		   "Failed to set backlight %s for output %s, disabling\n",
+		   sna_output->backlight.iface, output->name);
+	backlight_disable(&sna_output->backlight);
+	if (output->randr_output) {
+		RRDeleteOutputProperty(output->randr_output, backlight_atom);
+		RRDeleteOutputProperty(output->randr_output, backlight_deprecated_atom);
+	}
+}
+
 static int
 sna_output_backlight_set(struct sna_output *sna_output, int level)
 {
-	xf86OutputPtr output = sna_output->base;
 	int ret = 0;
 
 	DBG(("%s(%s) level=%d, max=%d\n", __FUNCTION__,
-	     output->name, level, sna_output->backlight.max));
+	     sna_output->base->name, level, sna_output->backlight.max));
 
 	if (backlight_set(&sna_output->backlight, level)) {
-		xf86DrvMsg(output->scrn->scrnIndex, X_ERROR,
-			   "Failed to set backlight %s for output %s to brightness level %d, disabling\n",
-			   sna_output->backlight.iface, output->name, level);
-		backlight_disable(&sna_output->backlight);
-		if (output->randr_output) {
-			RRDeleteOutputProperty(output->randr_output, backlight_atom);
-			RRDeleteOutputProperty(output->randr_output, backlight_deprecated_atom);
-		}
+		sna_output_backlight_disable(sna_output);
 		ret = -1;
 	}
 
@@ -628,10 +636,28 @@ sna_output_backlight_set(struct sna_outp
 	 * the change latter when we wake up and the output is in a different
 	 * state.
 	 */
-	sna_backlight_drain_uevents(to_sna(output->scrn));
+	sna_backlight_drain_uevents(to_sna(sna_output->base->scrn));
 	return ret;
 }
 
+static void
+sna_output_backlight_off(struct sna_output *sna_output)
+{
+	DBG(("%s(%s)\n", __FUNCTION__, sna_output->base->name));
+	backlight_off(&sna_output->backlight);
+	sna_output_backlight_set(sna_output, 0);
+}
+
+static void
+sna_output_backlight_on(struct sna_output *sna_output)
+{
+	DBG(("%s(%s)\n", __FUNCTION__, sna_output->base->name));
+	sna_output_backlight_set(sna_output,
+				 sna_output->backlight_active_level);
+	if (backlight_on(&sna_output->backlight) < 0)
+		sna_output_backlight_disable(sna_output);
+}
+
 static int
 sna_output_backlight_get(xf86OutputPtr output)
 {
@@ -916,6 +942,7 @@ rotation_set(struct sna *sna, struct pla
 	assert(p->id);
 	assert(p->rotation.prop);
 
+	VG_CLEAR(prop);
 	prop.obj_id = p->id;
 	prop.obj_type = LOCAL_MODE_OBJECT_PLANE;
 	prop.prop_id = p->rotation.prop;
@@ -962,6 +989,11 @@ sna_crtc_apply(xf86CrtcPtr crtc)
 	int i;
 
 	DBG(("%s CRTC:%d [pipe=%d], handle=%d\n", __FUNCTION__, sna_crtc->id, sna_crtc->pipe, sna_crtc->bo->handle));
+	if (!sna_crtc->kmode.clock) {
+		ERR(("%s(CRTC:%d [pipe=%d]): attempted to set an invalid mode\n",
+		     __FUNCTION__, sna_crtc->id, sna_crtc->pipe));
+		return false;
+	}
 
 	assert(sna->mode.num_real_output < ARRAY_SIZE(output_ids));
 	sna_crtc_disable_cursor(sna, sna_crtc);
@@ -1003,14 +1035,21 @@ sna_crtc_apply(xf86CrtcPtr crtc)
 		     (uint32_t)output->possible_clones));
 
 		assert(output->possible_crtcs & (1 << sna_crtc->pipe) ||
-		       xf86IsEntityShared(crtc->scrn->entityList[0]));
+		       is_zaphod(crtc->scrn));
 
 		output_ids[output_count] = to_connector_id(output);
 		if (++output_count == ARRAY_SIZE(output_ids)) {
+			DBG(("%s: too many outputs (%d) for me!\n",
+			     __FUNCTION__, output_count));
 			errno = EINVAL;
 			return false;
 		}
 	}
+	if (output_count == 0) {
+		DBG(("%s: no outputs\n", __FUNCTION__));
+		errno = EINVAL;
+		return false;
+	}
 
 	VG_CLEAR(arg);
 	arg.crtc_id = sna_crtc->id;
@@ -1070,6 +1109,7 @@ static bool wait_for_shadow(struct sna *
 	PixmapPtr pixmap = priv->pixmap;
 	DamagePtr damage;
 	struct kgem_bo *bo, *tmp;
+	int flip_active;
 	bool ret = true;
 
 	DBG(("%s: flags=%x, flips=%d, handle=%d, shadow=%d\n",
@@ -1084,14 +1124,13 @@ static bool wait_for_shadow(struct sna *
 
 	if ((flags & MOVE_WRITE) == 0) {
 		if ((flags & __MOVE_SCANOUT) == 0) {
-			while (!list_is_empty(&sna->mode.shadow_crtc)) {
-				struct sna_crtc *crtc =
-					list_first_entry(&sna->mode.shadow_crtc,
-							 struct sna_crtc,
-							 shadow_link);
+			struct sna_crtc *crtc;
+
+			list_for_each_entry(crtc, &sna->mode.shadow_crtc, shadow_link) {
 				if (overlap(&sna->mode.shadow_region.extents,
 					    &crtc->base->bounds)) {
 					DrawableRec draw;
+					RegionRec region;
 
 					draw.width = crtc->base->mode.HDisplay;
 					draw.height = crtc->base->mode.VDisplay;
@@ -1104,18 +1143,18 @@ static bool wait_for_shadow(struct sna *
 					     crtc->base->bounds.y1,
 					     crtc->base->bounds.x2,
 					     crtc->base->bounds.y2,
-					     crtc->shadow_bo->handle));
+					     crtc->client_bo->handle));
 
 					ret &= sna->render.copy_boxes(sna, GXcopy,
-								      &draw, crtc->shadow_bo, -crtc->base->bounds.x1, -crtc->base->bounds.y1,
+								      &draw, crtc->client_bo, -crtc->base->bounds.x1, -crtc->base->bounds.y1,
 								      &pixmap->drawable, priv->gpu_bo, 0, 0,
 								      &crtc->base->bounds, 1,
 								      0);
-				}
 
-				kgem_bo_destroy(&sna->kgem, crtc->shadow_bo);
-				crtc->shadow_bo = NULL;
-				list_del(&crtc->shadow_link);
+					region.extents = crtc->base->bounds;
+					region.data = NULL;
+					RegionSubtract(&sna->mode.shadow_region, &sna->mode.shadow_region, &region);
+				}
 			}
 		}
 
@@ -1127,17 +1166,32 @@ static bool wait_for_shadow(struct sna *
 	damage = sna->mode.shadow_damage;
 	sna->mode.shadow_damage = NULL;
 
-	if (sna->mode.flip_active) {
+	flip_active = sna->mode.flip_active;
+	if (flip_active) {
+		struct sna_crtc *crtc;
+		list_for_each_entry(crtc, &sna->mode.shadow_crtc, shadow_link)
+			flip_active -= crtc->flip_pending;
+		DBG(("%s: %d flips still pending, shadow flip_active=%d\n",
+		     __FUNCTION__, sna->mode.flip_active, flip_active));
+	}
+	if (flip_active) {
 		/* raw cmd to avoid setting wedged in the middle of an op */
 		drmIoctl(sna->kgem.fd, DRM_IOCTL_I915_GEM_THROTTLE, 0);
 		sna->kgem.need_throttle = false;
 
-		while (sna->mode.flip_active && sna_mode_has_pending_events(sna))
-			sna_mode_wakeup(sna);
+		while (flip_active && sna_mode_wakeup(sna)) {
+			struct sna_crtc *crtc;
+
+			flip_active = sna->mode.flip_active;
+			list_for_each_entry(crtc, &sna->mode.shadow_crtc, shadow_link)
+				flip_active -= crtc->flip_pending;
+		}
+		DBG(("%s: after waiting %d flips outstanding, flip_active=%d\n",
+		     __FUNCTION__, sna->mode.flip_active, flip_active));
 	}
 
 	bo = sna->mode.shadow;
-	if (sna->mode.flip_active) {
+	if (flip_active) {
 		bo = kgem_create_2d(&sna->kgem,
 				    pixmap->drawable.width,
 				    pixmap->drawable.height,
@@ -1209,10 +1263,10 @@ static bool wait_for_shadow(struct sna *
 			     crtc->base->bounds.y1,
 			     crtc->base->bounds.x2,
 			     crtc->base->bounds.y2,
-			     crtc->shadow_bo->handle));
+			     crtc->client_bo->handle));
 
 			ret = sna->render.copy_boxes(sna, GXcopy,
-						     &draw, crtc->shadow_bo, -crtc->base->bounds.x1, -crtc->base->bounds.y1,
+						     &draw, crtc->client_bo, -crtc->base->bounds.x1, -crtc->base->bounds.y1,
 						     &pixmap->drawable, bo, 0, 0,
 						     &crtc->base->bounds, 1,
 						     0);
@@ -1223,8 +1277,8 @@ static bool wait_for_shadow(struct sna *
 			RegionSubtract(&sna->mode.shadow_region, &sna->mode.shadow_region, &region);
 		}
 
-		kgem_bo_destroy(&sna->kgem, crtc->shadow_bo);
-		crtc->shadow_bo = NULL;
+		kgem_bo_destroy(&sna->kgem, crtc->client_bo);
+		crtc->client_bo = NULL;
 		list_del(&crtc->shadow_link);
 	}
 
@@ -1416,7 +1470,7 @@ static bool sna_crtc_enable_shadow(struc
 
 static void sna_crtc_disable_override(struct sna *sna, struct sna_crtc *crtc)
 {
-	if (crtc->shadow_bo == NULL)
+	if (crtc->client_bo == NULL)
 		return;
 
 	if (!crtc->transform) {
@@ -1428,13 +1482,13 @@ static void sna_crtc_disable_override(st
 		tmp.bitsPerPixel = sna->front->drawable.bitsPerPixel;
 
 		sna->render.copy_boxes(sna, GXcopy,
-				       &tmp, crtc->shadow_bo, -crtc->base->bounds.x1, -crtc->base->bounds.y1,
+				       &tmp, crtc->client_bo, -crtc->base->bounds.x1, -crtc->base->bounds.y1,
 				       &sna->front->drawable, __sna_pixmap_get_bo(sna->front), 0, 0,
 				       &crtc->base->bounds, 1, 0);
 		list_del(&crtc->shadow_link);
 	}
-	kgem_bo_destroy(&sna->kgem, crtc->shadow_bo);
-	crtc->shadow_bo = NULL;
+	kgem_bo_destroy(&sna->kgem, crtc->client_bo);
+	crtc->client_bo = NULL;
 }
 
 static void sna_crtc_disable_shadow(struct sna *sna, struct sna_crtc *crtc)
@@ -1462,24 +1516,8 @@ static void sna_crtc_disable_shadow(stru
 }
 
 static void
-sna_crtc_disable(xf86CrtcPtr crtc)
+__sna_crtc_disable(struct sna *sna, struct sna_crtc *sna_crtc)
 {
-	struct sna *sna = to_sna(crtc->scrn);
-	struct sna_crtc *sna_crtc = to_sna_crtc(crtc);
-	struct drm_mode_crtc arg;
-
-	if (sna_crtc == NULL)
-		return;
-
-	DBG(("%s: disabling crtc [%d, pipe=%d]\n", __FUNCTION__,
-	     sna_crtc->id, sna_crtc->pipe));
-
-	sna_crtc_force_outputs_off(crtc);
-
-	memset(&arg, 0, sizeof(arg));
-	arg.crtc_id = sna_crtc->id;
-	(void)drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg);
-
 	sna_crtc->mode_serial++;
 
 	sna_crtc_disable_cursor(sna, sna_crtc);
@@ -1498,17 +1536,45 @@ sna_crtc_disable(xf86CrtcPtr crtc)
 		sna->mode.dirty = true;
 	}
 
+	if (sna_crtc->shadow_bo) {
+		kgem_bo_destroy(&sna->kgem, sna_crtc->shadow_bo);
+		sna_crtc->shadow_bo = NULL;
+	}
 	sna_crtc->transform = false;
 
-	assert(sna_crtc->dpms_mode == DPMSModeOff);
 	assert(!sna_crtc->shadow);
 }
 
+static void
+sna_crtc_disable(xf86CrtcPtr crtc)
+{
+	struct sna *sna = to_sna(crtc->scrn);
+	struct sna_crtc *sna_crtc = to_sna_crtc(crtc);
+	struct drm_mode_crtc arg;
+
+	if (sna_crtc == NULL)
+		return;
+
+	DBG(("%s: disabling crtc [%d, pipe=%d]\n", __FUNCTION__,
+	     sna_crtc->id, sna_crtc->pipe));
+
+	sna_crtc_force_outputs_off(crtc);
+	assert(sna_crtc->dpms_mode == DPMSModeOff);
+
+	memset(&arg, 0, sizeof(arg));
+	arg.crtc_id = sna_crtc->id;
+	(void)drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_SETCRTC, &arg);
+
+	__sna_crtc_disable(sna, sna_crtc);
+}
+
 static void update_flush_interval(struct sna *sna)
 {
 	xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn);
 	int i, max_vrefresh = 0;
 
+	DBG(("%s: front_active=%d\n", __FUNCTION__, sna->mode.front_active));
+
 	for (i = 0; i < sna->mode.num_real_crtc; i++) {
 		xf86CrtcPtr crtc = config->crtc[i];
 
@@ -1590,8 +1656,9 @@ void sna_copy_fbcon(struct sna *sna)
 	DBG(("%s\n", __FUNCTION__));
 	assert((sna->flags & SNA_IS_HOSTED) == 0);
 
-	priv = sna_pixmap(sna->front);
-	assert(priv && priv->gpu_bo);
+	priv = sna_pixmap_move_to_gpu(sna->front, MOVE_WRITE | __MOVE_SCANOUT);
+	if (priv == NULL)
+		return;
 
 	/* Scan the connectors for a framebuffer and assume that is the fbcon */
 	VG_CLEAR(fbcon);
@@ -1676,7 +1743,7 @@ static bool use_shadow(struct sna *sna, 
 	RRTransformPtr transform;
 	PictTransform crtc_to_fb;
 	struct pict_f_transform f_crtc_to_fb, f_fb_to_crtc;
-	unsigned long pitch_limit;
+	unsigned pitch_limit;
 	struct sna_pixmap *priv;
 	BoxRec b;
 
@@ -1693,7 +1760,7 @@ static bool use_shadow(struct sna *sna, 
 	}
 
 	if (sna->flags & SNA_TEAR_FREE && to_sna_crtc(crtc)->slave_pixmap) {
-		DBG(("%s: tear-free shadow required\n", __FUNCTION__));
+		DBG(("%s: TearFree shadow required\n", __FUNCTION__));
 		return true;
 	}
 
@@ -1706,7 +1773,7 @@ static bool use_shadow(struct sna *sna, 
 		return true;
 	}
 
-	priv = sna_pixmap_force_to_gpu(sna->front, MOVE_READ);
+	priv = sna_pixmap_force_to_gpu(sna->front, MOVE_READ | __MOVE_SCANOUT);
 	if (priv == NULL)
 		return true; /* maybe we can create a bo for the scanout? */
 
@@ -1798,6 +1865,61 @@ static void set_shadow(struct sna *sna, 
 	priv->move_to_gpu_data = sna;
 }
 
+static struct kgem_bo *
+get_scanout_bo(struct sna *sna, PixmapPtr pixmap)
+{
+	struct sna_pixmap *priv;
+
+	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | __MOVE_SCANOUT);
+	if (!priv)
+		return NULL;
+
+	if (priv->gpu_bo->pitch & 63) {
+		struct kgem_bo *tmp;
+		BoxRec b;
+
+		DBG(("%s: converting to scanout bo due to bad pitch [%d]\n",
+		     __FUNCTION__, priv->gpu_bo->pitch));
+
+		if (priv->pinned) {
+			DBG(("%s: failed as the Pixmap is already pinned [%x]\n",
+			     __FUNCTION__, priv->pinned));
+			return NULL;
+		}
+
+		tmp = kgem_create_2d(&sna->kgem,
+				     pixmap->drawable.width,
+				     pixmap->drawable.height,
+				     sna->scrn->bitsPerPixel,
+				     priv->gpu_bo->tiling,
+				     CREATE_EXACT | CREATE_SCANOUT);
+		if (tmp == NULL) {
+			DBG(("%s: allocation failed\n", __FUNCTION__));
+			return NULL;
+		}
+
+		b.x1 = 0;
+		b.y1 = 0;
+		b.x2 = pixmap->drawable.width;
+		b.y2 = pixmap->drawable.height;
+
+		if (sna->render.copy_boxes(sna, GXcopy,
+					   &pixmap->drawable, priv->gpu_bo, 0, 0,
+					   &pixmap->drawable, tmp, 0, 0,
+					   &b, 1, COPY_LAST)) {
+			DBG(("%s: copy failed\n", __FUNCTION__));
+			kgem_bo_destroy(&sna->kgem, tmp);
+			return NULL;
+		}
+
+		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
+		priv->gpu_bo = tmp;
+	}
+
+	priv->pinned |= PIN_SCANOUT;
+	return priv->gpu_bo;
+}
+
 static struct kgem_bo *sna_crtc_attach(xf86CrtcPtr crtc)
 {
 	struct sna_crtc *sna_crtc = to_sna_crtc(crtc);
@@ -1812,14 +1934,28 @@ static struct kgem_bo *sna_crtc_attach(x
 		unsigned long tiled_limit;
 		int tiling;
 
+force_shadow:
 		if (!sna_crtc_enable_shadow(sna, sna_crtc)) {
-			DBG(("%s: failed to enable crtc shadow\n"));
+			DBG(("%s: failed to enable crtc shadow\n", __FUNCTION__));
 			return NULL;
 		}
 
 		DBG(("%s: attaching to per-crtc pixmap %dx%d\n",
 		     __FUNCTION__, crtc->mode.HDisplay, crtc->mode.VDisplay));
 
+		bo = sna_crtc->shadow_bo;
+		if (bo) {
+			if (sna_crtc->shadow_bo_width == crtc->mode.HDisplay &&
+			    sna_crtc->shadow_bo_height == crtc->mode.VDisplay) {
+				DBG(("%s: reusing current shadow bo handle=%d\n",
+				     __FUNCTION__, bo->handle));
+				goto out_shadow;
+			}
+
+			kgem_bo_destroy(&sna->kgem, bo);
+			sna_crtc->shadow_bo = NULL;
+		}
+
 		tiling = I915_TILING_X;
 		if (sna->kgem.gen == 071)
 			tiled_limit = 16 * 1024 * 8;
@@ -1839,44 +1975,83 @@ static struct kgem_bo *sna_crtc_attach(x
 				    scrn->bitsPerPixel,
 				    tiling, CREATE_SCANOUT);
 		if (bo == NULL) {
-			DBG(("%s: failed to allocate crtc scanout\n"));
+			DBG(("%s: failed to allocate crtc scanout\n", __FUNCTION__));
 			return NULL;
 		}
 
 		if (!get_fb(sna, bo, crtc->mode.HDisplay, crtc->mode.VDisplay)) {
-			DBG(("%s: failed to bind fb for crtc scanout\n"));
+			DBG(("%s: failed to bind fb for crtc scanout\n", __FUNCTION__));
 			kgem_bo_destroy(&sna->kgem, bo);
 			return NULL;
 		}
 
+		if (__sna_pixmap_get_bo(sna->front) && !crtc->transformPresent) {
+			DrawableRec tmp;
+			BoxRec b;
+
+			b.x1 = crtc->x;
+			b.y1 = crtc->y;
+			b.x2 = crtc->x + crtc->mode.HDisplay;
+			b.y2 = crtc->y + crtc->mode.VDisplay;
+
+			DBG(("%s: copying onto shadow CRTC: (%d, %d)x(%d, %d), handle=%d\n",
+			     __FUNCTION__,
+			     b.x1, b.y1,
+			     b.x2, b.y2,
+			     bo->handle));
+
+			tmp.width = crtc->mode.HDisplay;
+			tmp.height = crtc->mode.VDisplay;
+			tmp.depth = sna->front->drawable.depth;
+			tmp.bitsPerPixel = sna->front->drawable.bitsPerPixel;
+
+			(void)sna->render.copy_boxes(sna, GXcopy,
+						     &sna->front->drawable, __sna_pixmap_get_bo(sna->front), 0, 0,
+						     &tmp, bo, -b.x1, -b.y1,
+						     &b, 1, 0);
+		}
+
+		sna_crtc->shadow_bo_width = crtc->mode.HDisplay;
+		sna_crtc->shadow_bo_height = crtc->mode.VDisplay;
+		sna_crtc->shadow_bo = bo;
+out_shadow:
 		sna_crtc->transform = true;
-		return bo;
+		return kgem_bo_reference(bo);
 	} else {
+		if (sna_crtc->shadow_bo) {
+			kgem_bo_destroy(&sna->kgem, sna_crtc->shadow_bo);
+			sna_crtc->shadow_bo = NULL;
+		}
+
 		if (sna_crtc->slave_pixmap) {
 			DBG(("%s: attaching to scanout pixmap\n", __FUNCTION__));
-			bo = sna_pixmap_pin(sna_crtc->slave_pixmap, PIN_SCANOUT);
+			bo = get_scanout_bo(sna, sna_crtc->slave_pixmap);
 			if (bo == NULL) {
-				DBG(("%s: failed to pin crtc scanout\n"));
-				return NULL;
+				DBG(("%s: failed to pin crtc scanout\n", __FUNCTION__));
+				sna_crtc->fallback_shadow = true;
+				goto force_shadow;
 			}
 
 			if (!get_fb(sna, bo,
 				    sna_crtc->slave_pixmap->drawable.width,
 				    sna_crtc->slave_pixmap->drawable.height)) {
-				DBG(("%s: failed to bind fb for crtc scanout\n"));
-				return NULL;
+				DBG(("%s: failed to bind fb for crtc scanout\n", __FUNCTION__));
+				sna_crtc->fallback_shadow = true;
+				goto force_shadow;
 			}
 		} else {
 			DBG(("%s: attaching to framebuffer\n", __FUNCTION__));
-			bo = sna_pixmap_pin(sna->front, PIN_SCANOUT);
+			bo = get_scanout_bo(sna, sna->front);
 			if (bo == NULL) {
 				DBG(("%s: failed to pin framebuffer\n", __FUNCTION__));
-				return NULL;
+				sna_crtc->fallback_shadow = true;
+				goto force_shadow;
 			}
 
 			if (!get_fb(sna, bo, scrn->virtualX, scrn->virtualY)) {
-				DBG(("%s: failed to bind fb for crtc scanout\n"));
-				return NULL;
+				DBG(("%s: failed to bind fb for crtc scanout\n", __FUNCTION__));
+				sna_crtc->fallback_shadow = true;
+				goto force_shadow;
 			}
 		}
 
@@ -1885,11 +2060,11 @@ static struct kgem_bo *sna_crtc_attach(x
 
 			DBG(("%s: enabling TearFree shadow\n", __FUNCTION__));
 			if (!sna_crtc_enable_shadow(sna, sna_crtc)) {
-				DBG(("%s: failed to enable crtc shadow\n"));
+				DBG(("%s: failed to enable crtc shadow\n", __FUNCTION__));
 				return NULL;
 			}
 
-			if (sna->mode.shadow == NULL) {
+			if (sna->mode.shadow == NULL && !wedged(sna)) {
 				RegionRec region;
 				struct kgem_bo *shadow;
 
@@ -1913,15 +2088,17 @@ static struct kgem_bo *sna_crtc_attach(x
 							CREATE_SCANOUT);
 				if (shadow == NULL) {
 					DBG(("%s: failed to allocate TearFree shadow bo\n", __FUNCTION__));
-					return NULL;
+					sna_crtc->fallback_shadow = true;
+					goto force_shadow;
 				}
 
 				if (!get_fb(sna, shadow,
 					    region.extents.x2,
 					    region.extents.y2)) {
-					DBG(("%s: failed to bind fb for TearFeee shadow\n"));
+					DBG(("%s: failed to bind fb for TearFree shadow\n", __FUNCTION__));
 					kgem_bo_destroy(&sna->kgem, shadow);
-					return NULL;
+					sna_crtc->fallback_shadow = true;
+					goto force_shadow;
 				}
 
 				sna->mode.shadow = shadow;
@@ -2035,6 +2212,7 @@ sna_crtc_damage(xf86CrtcPtr crtc)
 	     __FUNCTION__, to_sna_crtc(crtc)->id,
 	     region.extents.x1, region.extents.y1,
 	     region.extents.x2, region.extents.y2));
+	to_sna_crtc(crtc)->client_damage = region;
 
 	assert(sna->mode.shadow_damage && sna->mode.shadow_active);
 	damage = DamageRegion(sna->mode.shadow_damage);
@@ -2099,6 +2277,8 @@ __sna_crtc_set_mode(xf86CrtcPtr crtc)
 	uint32_t saved_offset;
 	bool saved_transform;
 
+	DBG(("%s\n", __FUNCTION__));
+
 	saved_bo = sna_crtc->bo;
 	saved_transform = sna_crtc->transform;
 	saved_offset = sna_crtc->offset;
@@ -2112,7 +2292,9 @@ retry: /* Attach per-crtc pixmap or dire
 		goto error;
 	}
 
-	kgem_bo_submit(&sna->kgem, bo);
+	/* Prevent recursion when enabling outputs during execbuffer */
+	if (bo->exec && RQ(bo->rq)->bo == NULL)
+		_kgem_submit(&sna->kgem);
 
 	sna_crtc->bo = bo;
 	if (!sna_crtc_apply(crtc)) {
@@ -2143,6 +2325,7 @@ retry: /* Attach per-crtc pixmap or dire
 		sna_crtc_damage(crtc);
 	sna->mode.front_active += saved_bo == NULL;
 	sna->mode.dirty = true;
+	DBG(("%s: front_active=%d\n", __FUNCTION__, sna->mode.front_active));
 
 	return TRUE;
 
@@ -2150,6 +2333,7 @@ error:
 	sna_crtc->offset = saved_offset;
 	sna_crtc->transform = saved_transform;
 	sna_crtc->bo = saved_bo;
+	sna_mode_discover(sna);
 	return FALSE;
 }
 
@@ -2206,11 +2390,12 @@ sna_crtc_dpms(xf86CrtcPtr crtc, int mode
 	assert(priv);
 	priv->dpms_mode = mode;
 
-	if (mode == DPMSModeOn &&
-	    crtc->enabled &&
-	    priv->bo == NULL &&
-	    !__sna_crtc_set_mode(crtc))
-		mode = DPMSModeOff;
+	if (mode == DPMSModeOn && crtc->enabled && priv->bo == NULL) {
+		if (__sna_crtc_set_mode(crtc))
+			update_flush_interval(to_sna(crtc->scrn));
+		else
+			mode = DPMSModeOff;
+	}
 
 	if (mode != DPMSModeOn)
 		sna_crtc_disable(crtc);
@@ -2362,8 +2547,8 @@ static int plane_details(struct sna *sna
 			continue;
 		}
 
-		DBG(("%s: prop[%d] .id=%d, .name=%s, .flags=%x, .value=%ld\n", __FUNCTION__, i,
-		     (long)props[i], prop.name, prop.flags, (long)values[i]));
+		DBG(("%s: prop[%d] .id=%ld, .name=%s, .flags=%x, .value=%ld\n", __FUNCTION__, i,
+		     (long)props[i], prop.name, (unsigned)prop.flags, (long)values[i]));
 
 		if (strcmp(prop.name, "type") == 0) {
 			type = values[i];
@@ -2437,7 +2622,7 @@ sna_crtc_find_planes(struct sna *sna, st
 		return;
 	}
 
-	DBG(("%s: %d planes\n", __FUNCTION__, r.count_planes));
+	DBG(("%s: %d planes\n", __FUNCTION__, (int)r.count_planes));
 
 	if (r.count_planes > ARRAY_SIZE(stack_planes)) {
 		planes = malloc(sizeof(uint32_t)*r.count_planes);
@@ -2556,7 +2741,7 @@ sna_crtc_add(ScrnInfoPtr scrn, int id)
 	}
 	sna_crtc->pipe = get_pipe.pipe;
 
-	if (xf86IsEntityShared(scrn->entityList[0]) &&
+	if (is_zaphod(scrn) &&
 	    scrn->confScreen->device->screen != sna_crtc->pipe) {
 		free(sna_crtc);
 		return true;
@@ -3009,7 +3194,7 @@ sna_output_dpms(xf86OutputPtr output, in
 			     __FUNCTION__, sna_output->backlight_active_level));
 		}
 		sna_output->dpms_mode = dpms;
-		sna_output_backlight_set(sna_output, 0);
+		sna_output_backlight_off(sna_output);
 	}
 
 	if (output->crtc &&
@@ -3022,8 +3207,7 @@ sna_output_dpms(xf86OutputPtr output, in
 	if (sna_output->backlight.iface && dpms == DPMSModeOn) {
 		DBG(("%s: restoring previous backlight %d\n",
 		     __FUNCTION__, sna_output->backlight_active_level));
-		sna_output_backlight_set(sna_output,
-					 sna_output->backlight_active_level);
+		sna_output_backlight_on(sna_output);
 	}
 
 	sna_output->dpms_mode = dpms;
@@ -3500,7 +3684,7 @@ static int name_from_path(struct sna *sn
 			  char *name)
 {
 	struct drm_mode_get_blob blob;
-	char buf[32], *path = buf;
+	char *path;
 	int id;
 
 	id = find_property(sna, sna_output, "PATH");
@@ -3510,20 +3694,19 @@ static int name_from_path(struct sna *sn
 
 	VG_CLEAR(blob);
 	blob.blob_id = sna_output->prop_values[id];
-	blob.length = sizeof(buf)-1;
-	blob.data = (uintptr_t)path;
-	VG(memset(path, 0, blob.length));
+	blob.length = 0;
 	if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob))
 		return 0;
 
-	if (blob.length >= sizeof(buf)) {
-		path = alloca(blob.length + 1);
+	do {
+		id = blob.length;
+		path = alloca(id + 1);
 		blob.data = (uintptr_t)path;
-		VG(memset(path, 0, blob.length));
-		DBG(("%s: reading %d bytes for path blob\n", __FUNCTION__, blob.length));
+		VG(memset(path, 0, id));
+		DBG(("%s: reading %d bytes for path blob\n", __FUNCTION__, id));
 		if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPROPBLOB, &blob))
 			return 0;
-	}
+	} while (id != blob.length);
 
 	path[blob.length] = '\0'; /* paranoia */
 	DBG(("%s: PATH='%s'\n", __FUNCTION__, path));
@@ -3548,7 +3731,8 @@ static int name_from_path(struct sna *sn
 
 		for (n = 0; n < sna->mode.num_real_output; n++) {
 			if (to_sna_output(config->output[n])->id == id)
-				return snprintf(name, 32, "%s-%s", config->output[n]->name, c + 1);
+				return snprintf(name, 32, "%s-%s",
+						config->output[n]->name, c + 1);
 		}
 	}
 
@@ -3636,7 +3820,7 @@ sna_output_add(struct sna *sna, unsigned
 		(void)drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETENCODER, &enc);
 	}
 
-	if (xf86IsEntityShared(scrn->entityList[0])) {
+	if (is_zaphod(scrn)) {
 		const char *str;
 
 		str = xf86GetOptValString(sna->Options, OPTION_ZAPHOD);
@@ -3735,7 +3919,7 @@ sna_output_add(struct sna *sna, unsigned
 	output->name = (char *)(output + 1);
 	memcpy(output->name, name, len + 1);
 
-	output->use_screen_monitor = config->num_output != 1;
+	output->use_screen_monitor = config->num_output != 0;
 	xf86OutputUseScreenMonitor(output, !output->use_screen_monitor);
 	assert(output->options);
 
@@ -3898,7 +4082,7 @@ static void sort_randr_outputs(struct sn
 	}
 }
 
-static void disable_unused_crtc(struct sna *sna)
+static bool disable_unused_crtc(struct sna *sna)
 {
 	xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn);
 	bool update = false;
@@ -3910,7 +4094,6 @@ static void disable_unused_crtc(struct s
 		if (!crtc->enabled)
 			continue;
 
-
 		for (o = 0; o < sna->mode.num_real_output; o++) {
 			xf86OutputPtr output = config->output[o];
 			if (output->crtc == crtc)
@@ -3918,13 +4101,19 @@ static void disable_unused_crtc(struct s
 		}
 
 		if (o == sna->mode.num_real_output) {
+			DBG(("%s: CRTC:%d was enabled with no outputs\n",
+			     __FUNCTION__, to_sna_crtc(crtc)->id));
 			crtc->enabled = false;
 			update = true;
 		}
 	}
 
-	if (update)
+	if (update) {
+		DBG(("%s: disabling unused functions\n", __FUNCTION__));
 		xf86DisableUnusedFunctions(sna->scrn);
+	}
+
+	return update;
 }
 
 void sna_mode_discover(struct sna *sna)
@@ -3952,7 +4141,7 @@ void sna_mode_discover(struct sna *sna)
 	if (res.count_connectors > 32)
 		return;
 
-	assert(sna->mode.num_real_crtc == res.count_crtcs);
+	assert(sna->mode.num_real_crtc == res.count_crtcs || is_zaphod(sna->scrn));
 	assert(sna->mode.max_crtc_width  == res.max_width);
 	assert(sna->mode.max_crtc_height == res.max_height);
 	assert(sna->mode.num_real_encoder == res.count_encoders);
@@ -4034,7 +4223,7 @@ static void copy_front(struct sna *sna, 
 	if (!old_priv)
 		return;
 
-	new_priv = sna_pixmap_force_to_gpu(new, MOVE_WRITE);
+	new_priv = sna_pixmap_force_to_gpu(new, MOVE_WRITE | __MOVE_SCANOUT);
 	if (!new_priv)
 		return;
 
@@ -4186,16 +4375,14 @@ sna_mode_resize(ScrnInfoPtr scrn, int wi
 		xf86CrtcPtr crtc = config->crtc[i];
 
 		assert(to_sna_crtc(crtc) != NULL);
-		if (!crtc->enabled)
+		if (to_sna_crtc(crtc)->bo == NULL)
 			continue;
 
 		if (!__sna_crtc_set_mode(crtc))
 			sna_crtc_disable(crtc);
 	}
 
-	while (sna_mode_has_pending_events(sna))
-		sna_mode_wakeup(sna);
-
+	sna_mode_wakeup(sna);
 	kgem_clean_scanout_cache(&sna->kgem);
 
 	return TRUE;
@@ -4574,6 +4761,7 @@ sna_show_cursors(ScrnInfoPtr scrn)
 		}
 	}
 	sigio_unblock(sigio);
+	sna->cursor.active = true;
 }
 
 static void
@@ -4638,6 +4826,7 @@ sna_hide_cursors(ScrnInfoPtr scrn)
 	int sigio, c;
 
 	DBG(("%s\n", __FUNCTION__));
+	sna->cursor.active = false;
 
 	sigio = sigio_block();
 	for (c = 0; c < sna->mode.num_real_crtc; c++) {
@@ -4982,9 +5171,11 @@ sna_cursors_init(ScreenPtr screen, struc
 static void
 sna_cursors_reload(struct sna *sna)
 {
-	sna_set_cursor_position(sna->scrn,
-				sna->cursor.last_x,
-				sna->cursor.last_y);
+	DBG(("%s: active?=%d\n", __FUNCTION__, sna->cursor.active));
+	if (sna->cursor.active)
+		sna_set_cursor_position(sna->scrn,
+					sna->cursor.last_x,
+					sna->cursor.last_y);
 }
 
 static void
@@ -5013,6 +5204,8 @@ sna_crtc_flip(struct sna *sna, struct sn
 	DBG(("%s CRTC:%d [pipe=%d], handle=%d\n", __FUNCTION__, crtc->id, crtc->pipe, bo->handle));
 
 	assert(sna->mode.num_real_output < ARRAY_SIZE(output_ids));
+	assert(crtc->bo);
+	assert(crtc->kmode.clock);
 
 	for (i = 0; i < sna->mode.num_real_output; i++) {
 		xf86OutputPtr output = config->output[i];
@@ -5027,12 +5220,13 @@ sna_crtc_flip(struct sna *sna, struct sn
 		     (uint32_t)output->possible_clones));
 
 		assert(output->possible_crtcs & (1 << crtc->pipe) ||
-		       xf86IsEntityShared(sna->scrn->entityList[0]));
+		       is_zaphod(sna->scrn));
 
 		output_ids[output_count] = to_connector_id(output);
 		if (++output_count == ARRAY_SIZE(output_ids))
 			return false;
 	}
+	assert(output_count);
 
 	VG_CLEAR(arg);
 	arg.crtc_id = crtc->id;
@@ -5123,6 +5317,11 @@ fixup_flip:
 				crtc->bo->active_scanout--;
 				kgem_bo_destroy(&sna->kgem, crtc->bo);
 
+				if (crtc->shadow_bo) {
+					kgem_bo_destroy(&sna->kgem, crtc->shadow_bo);
+					crtc->shadow_bo = NULL;
+				}
+
 				crtc->bo = kgem_bo_reference(bo);
 				crtc->bo->active_scanout++;
 
@@ -5198,6 +5397,7 @@ retry_flip:
 			crtc->flip_bo = kgem_bo_reference(bo);
 			crtc->flip_bo->active_scanout++;
 			crtc->flip_serial = crtc->mode_serial;
+			crtc->flip_pending = true;
 			sna->mode.flip_active++;
 		}
 
@@ -5612,11 +5812,19 @@ sna_crtc_config_notify(ScreenPtr screen)
 	if (!sna->mode.dirty)
 		return;
 
-	probe_capabilities(sna);
-	update_flush_interval(sna);
+	if (disable_unused_crtc(sna)) {
+		/* This will have recursed, so simply bail at this point */
+		assert(sna->mode.dirty == false);
+#ifdef RANDR_12_INTERFACE
+		xf86RandR12TellChanged(screen);
+#endif
+		return;
+	}
 
+	update_flush_interval(sna);
 	sna_cursors_reload(sna);
 
+	probe_capabilities(sna);
 	sna_present_update(sna);
 
 	sna->mode.dirty = false;
@@ -5744,7 +5952,7 @@ sna_mode_set_primary(struct sna *sna)
 	rrScrPrivPtr rr = rrGetScrPriv(xf86ScrnToScreen(sna->scrn));
 	int i;
 
-	if (rr->primaryOutput)
+	if (rr == NULL || rr->primaryOutput)
 		return;
 
 	for (i = 0; i < sna->mode.num_real_output; i++) {
@@ -5762,11 +5970,68 @@ sna_mode_set_primary(struct sna *sna)
 #endif
 }
 
+bool
+sna_mode_disable(struct sna *sna)
+{
+	xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn);
+	int i;
+
+	if (sna->flags & SNA_IS_HOSTED)
+		return false;
+
+	if (!sna->scrn->vtSema)
+		return false;
+
+	/* XXX we will cause previously hidden cursors to be reshown, but
+	 * this should be a rare fixup case for severe fragmentation.
+	 */
+	sna_hide_cursors(sna->scrn);
+	for (i = 0; i < sna->mode.num_real_crtc; i++)
+		sna_crtc_disable(config->crtc[i]);
+	assert(sna->mode.front_active == 0);
+
+	sna_mode_wakeup(sna);
+	kgem_clean_scanout_cache(&sna->kgem);
+	return true;
+}
+
+void
+sna_mode_enable(struct sna *sna)
+{
+	xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn);
+	int i;
+
+	DBG(("%s\n", __FUNCTION__));
+
+	if (sna->flags & SNA_IS_HOSTED)
+		return;
+
+	if (!sna->scrn->vtSema)
+		return;
+
+	for (i = 0; i < sna->mode.num_real_crtc; i++) {
+		xf86CrtcPtr crtc = config->crtc[i];
+
+		DBG(("%s: crtc[%d].enabled?=%d\n", __FUNCTION__, i, crtc->enabled));
+		assert(to_sna_crtc(crtc) != NULL);
+		if (!crtc->enabled)
+			continue;
+
+		if (crtc->mode.Clock == 0)
+			continue;
+
+		__sna_crtc_set_mode(crtc);
+	}
+
+	update_flush_interval(sna);
+	sna_show_cursors(sna->scrn);
+	sna->mode.dirty = false;
+}
+
 void
 sna_mode_close(struct sna *sna)
 {
-	while (sna_mode_has_pending_events(sna))
-		sna_mode_wakeup(sna);
+	sna_mode_wakeup(sna);
 
 	if (sna->flags & SNA_IS_HOSTED)
 		return;
@@ -5777,6 +6042,7 @@ sna_mode_close(struct sna *sna)
 	sna_cursors_fini(sna);
 
 	sna_backlight_close(sna);
+	sna->mode.dirty = false;
 }
 
 void
@@ -6053,7 +6319,7 @@ static bool sna_emit_wait_for_scanline_g
 	event = 1 << (3*full_height + pipe*8);
 
 	b = kgem_get_batch(&sna->kgem);
-	sna->kgem.nbatch += 10;
+	sna->kgem.nbatch += 16;
 
 	b[0] = MI_LOAD_REGISTER_IMM | 1;
 	b[1] = 0x44050; /* DERRMR */
@@ -6061,10 +6327,16 @@ static bool sna_emit_wait_for_scanline_g
 	b[3] = MI_LOAD_REGISTER_IMM | 1;
 	b[4] = 0x4f100; /* magic */
 	b[5] = (1 << 31) | (1 << 30) | pipe << 29 | (y1 << 16) | y2;
-	b[6] = MI_WAIT_FOR_EVENT | event;
-	b[7] = MI_LOAD_REGISTER_IMM | 1;
-	b[8] = 0x44050; /* DERRMR */
-	b[9] = ~0;
+	b[6] = MI_LOAD_REGISTER_IMM | 1;
+	b[7] = 0x2050; /* PSMI_CTL(rcs) */
+	b[8] = 1 << 16 | 1;
+	b[9] = MI_WAIT_FOR_EVENT | event;
+	b[10] = MI_LOAD_REGISTER_IMM | 1;
+	b[11] = 0x2050; /* PSMI_CTL(rcs) */
+	b[12] = 1 << 16;
+	b[13] = MI_LOAD_REGISTER_IMM | 1;
+	b[14] = 0x44050; /* DERRMR */
+	b[15] = ~0;
 
 	sna->kgem.batch_flags |= I915_EXEC_SECURE;
 	return true;
@@ -6283,6 +6555,7 @@ sna_crtc_hide_planes(struct sna *sna, st
 	s.plane_id = crtc->sprite.id;
 	(void)drmIoctl(sna->kgem.fd, LOCAL_IOCTL_MODE_SETPLANE, &s);
 
+	__sna_crtc_disable(sna, crtc);
 	return true;
 }
 
@@ -6329,8 +6602,7 @@ void sna_mode_reset(struct sna *sna)
 	}
 
 	/* drain the event queue */
-	while (sna_mode_has_pending_events(sna))
-		sna_mode_wakeup(sna);
+	sna_mode_wakeup(sna);
 }
 
 static void transformed_box(BoxRec *box, xf86CrtcPtr crtc)
@@ -6356,10 +6628,17 @@ inline static DrawablePtr crtc_source(xf
 {
 	struct sna_crtc *sna_crtc = to_sna_crtc(crtc);
 	if (sna_crtc->slave_pixmap) {
+		DBG(("%s: using slave pixmap=%ld, offset (%d, %d)\n",
+		     __FUNCTION__,
+		     sna_crtc->slave_pixmap->drawable.serialNumber,
+		 -crtc->x, -crtc->y));
 		*sx = -crtc->x;
 		*sy = -crtc->y;
 		return &sna_crtc->slave_pixmap->drawable;
 	} else {
+		DBG(("%s: using Screen pixmap=%ld\n",
+		     __FUNCTION__,
+		     to_sna(crtc->scrn)->front->drawable.serialNumber));
 		*sx = *sy = 0;
 		return &to_sna(crtc->scrn)->front->drawable;
 	}
@@ -6378,7 +6657,7 @@ sna_crtc_redisplay__fallback(xf86CrtcPtr
 	int depth, error;
 	void *ptr;
 
-	DBG(("%s: compositing transformed damage boxes\n", __FUNCTION__));
+	DBG(("%s: compositing transformed damage boxes, target handle=%d\n", __FUNCTION__, bo->handle));
 
 	error = sna_render_format_for_depth(draw->depth);
 	depth = PIXMAN_FORMAT_DEPTH(error);
@@ -6389,6 +6668,10 @@ sna_crtc_redisplay__fallback(xf86CrtcPtr
 		return;
 	}
 
+	DBG(("%s: dst format=%08x, depth=%d, bpp=%d, pitch=%d, size=%dx%d\n",
+	     __FUNCTION__, format->format, depth, draw->bitsPerPixel,
+	     bo->pitch, crtc->mode.HDisplay, crtc->mode.VDisplay));
+
 	ptr = kgem_bo_map__gtt(&sna->kgem, bo);
 	if (ptr == NULL)
 		return;
@@ -6412,7 +6695,7 @@ sna_crtc_redisplay__fallback(xf86CrtcPtr
 	if (error)
 		goto free_src;
 
-	if (crtc->filter)
+	if (crtc->filter && crtc->transform_in_use)
 		SetPicturePictFilter(src, crtc->filter,
 				     crtc->params, crtc->nparams);
 
@@ -6478,6 +6761,10 @@ sna_crtc_redisplay__composite(xf86CrtcPt
 		return;
 	}
 
+	DBG(("%s: dst format=%08x, depth=%d, bpp=%d, pitch=%d, size=%dx%d\n",
+	     __FUNCTION__, format->format, depth, draw->bitsPerPixel,
+	     bo->pitch, crtc->mode.HDisplay, crtc->mode.VDisplay));
+
 	pixmap = sna_pixmap_create_unattached(screen, 0, 0, depth);
 	if (pixmap == NullPixmap)
 		return;
@@ -6502,7 +6789,7 @@ sna_crtc_redisplay__composite(xf86CrtcPt
 	if (error)
 		goto free_src;
 
-	if (crtc->filter)
+	if (crtc->filter && crtc->transform_in_use)
 		SetPicturePictFilter(src, crtc->filter,
 				     crtc->params, crtc->nparams);
 
@@ -6590,6 +6877,8 @@ sna_crtc_redisplay(xf86CrtcPtr crtc, Reg
 	}
 
 	if (crtc->filter == NULL &&
+	    priv->gpu_bo &&
+	    priv->cpu_damage == NULL &&
 	    sna_transform_is_integer_translation(&crtc->crtc_to_framebuffer,
 						 &tx, &ty)) {
 		DrawableRec tmp;
@@ -6614,7 +6903,11 @@ sna_crtc_redisplay(xf86CrtcPtr crtc, Reg
 		sna_crtc_redisplay__fallback(crtc, region, bo);
 }
 
-#define shadow_flip_handler (sna_flip_handler_t)sna_mode_redisplay
+static void shadow_flip_handler(struct drm_event_vblank *e,
+				void *data)
+{
+	sna_mode_redisplay(data);
+}
 
 void sna_shadow_set_crtc(struct sna *sna,
 			 xf86CrtcPtr crtc,
@@ -6630,11 +6923,11 @@ void sna_shadow_set_crtc(struct sna *sna
 	assert(sna_crtc);
 	assert(!sna_crtc->transform);
 
-	if (sna_crtc->shadow_bo != bo) {
-		if (sna_crtc->shadow_bo)
-			kgem_bo_destroy(&sna->kgem, sna_crtc->shadow_bo);
+	if (sna_crtc->client_bo != bo) {
+		if (sna_crtc->client_bo)
+			kgem_bo_destroy(&sna->kgem, sna_crtc->client_bo);
 
-		sna_crtc->shadow_bo = kgem_bo_reference(bo);
+		sna_crtc->client_bo = kgem_bo_reference(bo);
 		sna_crtc_damage(crtc);
 	}
 
@@ -6647,19 +6940,50 @@ void sna_shadow_set_crtc(struct sna *sna
 	priv->move_to_gpu_data = sna;
 }
 
+void sna_shadow_steal_crtcs(struct sna *sna, struct list *list)
+{
+	list_init(list);
+	while (!list_is_empty(&sna->mode.shadow_crtc)) {
+		RegionRec sub, *damage;
+		struct sna_crtc *crtc =
+			list_first_entry(&sna->mode.shadow_crtc,
+					 struct sna_crtc,
+					 shadow_link);
+
+		damage = DamageRegion(sna->mode.shadow_damage);
+		sub.extents = crtc->base->bounds;
+		sub.data = NULL;
+		RegionSubtract(damage, damage, &sub);
+
+		list_move(&crtc->shadow_link, list);
+	}
+}
+
+void sna_shadow_unsteal_crtcs(struct sna *sna, struct list *list)
+{
+	while (!list_is_empty(list)) {
+		struct sna_crtc *crtc =
+			list_first_entry(list,
+					 struct sna_crtc,
+					 shadow_link);
+		assert(crtc->client_bo);
+		sna_shadow_set_crtc(sna, crtc->base, crtc->client_bo);
+	}
+}
+
 void sna_shadow_unset_crtc(struct sna *sna,
-			 xf86CrtcPtr crtc)
+			   xf86CrtcPtr crtc)
 {
 	struct sna_crtc *sna_crtc = to_sna_crtc(crtc);
 
 	DBG(("%s: clearing shadow override for CRTC:%d\n",
 	     __FUNCTION__, sna_crtc->id));
 
-	if (sna_crtc->shadow_bo == NULL)
+	if (sna_crtc->client_bo == NULL)
 		return;
 
-	kgem_bo_destroy(&sna->kgem, sna_crtc->shadow_bo);
-	sna_crtc->shadow_bo = NULL;
+	kgem_bo_destroy(&sna->kgem, sna_crtc->client_bo);
+	sna_crtc->client_bo = NULL;
 	list_del(&sna_crtc->shadow_link);
 	sna->mode.shadow_dirty = true;
 
@@ -6675,13 +6999,17 @@ void sna_mode_redisplay(struct sna *sna)
 	if (!sna->mode.shadow_damage)
 		return;
 
-	DBG(("%s: posting shadow damage? %d (flips pending? %d)\n",
+	DBG(("%s: posting shadow damage? %d (flips pending? %d, mode reconfiguration pending? %d)\n",
 	     __FUNCTION__,
 	     !RegionNil(DamageRegion(sna->mode.shadow_damage)),
-	     sna->mode.flip_active));
+	     sna->mode.flip_active,
+	     sna->mode.dirty));
 	assert((sna->flags & SNA_IS_HOSTED) == 0);
 	assert(sna->mode.shadow_active);
 
+	if (sna->mode.dirty)
+		return;
+
 	region = DamageRegion(sna->mode.shadow_damage);
 	if (RegionNil(region))
 		return;
@@ -6697,8 +7025,8 @@ void sna_mode_redisplay(struct sna *sna)
 		damage = sna->mode.shadow_damage;
 		sna->mode.shadow_damage = NULL;
 
-		while (sna->mode.flip_active && sna_mode_has_pending_events(sna))
-			sna_mode_wakeup(sna);
+		while (sna->mode.flip_active && sna_mode_wakeup(sna))
+			;
 
 		sna->mode.shadow_damage = damage;
 	}
@@ -6726,8 +7054,92 @@ void sna_mode_redisplay(struct sna *sna)
 			damage.extents = crtc->bounds;
 			damage.data = NULL;
 			RegionIntersect(&damage, &damage, region);
-			if (RegionNotEmpty(&damage))
-				sna_crtc_redisplay__fallback(crtc, &damage, sna_crtc->bo);
+			if (!box_empty(&damage.extents)) {
+				struct kgem_bo *bo = NULL;
+
+				DBG(("%s: fallback intersects pipe=%d [(%d, %d), (%d, %d)]\n",
+				     __FUNCTION__, sna_crtc->pipe,
+				     damage.extents.x1, damage.extents.y1,
+				     damage.extents.x2, damage.extents.y2));
+
+				if (sna->flags & SNA_TEAR_FREE) {
+					RegionRec new_damage;
+
+					RegionNull(&new_damage);
+					RegionCopy(&new_damage, &damage);
+
+					bo = sna_crtc->client_bo;
+					if (bo == NULL) {
+						damage.extents = crtc->bounds;
+						damage.data = NULL;
+						bo = kgem_create_2d(&sna->kgem,
+								crtc->mode.HDisplay,
+								crtc->mode.VDisplay,
+								crtc->scrn->bitsPerPixel,
+								sna_crtc->bo->tiling,
+								CREATE_SCANOUT);
+					} else
+						RegionUnion(&damage, &damage, &sna_crtc->client_damage);
+
+					DBG(("%s: TearFree fallback, shadow handle=%d, crtc handle=%d\n", __FUNCTION__, bo->handle, sna_crtc->bo->handle));
+
+					sna_crtc->client_damage = new_damage;
+				}
+
+				if (bo == NULL)
+					bo = sna_crtc->bo;
+				sna_crtc_redisplay__fallback(crtc, &damage, bo);
+
+				if (bo != sna_crtc->bo) {
+					struct drm_mode_crtc_page_flip arg;
+
+					arg.crtc_id = sna_crtc->id;
+					arg.fb_id = get_fb(sna, bo,
+							   crtc->mode.HDisplay,
+							   crtc->mode.VDisplay);
+
+					arg.user_data = (uintptr_t)sna_crtc;
+					arg.flags = DRM_MODE_PAGE_FLIP_EVENT;
+					arg.reserved = 0;
+
+					if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_PAGE_FLIP, &arg)) {
+						if (sna_crtc_flip(sna, sna_crtc, bo, 0, 0)) {
+							assert(sna_crtc->bo->active_scanout);
+							assert(sna_crtc->bo->refcnt >= sna_crtc->bo->active_scanout);
+							sna_crtc->bo->active_scanout--;
+							kgem_bo_destroy(&sna->kgem, sna_crtc->bo);
+
+							sna_crtc->bo = bo;
+							sna_crtc->bo->active_scanout++;
+							sna_crtc->client_bo = NULL;
+						} else {
+							DBG(("%s: flip [fb=%d] on crtc %d [%d, pipe=%d] failed - %d\n",
+							     __FUNCTION__, arg.fb_id, i, sna_crtc->id, sna_crtc->pipe, errno));
+							xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR,
+								   "Page flipping failed, disabling TearFree\n");
+							sna->flags &= ~SNA_TEAR_FREE;
+
+							damage.extents = crtc->bounds;
+							damage.data = NULL;
+							sna_crtc_redisplay__fallback(crtc, &damage, sna_crtc->bo);
+
+							kgem_bo_destroy(&sna->kgem, bo);
+							sna_crtc->client_bo = NULL;
+						}
+					} else {
+						sna->mode.flip_active++;
+
+						assert(sna_crtc->flip_bo == NULL);
+						sna_crtc->flip_handler = shadow_flip_handler;
+						sna_crtc->flip_data = sna;
+						sna_crtc->flip_bo = bo;
+						sna_crtc->flip_bo->active_scanout++;
+						sna_crtc->flip_serial = sna_crtc->mode_serial;
+
+						sna_crtc->client_bo = kgem_bo_reference(sna_crtc->bo);
+					}
+				}
+			}
 			RegionUninit(&damage);
 
 			if (sna_crtc->slave_damage)
@@ -6782,7 +7194,13 @@ void sna_mode_redisplay(struct sna *sna)
 		damage.data = NULL;
 
 		RegionIntersect(&damage, &damage, region);
-		if (RegionNotEmpty(&damage)) {
+		DBG(("%s: crtc[%d] damage? %d[%d]: %dx[(%d, %d), (%d, %d)]\n",
+		     __FUNCTION__, i,
+		     !box_empty(&damage.extents), RegionNotEmpty(&damage),
+		     region_num_rects(&damage),
+		     damage.extents.x1, damage.extents.y1,
+		     damage.extents.x2, damage.extents.y2));
+		if (!box_empty(&damage.extents)) {
 			if (sna->flags & SNA_TEAR_FREE) {
 				struct drm_mode_crtc_page_flip arg;
 				struct kgem_bo *bo;
@@ -6791,7 +7209,7 @@ void sna_mode_redisplay(struct sna *sna)
 				damage.extents = crtc->bounds;
 				damage.data = NULL;
 
-				bo = sna_crtc->shadow_bo;
+				bo = sna_crtc->client_bo;
 				if (bo == NULL)
 					bo = kgem_create_2d(&sna->kgem,
 							    crtc->mode.HDisplay,
@@ -6817,42 +7235,59 @@ void sna_mode_redisplay(struct sna *sna)
 				arg.reserved = 0;
 
 				if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_PAGE_FLIP, &arg)) {
-					BoxRec box;
-					DrawableRec tmp;
+					if (sna_crtc_flip(sna, sna_crtc, bo, 0, 0)) {
+						assert(sna_crtc->bo->active_scanout);
+						assert(sna_crtc->bo->refcnt >= sna_crtc->bo->active_scanout);
+						sna_crtc->bo->active_scanout--;
+						kgem_bo_destroy(&sna->kgem, sna_crtc->bo);
+
+						sna_crtc->bo = kgem_bo_reference(bo);
+						sna_crtc->bo->active_scanout++;
+						sna_crtc->client_bo = kgem_bo_reference(bo);
+					} else {
+						BoxRec box;
+						DrawableRec tmp;
+
+						DBG(("%s: flip [fb=%d] on crtc %d [%d, pipe=%d] failed - %d\n",
+						     __FUNCTION__, arg.fb_id, i, sna_crtc->id, sna_crtc->pipe, errno));
+						xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR,
+							   "Page flipping failed, disabling TearFree\n");
+						sna->flags &= ~SNA_TEAR_FREE;
 
-					DBG(("%s: flip [fb=%d] on crtc %d [%d, pipe=%d] failed - %d\n",
-					     __FUNCTION__, arg.fb_id, i, sna_crtc->id, sna_crtc->pipe, errno));
 disable1:
-					box.x1 = 0;
-					box.y1 = 0;
-					tmp.width = box.x2 = crtc->mode.HDisplay;
-					tmp.height = box.y2 = crtc->mode.VDisplay;
-					tmp.depth = sna->front->drawable.depth;
-					tmp.bitsPerPixel = sna->front->drawable.bitsPerPixel;
-
-					if (!sna->render.copy_boxes(sna, GXcopy,
-								    &sna->front->drawable, bo, 0, 0,
-								    &tmp, sna_crtc->bo, 0, 0,
-								    &box, 1, COPY_LAST)) {
-						xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR,
-							   "%s: page flipping failed, disabling CRTC:%d (pipe=%d)\n",
-							   __FUNCTION__, sna_crtc->id, sna_crtc->pipe);
-						sna_crtc_disable(crtc);
-					}
+						box.x1 = 0;
+						box.y1 = 0;
+						tmp.width = box.x2 = crtc->mode.HDisplay;
+						tmp.height = box.y2 = crtc->mode.VDisplay;
+						tmp.depth = sna->front->drawable.depth;
+						tmp.bitsPerPixel = sna->front->drawable.bitsPerPixel;
+
+						if (!sna->render.copy_boxes(sna, GXcopy,
+									    &sna->front->drawable, bo, 0, 0,
+									    &tmp, sna_crtc->bo, 0, 0,
+									    &box, 1, COPY_LAST)) {
+							xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR,
+								   "%s: page flipping failed, disabling CRTC:%d (pipe=%d)\n",
+								   __FUNCTION__, sna_crtc->id, sna_crtc->pipe);
+							sna_crtc_disable(crtc);
+						}
 
-					kgem_bo_destroy(&sna->kgem, bo);
-					sna_crtc->shadow_bo = NULL;
+						kgem_bo_destroy(&sna->kgem, bo);
+						sna_crtc->client_bo = NULL;
+					}
 					continue;
 				}
 				sna->mode.flip_active++;
 
 				assert(sna_crtc->flip_bo == NULL);
 				sna_crtc->flip_handler = shadow_flip_handler;
+				sna_crtc->flip_data = sna;
 				sna_crtc->flip_bo = bo;
 				sna_crtc->flip_bo->active_scanout++;
 				sna_crtc->flip_serial = sna_crtc->mode_serial;
+				sna_crtc->flip_pending = true;
 
-				sna_crtc->shadow_bo = kgem_bo_reference(sna_crtc->bo);
+				sna_crtc->client_bo = kgem_bo_reference(sna_crtc->bo);
 			} else {
 				sna_crtc_redisplay(crtc, &damage, sna_crtc->bo);
 				kgem_scanout_flush(&sna->kgem, sna_crtc->bo);
@@ -6870,7 +7305,7 @@ disable1:
 		struct drm_mode_crtc_page_flip arg;
 		uint32_t fb = 0;
 
-		DBG(("%s: flipping tear-free outputs, current scanout handle=%d [active?=%d], new handle=%d [active=%d]\n",
+		DBG(("%s: flipping TearFree outputs, current scanout handle=%d [active?=%d], new handle=%d [active=%d]\n",
 		     __FUNCTION__, old->handle, old->active_scanout, new->handle, new->active_scanout));
 
 		assert(new != old);
@@ -6899,14 +7334,14 @@ disable1:
 			arg.crtc_id = crtc->id;
 			arg.user_data = (uintptr_t)crtc;
 
-			if (crtc->shadow_bo) {
+			if (crtc->client_bo) {
 				DBG(("%s: apply shadow override bo for CRTC:%d on pipe=%d, handle=%d\n",
-				     __FUNCTION__, crtc->id, crtc->pipe, crtc->shadow_bo->handle));
-				arg.fb_id = get_fb(sna, crtc->shadow_bo,
+				     __FUNCTION__, crtc->id, crtc->pipe, crtc->client_bo->handle));
+				arg.fb_id = get_fb(sna, crtc->client_bo,
 						   crtc->base->mode.HDisplay,
 						   crtc->base->mode.VDisplay);
 				assert(arg.fb_id != fb);
-				flip_bo = crtc->shadow_bo;
+				flip_bo = crtc->client_bo;
 				x = y = 0;
 			} else {
 				if (fb == 0)
@@ -6942,7 +7377,7 @@ fixup_shadow:
 				continue;
 
 			if (flip_bo->pitch != crtc->bo->pitch || (y << 16 | x)  != crtc->offset) {
-				DBG(("%s: changing pitch (%d == %d) or offset (%x == %x)\n",
+				DBG(("%s: changing pitch (new %d =?= old %d) or offset (new %x =?= old %x)\n",
 				     __FUNCTION__,
 				     flip_bo->pitch, crtc->bo->pitch,
 				     y << 16 | x, crtc->offset));
@@ -6954,9 +7389,18 @@ fixup_flip:
 					crtc->bo->active_scanout--;
 					kgem_bo_destroy(&sna->kgem, crtc->bo);
 
+					if (crtc->shadow_bo) {
+						kgem_bo_destroy(&sna->kgem, crtc->shadow_bo);
+						crtc->shadow_bo = NULL;
+					}
+
 					crtc->bo = kgem_bo_reference(flip_bo);
 					crtc->bo->active_scanout++;
 				} else {
+					xf86DrvMsg(sna->scrn->scrnIndex, X_ERROR,
+						   "Failed to prepare CRTC for page flipping, disabling TearFree\n");
+					sna->flags &= ~SNA_TEAR_FREE;
+
 					if (sna->mode.flip_active == 0) {
 						DBG(("%s: abandoning flip attempt\n", __FUNCTION__));
 						goto fixup_shadow;
@@ -6979,9 +7423,11 @@ fixup_flip:
 
 			assert(crtc->flip_bo == NULL);
 			crtc->flip_handler = shadow_flip_handler;
+			crtc->flip_data = sna;
 			crtc->flip_bo = kgem_bo_reference(flip_bo);
 			crtc->flip_bo->active_scanout++;
 			crtc->flip_serial = crtc->mode_serial;
+			crtc->flip_pending = true;
 
 			{
 				struct drm_i915_gem_busy busy = { flip_bo->handle };
@@ -7014,17 +7460,32 @@ fixup_flip:
 	RegionEmpty(region);
 }
 
-void sna_mode_wakeup(struct sna *sna)
+int sna_mode_wakeup(struct sna *sna)
 {
 	char buffer[1024];
 	int len, i;
+	int ret = 0;
+
+again:
+	/* In order to workaround a kernel bug in not honouring O_NONBLOCK,
+	 * check that the fd is readable before attempting to read the next
+	 * event from drm.
+	 */
+	if (!event_pending(sna->kgem.fd))
+		return ret;
 
 	/* The DRM read semantics guarantees that we always get only
 	 * complete events.
 	 */
 	len = read(sna->kgem.fd, buffer, sizeof (buffer));
 	if (len < (int)sizeof(struct drm_event))
-		return;
+		return ret;
+
+	/* Note that we cannot rely on the passed in struct sna matching
+	 * the struct sna used for the vblank event (in case it was submitted
+	 * by a different ZaphodHead). When processing the event, we must
+	 * ensure that we only use the pointer passed along with the event.
+	 */
 
 	DBG(("%s: len=%d\n", __FUNCTION__, len));
 
@@ -7034,18 +7495,22 @@ void sna_mode_wakeup(struct sna *sna)
 		switch (e->type) {
 		case DRM_EVENT_VBLANK:
 			if (((uintptr_t)((struct drm_event_vblank *)e)->user_data) & 2)
-				sna_present_vblank_handler(sna, (struct drm_event_vblank *)e);
+				sna_present_vblank_handler((struct drm_event_vblank *)e);
 			else
-				sna_dri2_vblank_handler(sna, (struct drm_event_vblank *)e);
+				sna_dri2_vblank_handler((struct drm_event_vblank *)e);
 			break;
 		case DRM_EVENT_FLIP_COMPLETE:
 			{
 				struct drm_event_vblank *vbl = (struct drm_event_vblank *)e;
 				struct sna_crtc *crtc = (void *)(uintptr_t)vbl->user_data;
 
+				/* Beware Zaphod! */
+				sna = to_sna(crtc->base->scrn);
+
 				crtc->swap.tv_sec = vbl->tv_sec;
 				crtc->swap.tv_usec = vbl->tv_usec;
 				crtc->swap.msc = msc64(crtc, vbl->sequence);
+				crtc->flip_pending = false;
 
 				assert(crtc->flip_bo);
 				assert(crtc->flip_bo->active_scanout);
@@ -7059,6 +7524,11 @@ void sna_mode_wakeup(struct sna *sna)
 					crtc->bo->active_scanout--;
 					kgem_bo_destroy(&sna->kgem, crtc->bo);
 
+					if (crtc->shadow_bo) {
+						kgem_bo_destroy(&sna->kgem, crtc->shadow_bo);
+						crtc->shadow_bo = NULL;
+					}
+
 					crtc->bo = crtc->flip_bo;
 					crtc->flip_bo = NULL;
 				} else {
@@ -7070,12 +7540,15 @@ void sna_mode_wakeup(struct sna *sna)
 				DBG(("%s: flip complete, pending? %d\n", __FUNCTION__, sna->mode.flip_active));
 				assert(sna->mode.flip_active);
 				if (--sna->mode.flip_active == 0)
-					crtc->flip_handler(sna, vbl, crtc->flip_data);
+					crtc->flip_handler(vbl, crtc->flip_data);
 			}
 			break;
 		default:
 			break;
 		}
 		i += e->length;
+		ret++;
 	}
+
+	goto again;
 }
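
The reworked sna_mode_wakeup() above folds the old "sna_mode_has_pending_events(), then wakeup" pairs into a single draining loop that also returns how many events it handled, so callers such as wait_for_shadow() and sna_mode_redisplay() can simply spin on its return value. The following is a minimal, self-contained sketch of that pattern, not code from this commit: it assumes the libdrm headers are installed, and drain_drm_events()/dispatch() are hypothetical names standing in for the driver's VBLANK and flip-complete handling.

/*
 * Drain-and-count sketch: poll() the DRM fd first (workaround for kernels
 * that ignore O_NONBLOCK), read only complete events, hand each one to a
 * dispatch callback, and report how many were processed (0 == idle).
 */
#include <poll.h>
#include <unistd.h>
#include <xf86drm.h>	/* struct drm_event, DRM_EVENT_VBLANK, ... */

static int drm_fd_readable(int fd)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN };

	return poll(&pfd, 1, 0) == 1;
}

static int drain_drm_events(int fd, void (*dispatch)(const struct drm_event *))
{
	char buffer[1024];
	int handled = 0;

	while (drm_fd_readable(fd)) {
		int len = read(fd, buffer, sizeof(buffer));
		int i = 0;

		/* The DRM read semantics guarantee whole events only. */
		if (len < (int)sizeof(struct drm_event))
			break;

		while (i < len) {
			const struct drm_event *e =
				(const struct drm_event *)&buffer[i];

			dispatch(e);	/* e.g. VBLANK or FLIP_COMPLETE */
			i += e->length;
			handled++;
		}
	}

	return handled;
}

A caller that has to wait for outstanding page flips can then loop with "while (flips_pending && drain_drm_events(fd, handle_event)) ;", which is the shape the wait_for_shadow() and sna_mode_redisplay() hunks take above.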

Index: xsrc/external/mit/xf86-video-intel/dist/src/uxa/intel.h
diff -u xsrc/external/mit/xf86-video-intel/dist/src/uxa/intel.h:1.2 xsrc/external/mit/xf86-video-intel/dist/src/uxa/intel.h:1.3
--- xsrc/external/mit/xf86-video-intel/dist/src/uxa/intel.h:1.2	Wed Nov  5 17:58:59 2014
+++ xsrc/external/mit/xf86-video-intel/dist/src/uxa/intel.h	Fri Jan 16 21:32:11 2015
@@ -54,7 +54,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN
 #include "xorg-server.h"
 #include "xf86_OSproc.h"
 #include "compiler.h"
-#include "xf86Pci.h"
 #include "xf86Cursor.h"
 #include "xf86xv.h"
 #include "xf86Crtc.h"
@@ -93,64 +92,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN
 
 #define MAX_PIPES 4 /* consider making all users dynamic */
 
-struct intel_pixmap {
-	dri_bo *bo;
-
-	struct list batch;
-
-	uint16_t stride;
-	uint8_t tiling;
-	int8_t busy :2;
-	uint8_t dirty :1;
-	uint8_t offscreen :1;
-	uint8_t pinned :5;
-#define PIN_SCANOUT 0x1
-#define PIN_DRI2 0x2
-#define PIN_DRI3 0x4
-#define PIN_PRIME 0x8
-#define PIN_GLAMOR 0x10
-};
-
-#if HAS_DEVPRIVATEKEYREC
-extern DevPrivateKeyRec uxa_pixmap_index;
-#else
-extern int uxa_pixmap_index;
-#endif
-
-static inline struct intel_pixmap *intel_get_pixmap_private(PixmapPtr pixmap)
-{
-#if HAS_DEVPRIVATEKEYREC
-	return dixGetPrivate(&pixmap->devPrivates, &uxa_pixmap_index);
-#else
-	return dixLookupPrivate(&pixmap->devPrivates, &uxa_pixmap_index);
-#endif
-}
-
-static inline Bool intel_pixmap_is_busy(struct intel_pixmap *priv)
-{
-	if (priv->busy == -1)
-		priv->busy = drm_intel_bo_busy(priv->bo);
-	return priv->busy;
-}
-
-static inline void intel_set_pixmap_private(PixmapPtr pixmap, struct intel_pixmap *intel)
-{
-	dixSetPrivate(&pixmap->devPrivates, &uxa_pixmap_index, intel);
-}
-
-static inline Bool intel_pixmap_is_dirty(PixmapPtr pixmap)
-{
-	return pixmap && intel_get_pixmap_private(pixmap)->dirty;
-}
-
-static inline Bool intel_pixmap_tiled(PixmapPtr pixmap)
-{
-	return intel_get_pixmap_private(pixmap)->tiling != I915_TILING_NONE;
-}
-
-dri_bo *intel_get_pixmap_bo(PixmapPtr pixmap);
-void intel_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo);
-
 #include "common.h"
 
 #define PITCH_NONE 0
@@ -171,6 +112,7 @@ enum dri_type {
 
 typedef struct intel_screen_private {
 	ScrnInfoPtr scrn;
+	struct intel_device *dev;
 	int cpp;
 
 #define RENDER_BATCH			I915_EXEC_RENDER
@@ -179,12 +121,12 @@ typedef struct intel_screen_private {
 
 	void *modes;
 	drm_intel_bo *front_buffer, *back_buffer;
-	PixmapPtr back_pixmap;
 	unsigned int back_name;
 	long front_pitch, front_tiling;
 
 	dri_bufmgr *bufmgr;
 
+#if USE_UXA
 	uint32_t batch_ptr[4096];
 	/** Byte offset in batch_ptr for the next dword to be emitted. */
 	unsigned int batch_used;
@@ -200,6 +142,7 @@ typedef struct intel_screen_private {
 	struct list batch_pixmaps;
 	drm_intel_bo *wa_scratch_bo;
 	OsTimerPtr cache_expire;
+#endif
 
 	/* For Xvideo */
 	Bool use_overlay;
@@ -223,7 +166,6 @@ typedef struct intel_screen_private {
 
 	int Chipset;
 	EntityInfoPtr pEnt;
-	struct pci_device *PciInfo;
 	const struct intel_device_info *info;
 
 	unsigned int BR[20];
@@ -236,8 +178,10 @@ typedef struct intel_screen_private {
 	void (*batch_flush) (struct intel_screen_private *intel);
 	void (*batch_commit_notify) (struct intel_screen_private *intel);
 
+#if USE_UXA
 	struct _UxaDriver *uxa_driver;
 	int uxa_flags;
+#endif
 	Bool need_sync;
 	int accel_pixmap_offset_alignment;
 	int accel_max_x;
@@ -270,6 +214,7 @@ typedef struct intel_screen_private {
 		drm_intel_bo *gen6_depth_stencil_bo;
 	} video;
 
+#if USE_UXA
 	/* Render accel state */
 	float scale_units[2][2];
 	/** Transform pointers for src/mask, or NULL if identity */
@@ -327,6 +272,7 @@ typedef struct intel_screen_private {
 
 	/* 965 render acceleration state */
 	struct gen4_render_state *gen4_render_state;
+#endif
 
 	/* DRI enabled this generation. */
 	enum dri_type dri2, dri3;
@@ -378,14 +324,14 @@ typedef struct intel_screen_private {
 #define IS_HSW(intel) (INTEL_INFO(intel)->gen == 075)
 
 /* Some chips have specific errata (or limits) that we need to workaround. */
-#define IS_I830(intel) ((intel)->PciInfo->device_id == PCI_CHIP_I830_M)
-#define IS_845G(intel) ((intel)->PciInfo->device_id == PCI_CHIP_845_G)
-#define IS_I865G(intel) ((intel)->PciInfo->device_id == PCI_CHIP_I865_G)
+#define IS_I830(intel) (intel_get_device_id((intel)->dev) == PCI_CHIP_I830_M)
+#define IS_845G(intel) (intel_get_device_id((intel)->dev) == PCI_CHIP_845_G)
+#define IS_I865G(intel) (intel_get_device_id((intel)->dev) == PCI_CHIP_I865_G)
 
-#define IS_I915G(pI810) ((intel)->PciInfo->device_id == PCI_CHIP_I915_G || (intel)->PciInfo->device_id == PCI_CHIP_E7221_G)
-#define IS_I915GM(pI810) ((intel)->PciInfo->device_id == PCI_CHIP_I915_GM)
+#define IS_I915G(pI810) (intel_get_device_id((intel)->dev) == PCI_CHIP_I915_G || intel_get_device_id((intel)->dev) == PCI_CHIP_E7221_G)
+#define IS_I915GM(pI810) (intel_get_device_id((intel)->dev) == PCI_CHIP_I915_GM)
 
-#define IS_965_Q(pI810) ((intel)->PciInfo->device_id == PCI_CHIP_I965_Q)
+#define IS_965_Q(pI810) (intel_get_device_id((intel)->dev) == PCI_CHIP_I965_Q)
 
 /* supports Y tiled surfaces (pre-965 Mesa isn't ready yet) */
 #define SUPPORTS_YTILING(pI810) (INTEL_INFO(intel)->gen >= 040)
@@ -501,22 +447,10 @@ intel_get_screen_private(ScrnInfoPtr scr
 #define MIN(a,b)	((a) < (b) ? (a) : (b))
 #endif
 
-static inline unsigned long intel_pixmap_pitch(PixmapPtr pixmap)
-{
-	return (unsigned long)pixmap->devKind;
-}
-
-/* Batchbuffer support macros and functions */
-#include "intel_batchbuffer.h"
-
-/* I830 specific functions */
-extern void IntelEmitInvarientState(ScrnInfoPtr scrn);
-extern void I830EmitInvarientState(ScrnInfoPtr scrn);
-extern void I915EmitInvarientState(ScrnInfoPtr scrn);
-
-extern void I830EmitFlush(ScrnInfoPtr scrn);
+extern void intel_video_init(ScreenPtr pScreen);
+extern void intel_box_intersect(BoxPtr dest, BoxPtr a, BoxPtr b);
+extern void intel_crtc_box(xf86CrtcPtr crtc, BoxPtr crtc_box);
 
-extern void I830InitVideo(ScreenPtr pScreen);
 extern xf86CrtcPtr intel_covering_crtc(ScrnInfoPtr scrn, BoxPtr box,
 				      xf86CrtcPtr desired, BoxPtr crtc_box_ret);
 
@@ -540,175 +474,16 @@ unsigned long intel_get_fence_pitch(inte
 Bool intel_check_display_stride(ScrnInfoPtr scrn, int stride, Bool tiling);
 void intel_set_gem_max_sizes(ScrnInfoPtr scrn);
 
+unsigned int
+intel_compute_size(struct intel_screen_private *intel,
+                   int w, int h, int bpp, unsigned usage,
+                   uint32_t *tiling, int *stride);
+
 drm_intel_bo *intel_allocate_framebuffer(ScrnInfoPtr scrn,
 					 int width, int height, int cpp,
 					 int *out_stride,
 					 uint32_t *out_tiling);
 
-/* i830_render.c */
-Bool i830_check_composite(int op,
-			  PicturePtr sourcec, PicturePtr mask, PicturePtr dest,
-			  int width, int height);
-Bool i830_check_composite_target(PixmapPtr pixmap);
-Bool i830_check_composite_texture(ScreenPtr screen, PicturePtr picture);
-Bool i830_prepare_composite(int op, PicturePtr sourcec, PicturePtr mask,
-			    PicturePtr dest, PixmapPtr sourcecPixmap,
-			    PixmapPtr maskPixmap, PixmapPtr destPixmap);
-void i830_composite(PixmapPtr dest, int srcX, int srcY,
-		    int maskX, int maskY, int dstX, int dstY, int w, int h);
-void i830_vertex_flush(intel_screen_private *intel);
-
-/* i915_render.c */
-Bool i915_check_composite(int op,
-			  PicturePtr sourcec, PicturePtr mask, PicturePtr dest,
-			  int width, int height);
-Bool i915_check_composite_target(PixmapPtr pixmap);
-Bool i915_check_composite_texture(ScreenPtr screen, PicturePtr picture);
-Bool i915_prepare_composite(int op, PicturePtr sourcec, PicturePtr mask,
-			    PicturePtr dest, PixmapPtr sourcecPixmap,
-			    PixmapPtr maskPixmap, PixmapPtr destPixmap);
-void i915_composite(PixmapPtr dest, int srcX, int srcY,
-		    int maskX, int maskY, int dstX, int dstY, int w, int h);
-void i915_vertex_flush(intel_screen_private *intel);
-void i915_batch_commit_notify(intel_screen_private *intel);
-void i830_batch_commit_notify(intel_screen_private *intel);
-/* i965_render.c */
-unsigned int gen4_render_state_size(ScrnInfoPtr scrn);
-void gen4_render_state_init(ScrnInfoPtr scrn);
-void gen4_render_state_cleanup(ScrnInfoPtr scrn);
-Bool i965_check_composite(int op,
-			  PicturePtr sourcec, PicturePtr mask, PicturePtr dest,
-			  int width, int height);
-Bool i965_check_composite_texture(ScreenPtr screen, PicturePtr picture);
-Bool i965_prepare_composite(int op, PicturePtr sourcec, PicturePtr mask,
-			    PicturePtr dest, PixmapPtr sourcecPixmap,
-			    PixmapPtr maskPixmap, PixmapPtr destPixmap);
-void i965_composite(PixmapPtr dest, int srcX, int srcY,
-		    int maskX, int maskY, int dstX, int dstY, int w, int h);
-
-void i965_vertex_flush(intel_screen_private *intel);
-void i965_batch_flush(intel_screen_private *intel);
-void i965_batch_commit_notify(intel_screen_private *intel);
-
-/* i965_3d.c */
-void gen6_upload_invariant_states(intel_screen_private *intel);
-void gen6_upload_viewport_state_pointers(intel_screen_private *intel,
-					 drm_intel_bo *cc_vp_bo);
-void gen7_upload_viewport_state_pointers(intel_screen_private *intel,
-					 drm_intel_bo *cc_vp_bo);
-void gen6_upload_urb(intel_screen_private *intel);
-void gen7_upload_urb(intel_screen_private *intel);
-void gen6_upload_cc_state_pointers(intel_screen_private *intel,
-				   drm_intel_bo *blend_bo, drm_intel_bo *cc_bo,
-				   drm_intel_bo *depth_stencil_bo,
-				   uint32_t blend_offset);
-void gen7_upload_cc_state_pointers(intel_screen_private *intel,
-				   drm_intel_bo *blend_bo, drm_intel_bo *cc_bo,
-				   drm_intel_bo *depth_stencil_bo,
-				   uint32_t blend_offset);
-void gen6_upload_sampler_state_pointers(intel_screen_private *intel,
-					drm_intel_bo *sampler_bo);
-void gen7_upload_sampler_state_pointers(intel_screen_private *intel,
-					drm_intel_bo *sampler_bo);
-void gen7_upload_bypass_states(intel_screen_private *intel);
-void gen6_upload_gs_state(intel_screen_private *intel);
-void gen6_upload_vs_state(intel_screen_private *intel);
-void gen6_upload_clip_state(intel_screen_private *intel);
-void gen6_upload_sf_state(intel_screen_private *intel, int num_sf_outputs, int read_offset);
-void gen7_upload_sf_state(intel_screen_private *intel, int num_sf_outputs, int read_offset);
-void gen6_upload_binding_table(intel_screen_private *intel, uint32_t ps_binding_table_offset);
-void gen7_upload_binding_table(intel_screen_private *intel, uint32_t ps_binding_table_offset);
-void gen6_upload_depth_buffer_state(intel_screen_private *intel);
-void gen7_upload_depth_buffer_state(intel_screen_private *intel);
-
-Bool intel_transform_is_affine(PictTransformPtr t);
-Bool
-intel_get_transformed_coordinates(int x, int y, PictTransformPtr transform,
-				 float *x_out, float *y_out);
-
-Bool
-intel_get_transformed_coordinates_3d(int x, int y, PictTransformPtr transform,
-				    float *x_out, float *y_out, float *z_out);
-
-static inline void
-intel_debug_fallback(ScrnInfoPtr scrn, const char *format, ...) _X_ATTRIBUTE_PRINTF(2, 3);
-
-static inline void
-intel_debug_fallback(ScrnInfoPtr scrn, const char *format, ...)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-	va_list ap;
-
-	va_start(ap, format);
-	if (intel->fallback_debug) {
-		xf86DrvMsg(scrn->scrnIndex, X_INFO, "fallback: ");
-		LogVMessageVerb(X_INFO, 1, format, ap);
-	}
-	va_end(ap);
-}
-
-static inline Bool
-intel_check_pitch_2d(PixmapPtr pixmap)
-{
-	uint32_t pitch = intel_pixmap_pitch(pixmap);
-	if (pitch > KB(32)) {
-		ScrnInfoPtr scrn = xf86ScreenToScrn(pixmap->drawable.pScreen);
-		intel_debug_fallback(scrn, "pitch exceeds 2d limit 32K\n");
-		return FALSE;
-	}
-	return TRUE;
-}
-
-/* For pre-965 chip only, as they have 8KB limit for 3D */
-static inline Bool
-intel_check_pitch_3d(PixmapPtr pixmap)
-{
-	uint32_t pitch = intel_pixmap_pitch(pixmap);
-	if (pitch > KB(8)) {
-		ScrnInfoPtr scrn = xf86ScreenToScrn(pixmap->drawable.pScreen);
-		intel_debug_fallback(scrn, "pitch exceeds 3d limit 8K\n");
-		return FALSE;
-	}
-	return TRUE;
-}
-
-/**
- * Little wrapper around drm_intel_bo_reloc to return the initial value you
- * should stuff into the relocation entry.
- *
- * If only we'd done this before settling on the library API.
- */
-static inline uint32_t
-intel_emit_reloc(drm_intel_bo * bo, uint32_t offset,
-		 drm_intel_bo * target_bo, uint32_t target_offset,
-		 uint32_t read_domains, uint32_t write_domain)
-{
-	drm_intel_bo_emit_reloc(bo, offset, target_bo, target_offset,
-				read_domains, write_domain);
-
-	return target_bo->offset + target_offset;
-}
-
-static inline drm_intel_bo *intel_bo_alloc_for_data(intel_screen_private *intel,
-						    const void *data,
-						    unsigned int size,
-						    const char *name)
-{
-	drm_intel_bo *bo;
-	int ret;
-
-	bo = drm_intel_bo_alloc(intel->bufmgr, name, size, 4096);
-	assert(bo);
-
-	ret = drm_intel_bo_subdata(bo, 0, size, data);
-	assert(ret == 0);
-
-	return bo;
-	(void)ret;
-}
-
-void intel_debug_flush(ScrnInfoPtr scrn);
-
 static inline PixmapPtr get_drawable_pixmap(DrawablePtr drawable)
 {
 	ScreenPtr screen = drawable->pScreen;
@@ -726,18 +501,16 @@ static inline Bool pixmap_is_scanout(Pix
 	return pixmap == screen->GetScreenPixmap(screen);
 }
 
-Bool intel_uxa_init(ScreenPtr pScreen);
-Bool intel_uxa_create_screen_resources(ScreenPtr pScreen);
-void intel_uxa_block_handler(intel_screen_private *intel);
-Bool intel_get_aperture_space(ScrnInfoPtr scrn, drm_intel_bo ** bo_table,
-			      int num_bos);
-
-static inline Bool intel_pixmap_is_offscreen(PixmapPtr pixmap)
+static inline int
+intel_pixmap_pitch(PixmapPtr pixmap)
 {
-	struct intel_pixmap *priv = intel_get_pixmap_private(pixmap);
-	return priv && priv->offscreen;
+	return (unsigned long)pixmap->devKind;
 }
 
+/*
+ * intel_sync.c
+ */
+
 #if HAVE_DRI3
 Bool intel_sync_init(ScreenPtr screen);
 void intel_sync_close(ScreenPtr screen);
@@ -762,4 +535,13 @@ Bool intel_present_screen_init(ScreenPtr
 static inline Bool intel_present_screen_init(ScreenPtr screen) { return 0; }
 #endif
 
+dri_bo *
+intel_get_pixmap_bo(PixmapPtr pixmap);
+
+void
+intel_set_pixmap_bo(PixmapPtr pixmap, dri_bo *bo);
+
+void
+intel_flush(intel_screen_private *intel);
+
 #endif /* _I830_H_ */
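
As a rough illustration of how the declarations exported above (intel_get_pixmap_bo, intel_set_pixmap_bo, intel_flush) fit together, here is a minimal sketch of a hypothetical caller. The helper name is invented for this example, and the reference-counting behaviour of intel_set_pixmap_bo is an assumption, not something this commit establishes.

#include "intel.h"

/*
 * Hypothetical helper, shown only to illustrate the declarations added
 * above; "intel_share_pixmap_bo" is an invented name and is not part of
 * the driver.  It points "dst" at the buffer object currently backing
 * "src" and then submits any pending batched rendering.
 */
static Bool
intel_share_pixmap_bo(intel_screen_private *intel,
		      PixmapPtr dst, PixmapPtr src)
{
	dri_bo *bo = intel_get_pixmap_bo(src);

	if (bo == NULL)
		return FALSE;

	/* Assumes intel_set_pixmap_bo() handles buffer references
	 * internally; verify against the driver before relying on it. */
	intel_set_pixmap_bo(dst, bo);

	/* Submit any batched rendering that may still reference the
	 * pixmaps involved. */
	intel_flush(intel);

	return TRUE;
}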
