diff --git a/src/nv50_exa.c b/src/nv50_exa.c
index fc6af10..023f2d2 100644
--- a/src/nv50_exa.c
+++ b/src/nv50_exa.c
@@ -114,6 +114,8 @@ NV50EXAAcquireSurface2D(PixmapPtr ppix, int is_src)
 	if (!NV50EXA2DSurfaceFormat(ppix, &fmt))
 		return FALSE;
 
+	ErrorF("is_src %d ppix 0x%08X tiled %d width %d height %d\n", is_src, ppix, surf->bo->tiled, ppix->drawable.width, ppix->drawable.height);
+
 	bo_flags  = NOUVEAU_BO_VRAM;
 	bo_flags |= is_src ? NOUVEAU_BO_RD : NOUVEAU_BO_WR;
 
@@ -274,6 +276,8 @@ NV50EXACopy(PixmapPtr pdpix, int srcX , int srcY,
 {
 	NV50EXA_LOCALS(pdpix);
 
+	ErrorF("srcX %d srcY %d dstX %d dstY %d width %d height %d\n", srcX, srcY, dstX, dstY, width, height);
+
 	BEGIN_RING(chan, eng2d, 0x0110, 1);
 	OUT_RING  (chan, 0);
 	BEGIN_RING(chan, eng2d, 0x088c, 1);
diff --git a/src/nv_exa.c b/src/nv_exa.c
index ee5a08e..38c5663 100644
--- a/src/nv_exa.c
+++ b/src/nv_exa.c
@@ -254,6 +254,123 @@ static inline Bool NVAccelMemcpyRect(char *dst, const char *src, int height,
 	return TRUE;
 }
 
+/*
+ * Generic M2MF function to copy a buffer object.
+ * Partial copies are possible too: the copy starts at pixel (info->x, info->y)
+ * and moves info->h lines of info->src_line_len bytes each.
+ * Buffer objects must be non-NULL and have equal size, otherwise FALSE is
+ * returned before any command is emitted. Returns TRUE once the commands are emitted.
+ */
+static inline bool
+nv_accel_m2mf_copy_bo(ScrnInfoPtr pScrn, struct nouveau_bo_copy *info)
+{
+	NVPtr pNv = NVPTR(pScrn);
+	struct nouveau_channel *chan = pNv->chan;
+	struct nouveau_grobj *m2mf = pNv->NvMemFormat;
+	/* Getting the actual flags requires pinning the bo. */
+	uint32_t src_flags = NOUVEAU_BO_GART | NOUVEAU_BO_VRAM, dst_flags = src_flags;
+	int line_len = info->src_line_len, cpp = info->cpp;
+	int h = info->h, x = info->x, y = info->y;
+	int src_pitch = 0, dst_pitch = 0, src_offset = 0, dst_offset = 0;
+	struct nouveau_bo *src = info->src, *dst = info->dst;
+
+	/* Reject a missing bo here; its size and tiled fields are read below. */
+	if (!src || !dst) {
+		xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Trying to copy a missing buffer object.\n");
+		return FALSE;
+	}
+
+	if (src->size != dst->size) {
+		xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Trying to copy two buffer objects of different size.\n");
+		return FALSE;
+	}
+
+	ErrorF("src_pitch %d dst_pitch %d src_height %d dst_height %d x %d y %d w %d h %d cpp %d\n", info->src_pitch, info->dst_pitch, info->src_height, info->dst_height, x, y, info->w, h, cpp);
+
+	/* Some methods always need the pitch, some only when !tiled. */
+	if (!src->tiled) {
+		src_pitch  = info->src_pitch;
+		src_offset = (y * src_pitch) + (x * cpp);
+	}
+
+	if (!dst->tiled) {
+		dst_pitch  = info->dst_pitch;
+		dst_offset = (y * dst_pitch) + (x * cpp);
+	}
+
+	BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2);
+	OUT_RELOCo(chan, src, src_flags | NOUVEAU_BO_RD);
+	OUT_RELOCo(chan, dst, dst_flags | NOUVEAU_BO_WR);
+
+	/* This code assumes the default tile size of 8x8. */
+	if (pNv->Architecture >= NV_ARCH_50) {
+		if (src->tiled) {
+			BEGIN_RING(chan, m2mf, 0x0200, 6);
+			OUT_RING  (chan, 0);
+			OUT_RING  (chan, 0); /* Something to do with tilesize iirc. */
+			OUT_RING  (chan, info->src_pitch  /*info->src_line_len*/);
+			OUT_RING  (chan, info->src_height);
+			OUT_RING  (chan, 1);
+			OUT_RING  (chan, 0);
+		} else {
+			BEGIN_RING(chan, m2mf, 0x0200, 1);
+			OUT_RING  (chan, 1);
+		}
+
+		if (dst->tiled) {
+			BEGIN_RING(chan, m2mf, 0x021c, 6);
+			OUT_RING  (chan, 0);
+			OUT_RING  (chan, 0); /* Something to do with tilesize iirc. */
+			OUT_RING  (chan, info->dst_pitch /*info->dst_line_len*/);
+			OUT_RING  (chan, info->dst_height);
+			OUT_RING  (chan, 1);
+			OUT_RING  (chan, 0);
+		} else {
+			BEGIN_RING(chan, m2mf, 0x021c, 1);
+			OUT_RING  (chan, 1);
+		}
+	}
+
+	while (h > 0) { /* > 0 so a negative height never emits a negative line count */
+		int line_count = (h > 2047) ? 2047 : h; /* HW limitations */
+
+		if (pNv->Architecture >= NV_ARCH_50) {
+			if (src->tiled) {
+				BEGIN_RING(chan, m2mf, 0x0218, 1);
+				OUT_RING  (chan, (y << 16) | (x * cpp));
+			}
+
+			if (dst->tiled) {
+				BEGIN_RING(chan, m2mf, 0x0234, 1);
+				OUT_RING  (chan, (y << 16) | (x * cpp));
+			}
+
+			BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH, 2);
+			OUT_RELOCh(chan, src, src_offset, src_flags | NOUVEAU_BO_RD);
+			OUT_RELOCh(chan, dst, dst_offset, dst_flags | NOUVEAU_BO_WR);
+		}
+
+		BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
+		OUT_RELOCl(chan, src, src_offset, src_flags | NOUVEAU_BO_RD);
+		OUT_RELOCl(chan, dst, dst_offset, dst_flags | NOUVEAU_BO_WR);
+		OUT_RING(chan, info->src_pitch);
+		OUT_RING(chan, info->dst_pitch);
+		OUT_RING(chan, line_len);
+		OUT_RING(chan, line_count);
+		OUT_RING(chan, (1<<8)|1);
+		OUT_RING(chan, 0);
+
+		if (!src->tiled)
+			src_offset += line_count * src_pitch;
+		if (!dst->tiled)
+			dst_offset += line_count * dst_pitch;
+		h -= line_count;
+		y += line_count;
+	}
+
+	return TRUE;
+}
+
 static inline Bool
 NVAccelDownloadM2MF(PixmapPtr pspix, int x, int y, int w, int h,
 		    char *dst, unsigned dst_pitch)
@@ -683,18 +800,78 @@ NVExaPrepareAccess(PixmapPtr pPix, int index)
 	ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
 	NVPtr pNv = NVPTR(pScrn);
 	struct nouveau_pixmap *nvpix;
+	bool already_prepared = false;
 	(void)pNv;
-
+	
 	nvpix = exaGetPixmapDriverPrivate(pPix);
 	if (!nvpix || !nvpix->bo)
 		return FALSE;
 
-	if (!nvpix->bo->map) {
-		if (nouveau_bo_map(nvpix->bo, NOUVEAU_BO_RDWR))
+	ErrorF("PrepareAccess %d on pixmap 0x%16X\n", index, pPix);
+
+	if (pPix->drawable.bitsPerPixel < 8) {
+		ErrorF("bpp 8 is the minimum\n");
+		return FALSE;
+	}
+
+	if (nvpix->flags)
+		already_prepared = true;
+
+	nvpix->flags |= (1 << index);
+
+	if (already_prepared)
+		return TRUE;
+
+	if (pPix->devPrivate.ptr)
+		ErrorF("devPrivate.ptr is non-NULL\n");
+
+	if (nvpix->bo->tiled) {
+		if (!nvpix->lbo || !nvpix->lbo->map) {
+			ErrorF("Tiled PrepareAccess\n");
+			struct nouveau_bo_copy info;
+			uint32_t flags = NOUVEAU_BO_VRAM;
+			int ret = nouveau_bo_new(pNv->dev, flags, 0, nvpix->bo->size, &nvpix->lbo);
+			if (ret) {
+				xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Failed to allocate linear bo for copying.\n");
+				return FALSE;
+			}
+
+			info.src = nvpix->bo;
+			info.dst = nvpix->lbo;
+			info.x = info.y = 0;
+			info.w = pPix->drawable.width;
+			info.h = pPix->drawable.height;
+			info.cpp = pPix->drawable.bitsPerPixel >> 3;
+			info.src_pitch = info.dst_pitch = exaGetPixmapPitch(pPix);
+			info.src_line_len = info.dst_line_len = info.w * info.cpp;
+			info.src_height = info.dst_height = info.h;
+
+			ret = nv_accel_m2mf_copy_bo(pScrn, &info);
+			if (!ret) {
+				xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Failed to copy tiled data to linear bo.\n");
+				nouveau_bo_ref(NULL, &nvpix->lbo);
+				return FALSE;
+			}
+
+			if (nouveau_bo_map(nvpix->lbo, NOUVEAU_BO_RDWR)) {
+				xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Mapping of linear bo failed\n");
+				nouveau_bo_ref(NULL, &nvpix->lbo);
+				return FALSE;
+			}
+
+			pPix->devPrivate.ptr = nvpix->lbo->map;
+		}
+
+		pPix->devPrivate.ptr = nvpix->lbo->map;
+	} else {
+		if (nouveau_bo_map(nvpix->bo, NOUVEAU_BO_RDWR)) {
+			xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Mapping of bo failed\n");
 			return FALSE;
+		}
+
+		pPix->devPrivate.ptr = nvpix->bo->map;
 	}
 
-	pPix->devPrivate.ptr = nvpix->bo->map;
 	return TRUE;
 }
 
@@ -710,7 +887,38 @@ NVExaFinishAccess(PixmapPtr pPix, int index)
 	if (!nvpix || !nvpix->bo)
 		return;
 
-	nouveau_bo_unmap(nvpix->bo);
+	ErrorF("FinishAccess %d on pixmap 0x%16X\n", index, pPix);
+
+	nvpix->flags &= ~(1 << index);
+
+	/* Someone else still needs it. */
+	if (nvpix->flags)
+		return;
+
+	if (nvpix->bo->tiled) {
+		ErrorF("Tiled FinishAccess\n");
+		struct nouveau_bo_copy info;
+		int ret;
+
+		info.src = nvpix->lbo;
+		info.dst = nvpix->bo;
+		info.x = info.y = 0;
+		info.w = pPix->drawable.width;
+		info.h = pPix->drawable.height;
+		info.cpp = pPix->drawable.bitsPerPixel >> 3;
+		info.src_pitch = info.dst_pitch = exaGetPixmapPitch(pPix);
+		info.src_line_len = info.dst_line_len = info.w * info.cpp;
+		info.src_height = info.dst_height = info.h;
+
+		ret = nv_accel_m2mf_copy_bo(pScrn, &info);
+		if (!ret)
+			xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Failed to copy linear data to tiled bo.\n");
+
+		nouveau_bo_unmap(nvpix->lbo);
+		nouveau_bo_ref(NULL, &nvpix->lbo);
+	} else
+		nouveau_bo_unmap(nvpix->bo);
+
 	pPix->devPrivate.ptr = NULL;
 }
 
@@ -726,6 +934,9 @@ NVExaPixmapIsOffscreen(PixmapPtr pPix)
 	if (!nvpix || !nvpix->bo)
 		return FALSE;
 
+	if (pPix->drawable.bitsPerPixel < 8)
+		return FALSE;
+
 	return TRUE;
 }
 
@@ -791,8 +1002,9 @@ NVExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, int depth,
 			 */
 			uint32_t bpp = pPixmap->drawable.bitsPerPixel;
 
-			/* At some point we should just keep bpp 1 pixmaps in sysram. */
-			flags = NOUVEAU_BO_VRAM;
+			/* Keep bpp 1 pixmaps in sysram. */
+			if (pPixmap->drawable.bitsPerPixel >= 8)
+				flags |= NOUVEAU_BO_VRAM;
 
 			/* Assuming that exa doesn't mess with devKind. */
 			/* The migration code isn't touched so that is a fairly safe assumption. */
@@ -809,12 +1021,23 @@ NVExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height, int depth,
 			if (nouveau_bo_new(pNv->dev, flags, 0, nvpix->size,
 					   &nvpix->bo)) {
 				xfree(nvpix);
+				ErrorF("nouveau_bo_new failed\n");
 				return FALSE;
 			}
 
 			/* We don't want devPrivate.ptr set at all. */
 			miModifyPixmapHeader(pPixmap, width, height, depth, bitsPerPixel, devKind, NULL);
 
+			/* Permanently map sysram pixmaps. This is what exa expects for !offscreen. */
+			if (pPixmap->drawable.bitsPerPixel < 8) {
+				if (nouveau_bo_map(nvpix->bo, NOUVEAU_BO_RDWR)) {
+					ErrorF("Mapping of sysram pixmap failed.\n");
+					return FALSE;
+				}
+				pPixmap->devPrivate.ptr = nvpix->bo->map;
+				ErrorF("ptr 0x%08X\n", pPixmap->devPrivate.ptr);
+			}
+
 			/* Returning TRUE means the ModifyPixmapHeader chain needs to be stopped. */
 			/* Otherwise you end up with devKind being overriden again. */
 			return TRUE;
diff --git a/src/nv_type.h b/src/nv_type.h
index 0e6e890..40f8dae 100644
--- a/src/nv_type.h
+++ b/src/nv_type.h
@@ -357,9 +357,25 @@ enum LVDS_script {
 
 struct nouveau_pixmap {
 	struct nouveau_bo *bo;
+	struct nouveau_bo *lbo; /* Linear copy of a tiled bo (nv50); allocated in PrepareAccess, released in FinishAccess. */
+	uint32_t flags; /* Bitmask of (1 << index) for every PrepareAccess not yet matched by a FinishAccess. */
 	int size;
 };
 
+struct nouveau_bo_copy {
+	/* Buffer objects: the copy reads from src and writes to dst. */
+	struct nouveau_bo *src;
+	struct nouveau_bo *dst;
+
+	/* Start position (x, y) in pixels and dimensions to copy; h is the number of lines. */
+	int x, y, w, h;
+
+	/* Pixmap information: cpp is bytes per pixel (bitsPerPixel >> 3); line_len is w * cpp. */
+	int cpp;
+	int src_pitch, src_line_len, src_height;
+	int dst_pitch, dst_line_len, dst_height;
+};
+
 static inline struct nouveau_pixmap *
 nouveau_pixmap(PixmapPtr pPixmap)
 {
@@ -604,7 +620,7 @@ typedef struct _NVPortPrivRec {
 
 #define TIMER_MASK      (OFF_TIMER | FREE_TIMER)
 
-#if 0
+#if 1
 #define NOUVEAU_FALLBACK(fmt,args...) do {        \
 	fprintf(stderr, "FALLBACK: "fmt, ##args); \
 	return FALSE;                             \
