Not much has changed, except that I've been test-running these patches
for a few weeks now.

I'm actually surprised I've avoided out-of-memory issues.

Patch 2 remains a temporary hack, awaiting a structural fix by darktama.

The whole thing works fine, but XSHM is an issue
(http://stillunknown.livejournal.com/928.html). With it disabled most
apps are fine, although a few issues remain.

As long as rendering isn't too dependent on software rendering (in
pixman), things are fine (better?).

I will be away for some time in a few weeks, so I'm not sure if I will
see the proper solution for patch 2.

Maarten.
From 3425f32eb0d5c664cd5a4141812bc002960de795 Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <[email protected]>
Date: Sat, 7 Mar 2009 23:49:19 +0100
Subject: [PATCH 1/6] nv50: implement wfb

- Only for sufficiently new X servers and with exa_driver_pixmaps.
---
 src/nouveau_exa.c |  282 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 src/nv50_exa.c    |   31 +++++-
 src/nv_driver.c   |   52 ++++++++--
 src/nv_proto.h    |   10 ++
 src/nv_type.h     |    4 +-
 5 files changed, 357 insertions(+), 22 deletions(-)

diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index b7bcc87..74804ec 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -89,7 +89,7 @@ NVAccelDownloadM2MF(PixmapPtr pspix, int x, int y, int w, int h,
 			if (!linear) {
 				BEGIN_RING(chan, m2mf, 0x0200, 7);
 				OUT_RING  (chan, 0);
-				OUT_RING  (chan, 0);
+				OUT_RING  (chan, nv50_exa_get_tile_mode(pspix));
 				OUT_RING  (chan, pspix->drawable.width * cpp);
 				OUT_RING  (chan, pspix->drawable.height);
 				OUT_RING  (chan, 1);
@@ -210,7 +210,7 @@ NVAccelUploadM2MF(PixmapPtr pdpix, int x, int y, int w, int h,
 			if (!linear) {
 				BEGIN_RING(chan, m2mf, 0x021c, 7);
 				OUT_RING  (chan, 0);
-				OUT_RING  (chan, 0);
+				OUT_RING  (chan, nv50_exa_get_tile_mode(pdpix));
 				OUT_RING  (chan, pdpix->drawable.width * cpp);
 				OUT_RING  (chan, pdpix->drawable.height);
 				OUT_RING  (chan, 1);
@@ -259,7 +259,11 @@ nouveau_exa_mark_sync(ScreenPtr pScreen)
 static void
 nouveau_exa_wait_marker(ScreenPtr pScreen, int marker)
 {
-	NVSync(xf86Screens[pScreen->myNum]);
+	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+	NVPtr pNv = NVPTR(pScrn);
+
+	if (!pNv->exa_driver_pixmaps)
+		NVSync(xf86Screens[pScreen->myNum]);
 }
 
 static Bool
@@ -351,17 +355,32 @@ nouveau_exa_modify_pixmap_header(PixmapPtr ppix, int width, int height,
 
 	if (!nvpix->bo && nvpix->size) {
 		uint32_t cpp = ppix->drawable.bitsPerPixel >> 3;
-		/* At some point we should just keep 1bpp pixmaps in sysram */
 		uint32_t flags = NOUVEAU_BO_VRAM;
 		int ret;
 
 		if (pNv->Architecture >= NV_ARCH_50 && cpp) {
-			uint32_t aw = (width + 7) & ~7;
-			uint32_t ah = (height + 7) & ~7;
+			uint32_t ah;
+			if (height > 47) {
+				ah = (height + 63) & ~63;
+				nvpix->tiling_mode = 5;
+			} else if (height > 23) {
+				ah = (height + 31) & ~31;
+				nvpix->tiling_mode = 4;
+			} else if (height > 11) {
+				ah = (height + 15) & ~15;
+				nvpix->tiling_mode = 3;
+			} else if (height > 5) {
+				ah = (height + 7) & ~7;
+				nvpix->tiling_mode = 2;
+			} else {
+				ah = (height + 3) & ~3;
+				nvpix->tiling_mode = 1;
+			}
 
 			flags |= NOUVEAU_BO_TILED;
 
-			devkind = ((aw * cpp) + 63) & ~63;
+			/* This allignment is very important. */
+			devkind = (width * cpp + 63) & ~63;
 			nvpix->size = devkind * ah;
 		}
 
@@ -390,8 +409,11 @@ nouveau_exa_pixmap_is_tiled(PixmapPtr ppix)
 	NVPtr pNv = NVPTR(pScrn);
 
 	if (pNv->exa_driver_pixmaps) {
-		if (!nouveau_pixmap_bo(ppix)->tiled)
+		if (!nouveau_pixmap_bo(ppix))
+			return false;
+		if (nouveau_pixmap_bo(ppix)->tiled == 0)
 			return false;
+		return true;
 	} else
 	if (pNv->Architecture < NV_ARCH_50 ||
 	    exaGetPixmapOffset(ppix) < pNv->EXADriverPtr->offScreenBase)
@@ -403,10 +425,12 @@ nouveau_exa_pixmap_is_tiled(PixmapPtr ppix)
 static void *
 nouveau_exa_pixmap_map(PixmapPtr ppix)
 {
+	ScrnInfoPtr pScrn = xf86Screens[ppix->drawable.pScreen->myNum];
+	NVPtr pNv = NVPTR(pScrn);
 	struct nouveau_bo *bo = nouveau_pixmap_bo(ppix);
 	unsigned delta = nouveau_pixmap_offset(ppix);
 
-	if (bo->tiled) {
+	if (!pNv->wfb_enabled && bo->tiled) {
 		struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
 
 		nvpix->map_refcount++;
@@ -430,9 +454,11 @@ nouveau_exa_pixmap_map(PixmapPtr ppix)
 static void
 nouveau_exa_pixmap_unmap(PixmapPtr ppix)
 {
+	ScrnInfoPtr pScrn = xf86Screens[ppix->drawable.pScreen->myNum];
+	NVPtr pNv = NVPTR(pScrn);
 	struct nouveau_bo *bo = nouveau_pixmap_bo(ppix);
 
-	if (bo->tiled) {
+	if (!pNv->wfb_enabled && bo->tiled) {
 		struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
 
 		if (--nvpix->map_refcount)
@@ -682,3 +708,239 @@ nouveau_exa_init(ScreenPtr pScreen)
 	pNv->EXADriverPtr = exa;
 	return TRUE;
 }
+
+/* WFB functions. */
+
+static inline FbBits
+nouveau_exa_wfb_read_memory_linear(const void *src, int size)
+{
+	FbBits bits = 0; /* Zeroed, so bytes beyond 'size' stay 0 in the result. */
+
+	memcpy(&bits, src, size); /* Untranslated copy straight from src. */
+
+	return bits;
+}
+
+static inline void
+nouveau_exa_wfb_write_memory_linear(void *dst, FbBits value, int size)
+{
+	memcpy(dst, &value, size); /* Untranslated copy of the first 'size' bytes of value. */
+}
+
+#define LINEAR_PITCH (pPixmap->devKind)
+
+/* Wfb related data. */
+static struct {
+	PixmapPtr ppix;
+	bool used;
+	bool tiled;
+	unsigned long start;
+	unsigned long end;
+	uint64_t multiply_factor;
+	uint8_t cpp;
+	unsigned int tile_height;
+	unsigned int num_tiles_width;
+} wfb_pixmaps[6];
+
+/* height: empty, 2, 4, 8, 16, 32, 64 */
+const unsigned int num_tiles[] = { 0, 0, 4, 2, 1, 1, 1 };
+const unsigned int tile_pitch[] = { 0, 3, 5, 6, 6, 6, 6 };
+const unsigned int mask_height_inv[] = {~0, ~1, ~3, ~7, ~15, ~31, ~63 };
+const unsigned int mask_pitch_inv[] = {~0, ~7, ~31, ~63, ~63, ~63, ~63 };
+const unsigned int mask_height[] = {0, 1, 3, 7, 15, 31, 63 };
+const unsigned int mask_pitch[] = {0, 7, 31, 63, 63, 63, 63 };
+
+#define X_REMAINDER (x & mask_pitch[tile_height])
+#define Y_REMAINDER (y & mask_height[tile_height])
+
+/* tile_height and tile_pitch are expressed in powers of two */
+static inline unsigned int
+nouveau_exa_wfb_recurse_offset(unsigned int offset, unsigned int tile_height,
+	unsigned int x, unsigned int y, unsigned int num_tiles_width)
+{
+	offset += (((x & mask_pitch_inv[tile_height]) >> tile_pitch[tile_height]) + (((y & mask_height_inv[tile_height]) >> tile_height) * num_tiles_width)) * (1 << (tile_height + tile_pitch[tile_height]));
+
+	if (tile_height > 1)
+		offset = nouveau_exa_wfb_recurse_offset(offset, tile_height - 1, X_REMAINDER, Y_REMAINDER, num_tiles[tile_height]);
+	else
+		offset += (Y_REMAINDER * (1 << tile_pitch[tile_height])) + X_REMAINDER;
+
+	return offset;
+}
+
+/* Read accessor handed out by nouveau_exa_wfb_setup_wrap(). Only one read and one write function can be exposed, so the linear versions are for internal consumption. */
+static FbBits
+nouveau_exa_wfb_read_memory(const void *src, int size)
+{
+	int i;
+	uint64_t line_x, line_y;
+	unsigned long offset = (unsigned long) src, subpixel_offset;
+	PixmapPtr pPixmap = NULL;
+	FbBits bits = 0;
+	void *new_src;
+
+	/* Find the wrapped pixmap whose [start, end) address range contains src. */
+	for (i = 0; i < 6; i++)
+		if (offset >= wfb_pixmaps[i].start && offset < wfb_pixmaps[i].end) {
+			pPixmap = wfb_pixmaps[i].ppix;
+			break;
+		}
+
+	if (!pPixmap || !wfb_pixmaps[i].tiled) /* No match (i is not used then) or a linear pixmap. */
+		return nouveau_exa_wfb_read_memory_linear(src, size);
+
+	/* Now comes the decoding: make offset relative to the start of the pixmap. */
+	offset -= (unsigned long) pPixmap->devPrivate.ptr;
+	/* Split off the byte position inside the pixel; the masks assume cpp is a power of two. */
+	subpixel_offset = offset & (wfb_pixmaps[i].cpp - 1);
+	offset &= ~(wfb_pixmaps[i].cpp - 1);
+
+	/* Determine the coordinate first (line_x in bytes, line_y in lines). */
+	/* Division is too expensive for large numbers, so we multiply by the factor precalculated in nouveau_exa_wfb_setup_wrap() and shift. */
+	line_y = (offset * wfb_pixmaps[i].multiply_factor) >> 32;
+	line_x = offset - line_y * LINEAR_PITCH;
+
+	new_src = pPixmap->devPrivate.ptr +
+		nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width) +
+		subpixel_offset;
+
+	memcpy(&bits, new_src, size);
+
+	return bits;
+}
+
+static void
+nouveau_exa_wfb_write_memory(void *dst, FbBits value, int size)
+{
+	int i;
+	uint64_t line_x, line_y;
+	unsigned long offset = (unsigned long) dst, subpixel_offset;
+	PixmapPtr pPixmap = NULL;
+	void *new_dst;
+
+	/* Find the wrapped pixmap whose [start, end) address range contains dst. */
+	for (i = 0; i < 6; i++)
+		if (offset >= wfb_pixmaps[i].start && offset < wfb_pixmaps[i].end) {
+			pPixmap = wfb_pixmaps[i].ppix;
+			break;
+		}
+
+	if (!pPixmap || !wfb_pixmaps[i].tiled) { /* No match (i is not used then) or a linear pixmap. */
+		nouveau_exa_wfb_write_memory_linear(dst, value, size);
+		return;
+	}
+
+	/* Now comes the decoding: make offset relative to the start of the pixmap. */
+	offset -= (unsigned long) pPixmap->devPrivate.ptr;
+	/* Split off the byte position inside the pixel; the masks assume cpp is a power of two. */
+	subpixel_offset = offset & (wfb_pixmaps[i].cpp - 1);
+	offset &= ~(wfb_pixmaps[i].cpp - 1);
+
+	/* Determine the coordinate first (line_x in bytes, line_y in lines). */
+	/* Division is too expensive for large numbers, so we multiply by the factor precalculated in nouveau_exa_wfb_setup_wrap() and shift. */
+	line_y = (offset * wfb_pixmaps[i].multiply_factor) >> 32;
+	line_x = offset - line_y * LINEAR_PITCH;
+
+	new_dst = pPixmap->devPrivate.ptr +
+		nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width) +
+		subpixel_offset;
+
+	memcpy(new_dst, &value, size);
+}
+
+void
+nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
+				WriteMemoryProcPtr *pWrite,
+				DrawablePtr pDraw)
+{
+	PixmapPtr pPixmap;
+	struct nouveau_pixmap *nvpix;
+
+	if (!pRead || !pWrite)
+		return;
+
+	pPixmap = NVGetDrawablePixmap(pDraw);
+	if (!pPixmap)
+		return;
+
+	nvpix = nouveau_pixmap(pPixmap);
+
+	int i;
+	for (i = 0; i < 6; i++) /* Look for a free slot in wfb_pixmaps[]. */
+		if (!wfb_pixmaps[i].used)
+			break;
+
+	if (i == 6) { /* All slots are in use: hand back no accessors. */
+		ErrorF("More than 6 wraps are setup, what the hell is going on?\n");
+		*pRead = NULL;
+		*pWrite = NULL;
+		return;
+	}
+
+	/* We will get a pointer somewhere in the range of this pixmap, */
+	/* based on its linear representation of course, so record that range for the lookup. */
+	wfb_pixmaps[i].ppix = pPixmap;
+	wfb_pixmaps[i].start = (unsigned long) pPixmap->devPrivate.ptr;
+	if (!nvpix || !nvpix->bo) /* No bo: empty range, so the accessors never match this slot. */
+		wfb_pixmaps[i].end = wfb_pixmaps[i].start;
+	else
+		wfb_pixmaps[i].end = wfb_pixmaps[i].start + nvpix->bo->size;
+	wfb_pixmaps[i].used = true;
+	wfb_pixmaps[i].tiled = nouveau_exa_pixmap_is_tiled(pPixmap);
+	/* Division is too expensive for large numbers, so we precalculate a multiplication factor. */
+	wfb_pixmaps[i].multiply_factor = (0xFFFFFFFF/exaGetPixmapPitch(pPixmap)) + 1;
+	wfb_pixmaps[i].cpp = (pPixmap->drawable.bitsPerPixel >> 3);
+	if (!nvpix) { /* No driver-private data: leave the tiling parameters at 0. */
+		wfb_pixmaps[i].tile_height = 0;
+		wfb_pixmaps[i].num_tiles_width = 0;
+	} else {
+		wfb_pixmaps[i].tile_height = nvpix->tiling_mode + 1;
+		wfb_pixmaps[i].num_tiles_width = LINEAR_PITCH/(1 << tile_pitch[wfb_pixmaps[i].tile_height]);
+	}
+
+	*pRead = nouveau_exa_wfb_read_memory;
+	*pWrite = nouveau_exa_wfb_write_memory;
+}
+
+void
+nouveau_exa_wfb_finish_wrap(DrawablePtr pDraw)
+{
+	PixmapPtr pPixmap;
+	int i;
+
+	pPixmap = NVGetDrawablePixmap(pDraw);
+	if (!pPixmap)
+		return;
+
+	for (i = 0; i < 6; i++) /* Only the first slot recorded for this pixmap is released. */
+		if (wfb_pixmaps[i].ppix == pPixmap) {
+			wfb_pixmaps[i].ppix = NULL;
+			wfb_pixmaps[i].start = 0;
+			wfb_pixmaps[i].end = 0;
+			wfb_pixmaps[i].used = false; /* Slot can be handed out again by setup_wrap. */
+			wfb_pixmaps[i].tiled = false;
+			wfb_pixmaps[i].multiply_factor = 0;
+			wfb_pixmaps[i].cpp = 0;
+			wfb_pixmaps[i].tile_height = 0;
+			wfb_pixmaps[i].num_tiles_width = 0;
+			break;
+		}
+}
+
+void
+nouveau_exa_wfb_init()
+{
+	int i;
+
+	for (i = 0; i < 6; i++) { /* Reset every wrap slot to the unused state. */
+		wfb_pixmaps[i].ppix = NULL;
+		wfb_pixmaps[i].start = 0;
+		wfb_pixmaps[i].end = 0; /* start == end: no address can match this slot. */
+		wfb_pixmaps[i].used = false;
+		wfb_pixmaps[i].tiled = false;
+		wfb_pixmaps[i].multiply_factor = 0;
+		wfb_pixmaps[i].cpp = 0;
+		wfb_pixmaps[i].tile_height = 0;
+		wfb_pixmaps[i].num_tiles_width = 0;
+	}
+}
diff --git a/src/nv50_exa.c b/src/nv50_exa.c
index 3831ec3..ac6b6b4 100644
--- a/src/nv50_exa.c
+++ b/src/nv50_exa.c
@@ -72,6 +72,31 @@ NV50EXABlendOp[] = {
 /* Add         */ { 0, 0, BF(                ONE), BF(                ONE) },
 };
 
+
+uint32_t
+nv50_exa_get_tile_mode(PixmapPtr ppix)
+{
+	struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
+
+	if (!nvpix) /* No driver-private pixmap data: report tile mode 0. */
+		return 0x00;
+
+	switch (nvpix->tiling_mode) { /* tiling_mode is picked from the pixmap height in nouveau_exa_modify_pixmap_header(). */
+		case 1: /* pitch 32, height 4 */
+			return 0x00;
+		case 2: /* pitch 64, height 8 */
+			return 0x10;
+		case 3: /* pitch 64, height 16 */
+			return 0x20;
+		case 4: /* pitch 64, height 32 */
+			return 0x30;
+		case 5: /* pitch 64, height 64 */
+			return 0x40;
+		default: /* Unset or unrecognised tiling_mode. */
+			return 0x00;
+	}
+}
+
 static Bool
 NV50EXA2DSurfaceFormat(PixmapPtr ppix, uint32_t *fmt)
 {
@@ -128,7 +153,7 @@ NV50EXAAcquireSurface2D(PixmapPtr ppix, int is_src)
 		BEGIN_RING(chan, eng2d, mthd, 5);
 		OUT_RING  (chan, fmt);
 		OUT_RING  (chan, 0);
-		OUT_RING  (chan, 0);
+		OUT_RING  (chan, nv50_exa_get_tile_mode(ppix));
 		OUT_RING  (chan, 1);
 		OUT_RING  (chan, 0);
 	}
@@ -457,7 +482,7 @@ NV50EXARenderTarget(PixmapPtr ppix, PicturePtr ppict)
 	OUT_RELOCh(chan, bo, delta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
 	OUT_RELOCl(chan, bo, delta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
 	OUT_RING  (chan, format);
-	OUT_RING  (chan, 0);
+	OUT_RING  (chan, nv50_exa_get_tile_mode(ppix));
 	OUT_RING  (chan, 0x00000000);
 	BEGIN_RING(chan, tesla, NV50TCL_RT_HORIZ(0), 2);
 	OUT_RING  (chan, ppix->drawable.width);
@@ -577,7 +602,7 @@ NV50EXATexture(PixmapPtr ppix, PicturePtr ppict, unsigned unit)
 		NOUVEAU_FALLBACK("invalid picture format, this SHOULD NOT HAPPEN. Expect trouble.\n");
 	}
 	OUT_RELOCl(chan, bo, delta, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
-	OUT_RING  (chan, 0xd0005000);
+	OUT_RING  (chan, 0xd0005000 | (nv50_exa_get_tile_mode(ppix) << 18));
 	OUT_RING  (chan, 0x00300000);
 	OUT_RING  (chan, ppix->drawable.width);
 	OUT_RING  (chan, (1 << NV50TIC_0_5_DEPTH_SHIFT) | ppix->drawable.height);
diff --git a/src/nv_driver.c b/src/nv_driver.c
index 4f07836..f056c68 100644
--- a/src/nv_driver.c
+++ b/src/nv_driver.c
@@ -158,6 +158,12 @@ static const char *fbSymbols[] = {
     NULL
 };
 
+static const char *wfbSymbols[] = {
+    "wfbPictureInit",
+    "wfbScreenInit",
+    NULL
+};
+
 static const char *exaSymbols[] = {
     "exaDriverInit",
     "exaOffscreenInit",
@@ -279,7 +285,7 @@ nouveauSetup(pointer module, pointer opts, int *errmaj, int *errmin)
 		 * Tell the loader about symbols from other modules that this module
 		 * might refer to.
 		 */
-		LoaderRefSymLists(vgahwSymbols, exaSymbols, fbSymbols,
+		LoaderRefSymLists(vgahwSymbols, exaSymbols, fbSymbols, wfbSymbols,
 				shadowSymbols, drmSymbols,
 				i2cSymbols, ddcSymbols, vbeSymbols,
 				int10Symbols, NULL);
@@ -1521,10 +1527,23 @@ NVPreInit(ScrnInfoPtr pScrn, int flags)
 	 * section.
 	 */
 
-	if (xf86LoadSubModule(pScrn, "fb") == NULL)
-		NVPreInitFail("\n");
+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,0,0)
+	if (!pNv->NoAccel && pNv->exa_driver_pixmaps && pNv->Architecture == NV_ARCH_50) {
+		pNv->wfb_enabled = true;
+		nouveau_exa_wfb_init();
+		if (xf86LoadSubModule(pScrn, "wfb") == NULL)
+			NVPreInitFail("\n");
 
-	xf86LoaderReqSymLists(fbSymbols, NULL);
+		xf86LoaderReqSymLists(wfbSymbols, NULL);
+	} else
+#endif
+	{
+		pNv->wfb_enabled = false;
+		if (xf86LoadSubModule(pScrn, "fb") == NULL)
+			NVPreInitFail("\n");
+
+		xf86LoaderReqSymLists(fbSymbols, NULL);
+	}
 
 	/* Load EXA if needed */
 	if (!pNv->NoAccel) {
@@ -2127,9 +2146,19 @@ NVScreenInit(int scrnIndex, ScreenPtr pScreen, int argc, char **argv)
 	switch (pScrn->bitsPerPixel) {
 		case 16:
 		case 32:
-			ret = fbScreenInit(pScreen, FBStart, pScrn->virtualX, pScrn->virtualY,
-				pScrn->xDpi, pScrn->yDpi,
-				displayWidth, pScrn->bitsPerPixel);
+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,0,0)
+			if (pNv->wfb_enabled) {
+				ret = wfbScreenInit(pScreen, FBStart, pScrn->virtualX, pScrn->virtualY,
+					pScrn->xDpi, pScrn->yDpi,
+					displayWidth, pScrn->bitsPerPixel,
+					nouveau_exa_wfb_setup_wrap, nouveau_exa_wfb_finish_wrap);
+			} else
+#endif
+			{
+				ret = fbScreenInit(pScreen, FBStart, pScrn->virtualX, pScrn->virtualY,
+					pScrn->xDpi, pScrn->yDpi,
+					displayWidth, pScrn->bitsPerPixel);
+			}
 			break;
 		default:
 			xf86DrvMsg(scrnIndex, X_ERROR,
@@ -2154,7 +2183,14 @@ NVScreenInit(int scrnIndex, ScreenPtr pScreen, int argc, char **argv)
 		}
 	}
 
-	fbPictureInit (pScreen, 0, 0);
+#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,0,0)
+	if (pNv->wfb_enabled) {
+		wfbPictureInit(pScreen, 0, 0);
+	} else
+#endif
+	{
+		fbPictureInit(pScreen, 0, 0);
+	}
 
 	xf86SetBlackWhitePixels(pScreen);
 
diff --git a/src/nv_proto.h b/src/nv_proto.h
index 1d00e1a..762b22e 100644
--- a/src/nv_proto.h
+++ b/src/nv_proto.h
@@ -18,6 +18,10 @@ void NVAccelFree(ScrnInfoPtr pScrn);
 
 /* in nv_driver.c */
 Bool   NVI2CInit(ScrnInfoPtr pScrn);
+/* We can only include fb.h in normal or wfb mode,
+ * so we have to declare one ourself. */
+extern Bool wfbPictureInit (ScreenPtr pScreen,
+		PictFormatPtr formats, int nformats);
 
 /* in nv_dri.c */
 Bool NVDRIScreenInit(ScrnInfoPtr pScrn);
@@ -69,6 +73,11 @@ void  NVTakedownDma(ScrnInfoPtr pScrn);
 Bool nouveau_exa_init(ScreenPtr pScreen);
 Bool nouveau_exa_pixmap_is_onscreen(PixmapPtr pPixmap);
 bool nouveau_exa_pixmap_is_tiled(PixmapPtr ppix);
+void nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
+				WriteMemoryProcPtr *pWrite,
+				DrawablePtr pDraw);
+void nouveau_exa_wfb_finish_wrap(DrawablePtr pDraw);
+void nouveau_exa_wfb_init();
 
 /* in nv_hw.c */
 void NVCalcStateExt(ScrnInfoPtr,struct _riva_hw_state *,int,int,int,int,int,int);
@@ -225,6 +234,7 @@ int NV40SetTexturePortAttribute(ScrnInfoPtr, Atom, INT32, pointer);
 Bool NVAccelInitNV50TCL(ScrnInfoPtr pScrn);
 
 /* in nv50_exa.c */
+uint32_t nv50_exa_get_tile_mode(PixmapPtr ppix);
 Bool NV50EXAPrepareSolid(PixmapPtr, int, Pixel, Pixel);
 void NV50EXASolid(PixmapPtr, int, int, int, int);
 void NV50EXADoneSolid(PixmapPtr);
diff --git a/src/nv_type.h b/src/nv_type.h
index f03c198..aafaef6 100644
--- a/src/nv_type.h
+++ b/src/nv_type.h
@@ -304,7 +304,8 @@ typedef struct _NVRec {
 
     uint8_t cur_head;
     ExaDriverPtr	EXADriverPtr;
-    Bool		exa_driver_pixmaps;
+    Bool exa_driver_pixmaps;
+    bool wfb_enabled;
     ScreenBlockHandlerProcPtr BlockHandler;
     CloseScreenProcPtr  CloseScreen;
     /* Cursor */
@@ -477,6 +478,7 @@ struct nouveau_pixmap {
 	void *linear;
 	unsigned size;
 	int map_refcount;
+	int tiling_mode;
 };
 
 static inline struct nouveau_pixmap *
-- 
1.6.2

From 78457b975acd680469aa82800588540dd74142be Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <[email protected]>
Date: Fri, 13 Mar 2009 09:57:12 +0100
Subject: [PATCH 2/6] exa: smarter initial mapping of driver allocated pixmaps

- Use a software copy until the first accelerated op, then UTS it.
- The path back does not exist.
---
 src/nouveau_exa.c |   64 +++++++++++++++++++++++++++++++++++++++++++++++-----
 src/nv04_exa.c    |    5 ++++
 src/nv10_exa.c    |    5 ++++
 src/nv30_exa.c    |    5 ++++
 src/nv40_exa.c    |    5 ++++
 src/nv50_exa.c    |   10 ++++++++
 src/nv_proto.h    |    1 +
 src/nv_type.h     |    1 +
 8 files changed, 89 insertions(+), 7 deletions(-)

diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index 74804ec..72e783e 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -328,6 +328,9 @@ nouveau_exa_destroy_pixmap(ScreenPtr pScreen, void *priv)
 	if (!nvpix)
 		return;
 
+	if (nvpix->linear)
+		xfree(nvpix->linear);
+
 	nouveau_bo_ref(NULL, &nvpix->bo);
 	xfree(nvpix);
 }
@@ -422,6 +425,39 @@ nouveau_exa_pixmap_is_tiled(PixmapPtr ppix)
 	return true;
 }
 
+void
+nouveau_exa_pixmap_prepare_for_accel(PixmapPtr ppix)
+{
+	ScrnInfoPtr pScrn = xf86Screens[ppix->drawable.pScreen->myNum];
+	NVPtr pNv = NVPTR(pScrn);
+	struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
+
+	if (!pNv->exa_driver_pixmaps) /* Nothing to do unless the driver allocates the pixmaps. */
+		return;
+
+	if (nvpix->linear) { /* Pending software copy: upload it first. NOTE(review): nvpix is not NULL-checked here. */
+		int cpp = ppix->drawable.bitsPerPixel >> 3;
+		if ((ppix->drawable.width * ppix->drawable.height * cpp) < 16*1024) { /* Same 16 KiB heuristic as nouveau_exa_upload_to_screen(). */
+			if (pNv->Architecture == NV_ARCH_50)
+				NV50EXAUploadSIFC(nvpix->linear, ppix->devKind, ppix, 0, 0, ppix->drawable.width,
+							ppix->drawable.height, cpp);
+			else
+				NV04EXAUploadIFC(pScrn, nvpix->linear, ppix->devKind, ppix, 0, 0, ppix->drawable.width,
+							ppix->drawable.height, cpp);
+			exaMarkSync(ppix->drawable.pScreen);
+		} else {
+			NVAccelUploadM2MF(ppix, 0, 0, ppix->drawable.width,
+				  ppix->drawable.height, nvpix->linear,
+				  ppix->devKind);
+		}
+
+		xfree(nvpix->linear); /* The software copy is no longer needed after the upload. */
+		nvpix->linear = NULL;
+	}
+
+	nvpix->dirty = true; /* nouveau_exa_pixmap_map() stops handing out a fresh software copy once this is set. */
+}
+
 static void *
 nouveau_exa_pixmap_map(PixmapPtr ppix)
 {
@@ -429,15 +465,26 @@ nouveau_exa_pixmap_map(PixmapPtr ppix)
 	NVPtr pNv = NVPTR(pScrn);
 	struct nouveau_bo *bo = nouveau_pixmap_bo(ppix);
 	unsigned delta = nouveau_pixmap_offset(ppix);
+	struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
 
-	if (!pNv->wfb_enabled && bo->tiled) {
-		struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
+	/* Attempt to handle first access with more grace.
+	 * This handles multiple sw accesses (such as trapezoid rasterisation).
+	 * This avoids the first access on potentially uncached memory.
+	 */
+	if (!nvpix->dirty) {
+		if (!nvpix->linear)
+			nvpix->linear = xalloc(ppix->devKind * ppix->drawable.height);
 
+		nouveau_bo_map(bo, NOUVEAU_BO_RDWR);
+		return nvpix->linear;
+	}
+
+	if (!pNv->wfb_enabled && bo->tiled && nvpix && nvpix->dirty) {
 		nvpix->map_refcount++;
 		if (nvpix->linear)
 			return nvpix->linear;
 
-		nvpix->linear = xcalloc(1, ppix->devKind * ppix->drawable.height);
+		nvpix->linear = xalloc(ppix->devKind * ppix->drawable.height);
 
 		NVAccelDownloadM2MF(ppix, 0, 0, ppix->drawable.width,
 				    ppix->drawable.height, nvpix->linear,
@@ -457,10 +504,9 @@ nouveau_exa_pixmap_unmap(PixmapPtr ppix)
 	ScrnInfoPtr pScrn = xf86Screens[ppix->drawable.pScreen->myNum];
 	NVPtr pNv = NVPTR(pScrn);
 	struct nouveau_bo *bo = nouveau_pixmap_bo(ppix);
+	struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
 
-	if (!pNv->wfb_enabled && bo->tiled) {
-		struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix);
-
+	if (!pNv->wfb_enabled && bo->tiled && nvpix && nvpix->dirty) {
 		if (--nvpix->map_refcount)
 			return;
 
@@ -489,6 +535,8 @@ nouveau_exa_download_from_screen(PixmapPtr pspix, int x, int y, int w, int h,
 	cpp = pspix->drawable.bitsPerPixel >> 3;
 	offset = (y * src_pitch) + (x * cpp);
 
+	nouveau_exa_pixmap_prepare_for_accel(pspix);
+
 	if (pNv->GART) {
 		if (NVAccelDownloadM2MF(pspix, x, y, w, h, dst, dst_pitch))
 			return TRUE;
@@ -516,6 +564,8 @@ nouveau_exa_upload_to_screen(PixmapPtr pdpix, int x, int y, int w, int h,
 	dst_pitch  = exaGetPixmapPitch(pdpix);
 	cpp = pdpix->drawable.bitsPerPixel >> 3;
 
+	nouveau_exa_pixmap_prepare_for_accel(pdpix);
+
 	/* try hostdata transfer */
 	if (w * h * cpp < 16*1024) /* heuristic */
 	{
@@ -886,7 +936,7 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
 	else
 		wfb_pixmaps[i].end = wfb_pixmaps[i].start + nvpix->bo->size;
 	wfb_pixmaps[i].used = true;
-	wfb_pixmaps[i].tiled = nouveau_exa_pixmap_is_tiled(pPixmap);
+	wfb_pixmaps[i].tiled = nouveau_exa_pixmap_is_tiled(pPixmap) && !nvpix->linear;
 	/* Division is too expensive for large numbers, so we precalculate a multiplication factor. */
 	wfb_pixmaps[i].multiply_factor = (0xFFFFFFFF/exaGetPixmapPitch(pPixmap)) + 1;
 	wfb_pixmaps[i].cpp = (pPixmap->drawable.bitsPerPixel >> 3);
diff --git a/src/nv04_exa.c b/src/nv04_exa.c
index de5da67..a7e9b16 100644
--- a/src/nv04_exa.c
+++ b/src/nv04_exa.c
@@ -83,6 +83,8 @@ NV04EXAPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
 	unsigned delta = nouveau_pixmap_offset(pPixmap);
 	unsigned int fmt, pitch, color;
 
+	nouveau_exa_pixmap_prepare_for_accel(pPixmap);
+
 	WAIT_RING(chan, 64);
 
 	planemask |= ~0 << pPixmap->drawable.bitsPerPixel;
@@ -190,6 +192,9 @@ NV04EXAPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int dx, int dy,
 	unsigned dst_delta = nouveau_pixmap_offset(pDstPixmap);
 	int fmt;
 
+	nouveau_exa_pixmap_prepare_for_accel(pSrcPixmap);
+	nouveau_exa_pixmap_prepare_for_accel(pDstPixmap);
+
 	WAIT_RING(chan, 64);
 
 	if (pSrcPixmap->drawable.bitsPerPixel !=
diff --git a/src/nv10_exa.c b/src/nv10_exa.c
index 291c2da..bef7d40 100644
--- a/src/nv10_exa.c
+++ b/src/nv10_exa.c
@@ -626,6 +626,11 @@ Bool NV10EXAPrepareComposite(int op,
 	NVPtr pNv = NVPTR(pScrn);
 	struct nouveau_channel *chan = pNv->chan;
 
+	nouveau_exa_pixmap_prepare_for_accel(pSrc);
+	if (pMask)
+		nouveau_exa_pixmap_prepare_for_accel(pMask);
+	nouveau_exa_pixmap_prepare_for_accel(pDst);
+
 	WAIT_RING(chan, 128);
 
 	if (NV10Check_A8plusA8_Feasability(pSrcPicture,pMaskPicture,pDstPicture,op))
diff --git a/src/nv30_exa.c b/src/nv30_exa.c
index d3f83d3..58e5f27 100644
--- a/src/nv30_exa.c
+++ b/src/nv30_exa.c
@@ -447,6 +447,11 @@ NV30EXAPrepareComposite(int op, PicturePtr psPict,
 	int fpid = NV30EXA_FPID_PASS_COL0;
 	NV30EXA_STATE;
 
+	nouveau_exa_pixmap_prepare_for_accel(psPix);
+	if (pmPix)
+		nouveau_exa_pixmap_prepare_for_accel(pmPix);
+	nouveau_exa_pixmap_prepare_for_accel(pdPix);
+
 	WAIT_RING(chan, 128);
 
 	blend = NV30_GetPictOpRec(op);
diff --git a/src/nv40_exa.c b/src/nv40_exa.c
index 68da331..d10d93b 100644
--- a/src/nv40_exa.c
+++ b/src/nv40_exa.c
@@ -406,6 +406,11 @@ NV40EXAPrepareComposite(int op, PicturePtr psPict,
 	int fpid = NV40EXA_FPID_PASS_COL0;
 	NV40EXA_STATE;
 
+	nouveau_exa_pixmap_prepare_for_accel(psPix);
+	if (pmPix)
+		nouveau_exa_pixmap_prepare_for_accel(pmPix);
+	nouveau_exa_pixmap_prepare_for_accel(pdPix);
+
 	WAIT_RING(chan, 128);
 
 	blend = NV40_GetPictOpRec(op);
diff --git a/src/nv50_exa.c b/src/nv50_exa.c
index ac6b6b4..8b3dab7 100644
--- a/src/nv50_exa.c
+++ b/src/nv50_exa.c
@@ -250,6 +250,8 @@ NV50EXAPrepareSolid(PixmapPtr pdpix, int alu, Pixel planemask, Pixel fg)
 	NV50EXA_LOCALS(pdpix);
 	uint32_t fmt;
 
+	nouveau_exa_pixmap_prepare_for_accel(pdpix);
+
 	WAIT_RING(chan, 64);
 
 	if (!NV50EXA2DSurfaceFormat(pdpix, &fmt))
@@ -311,6 +313,9 @@ NV50EXAPrepareCopy(PixmapPtr pspix, PixmapPtr pdpix, int dx, int dy,
 {
 	NV50EXA_LOCALS(pdpix);
 
+	nouveau_exa_pixmap_prepare_for_accel(pspix);
+	nouveau_exa_pixmap_prepare_for_accel(pdpix);
+
 	WAIT_RING(chan, 64);
 
 	if (!NV50EXAAcquireSurface2D(pspix, 1))
@@ -769,6 +774,11 @@ NV50EXAPrepareComposite(int op,
 	NV50EXA_LOCALS(pspix);
 	const unsigned shd_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
 
+	nouveau_exa_pixmap_prepare_for_accel(pspix);
+	if (pmpix)
+		nouveau_exa_pixmap_prepare_for_accel(pmpix);
+	nouveau_exa_pixmap_prepare_for_accel(pdpix);
+
 	WAIT_RING (chan, 128);
 	BEGIN_RING(chan, eng2d, 0x0110, 1);
 	OUT_RING  (chan, 0);
diff --git a/src/nv_proto.h b/src/nv_proto.h
index 762b22e..7d820b7 100644
--- a/src/nv_proto.h
+++ b/src/nv_proto.h
@@ -73,6 +73,7 @@ void  NVTakedownDma(ScrnInfoPtr pScrn);
 Bool nouveau_exa_init(ScreenPtr pScreen);
 Bool nouveau_exa_pixmap_is_onscreen(PixmapPtr pPixmap);
 bool nouveau_exa_pixmap_is_tiled(PixmapPtr ppix);
+void nouveau_exa_pixmap_prepare_for_accel(PixmapPtr ppix);
 void nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
 				WriteMemoryProcPtr *pWrite,
 				DrawablePtr pDraw);
diff --git a/src/nv_type.h b/src/nv_type.h
index aafaef6..5cee6b3 100644
--- a/src/nv_type.h
+++ b/src/nv_type.h
@@ -479,6 +479,7 @@ struct nouveau_pixmap {
 	unsigned size;
 	int map_refcount;
 	int tiling_mode;
+	bool dirty;
 };
 
 static inline struct nouveau_pixmap *
-- 
1.6.2

From af38223fe187797d2fbd94f59d549b1e039a4a0f Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <[email protected]>
Date: Sat, 14 Mar 2009 18:07:31 +0100
Subject: [PATCH 3/6] nv50: support NV9X hw with wfb

- NVAX hardware seems to have the NV5X and NV8X behaviour (based on one sample).
---
 src/nouveau_exa.c |   50 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index 72e783e..b8bbb53 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -385,6 +385,12 @@ nouveau_exa_modify_pixmap_header(PixmapPtr ppix, int width, int height,
 			/* This allignment is very important. */
 			devkind = (width * cpp + 63) & ~63;
 			nvpix->size = devkind * ah;
+
+			/* A 128 bytes block is potentially moved 6 positions ahead.
+			 * It would be very difficult to predict this, so we overallocate.
+			 */
+			if (pNv->NVArch >= 0x90 && pNv->NVArch < 0xA0)
+				nvpix->size += 768;
 		}
 
 		ret = nouveau_bo_new(pNv->dev, flags, 0, nvpix->size,
@@ -800,21 +806,51 @@ const unsigned int mask_pitch_inv[] = {~0, ~7, ~31, ~63, ~63, ~63, ~63 };
 const unsigned int mask_height[] = {0, 1, 3, 7, 15, 31, 63 };
 const unsigned int mask_pitch[] = {0, 7, 31, 63, 63, 63, 63 };
 
+/* This applies to nv9X hw. They do strange memory rearranging. */
+/* 128 bytes blocks (which correspond to 32x4 tiles) are moved forward and backward in a special pattern. */
+static bool nv90_mode;
+const int pattern[] = { 0, 1, 2, 3, /**/ 0, 1, 2, 3, /**/ 1, 2, 3, /**/ 0, 2, 3, /**/
+					0, 1, 3, /**/ 0, 1, 2, /**/ 1, 2, 3, /**/ 0, 2, 3, /**/ 0, 1, 3, /**/ 0, 1, 2 /**/};
+const int tile_offset[] = {
+					0, 0, 0, 0, 0, 0, 0, 0, /* type 0 */
+					2, 2, 2, 2, 2, 2, -6, -6, /* type 1 */
+					4, 4, 4, 4, -4, -4, -4, -4, /* type 2 */
+					6, 6, -2, -2, -2, -2, -2, -2 /* type 3 */
+					};
+
 #define X_REMAINDER (x & mask_pitch[tile_height])
 #define Y_REMAINDER (y & mask_height[tile_height])
 
 /* tile_height and tile_pitch are expressed in powers of two */
 static inline unsigned int
 nouveau_exa_wfb_recurse_offset(unsigned int offset, unsigned int tile_height,
-	unsigned int x, unsigned int y, unsigned int num_tiles_width)
+	unsigned int x, unsigned int y, unsigned int num_tiles_width, bool first)
 {
 	offset += (((x & mask_pitch_inv[tile_height]) >> tile_pitch[tile_height]) + (((y & mask_height_inv[tile_height]) >> tile_height) * num_tiles_width)) * (1 << (tile_height + tile_pitch[tile_height]));
 
 	if (tile_height > 1)
-		offset = nouveau_exa_wfb_recurse_offset(offset, tile_height - 1, X_REMAINDER, Y_REMAINDER, num_tiles[tile_height]);
+		offset = nouveau_exa_wfb_recurse_offset(offset, tile_height - 1, X_REMAINDER, Y_REMAINDER, num_tiles[tile_height], false);
 	else
 		offset += (Y_REMAINDER * (1 << tile_pitch[tile_height])) + X_REMAINDER;
 
+	/* Here comes the correction for the unusual memory mapping of NV9X hw. */
+	if (first && nv90_mode) {
+		unsigned int suboffset, suboffset2, suboffset3;
+
+		/* 128 byte blocks within a larger block of 32768 bytes */
+		/* the first block misses the first "0, 1, 2, 3" sequence, so we add an extra 4096 bytes offset. */
+		suboffset = ((offset + 4096) & 0x7FFF) & ~0x7F;
+		/* now we have 256 blocks */
+		suboffset >>= 7;
+
+		/* now we have 32 rows */
+		suboffset2 = suboffset >> 3;
+		/* tile within row */
+		suboffset3 = suboffset & 0x7;
+
+		offset += tile_offset[suboffset3 + 8*pattern[suboffset2]] * 128;
+	}
+
 	return offset;
 }
 
@@ -851,7 +887,7 @@ nouveau_exa_wfb_read_memory(const void *src, int size)
 	line_x = offset - line_y * LINEAR_PITCH;
 
 	new_src = pPixmap->devPrivate.ptr +
-		nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width) +
+		nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width, true) +
 		subpixel_offset;
 
 	memcpy(&bits, new_src, size);
@@ -892,7 +928,7 @@ nouveau_exa_wfb_write_memory(void *dst, FbBits value, int size)
 	line_x = offset - line_y * LINEAR_PITCH;
 
 	new_dst = pPixmap->devPrivate.ptr +
-		nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width) +
+		nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width, true) +
 		subpixel_offset;
 
 	memcpy(new_dst, &value, size);
@@ -905,6 +941,8 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
 {
 	PixmapPtr pPixmap;
 	struct nouveau_pixmap *nvpix;
+	ScrnInfoPtr pScrn = xf86Screens[pDraw->pScreen->myNum];
+	NVPtr pNv = NVPTR(pScrn);
 
 	if (!pRead || !pWrite)
 		return;
@@ -947,6 +985,10 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
 		wfb_pixmaps[i].tile_height = nvpix->tiling_mode + 1;
 		wfb_pixmaps[i].num_tiles_width = LINEAR_PITCH/(1 << tile_pitch[wfb_pixmaps[i].tile_height]);
 	}
+	if (pNv->NVArch >= 0x90 && pNv->NVArch < 0xA0)
+		nv90_mode = true;
+	else
+		nv90_mode = false;
 
 	*pRead = nouveau_exa_wfb_read_memory;
 	*pWrite = nouveau_exa_wfb_write_memory;
-- 
1.6.2

From 882b51660c3ff04e3c55f23e5eb112c0ebe23773 Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <[email protected]>
Date: Sat, 14 Mar 2009 19:38:09 +0100
Subject: [PATCH 4/6] exa: don't put cpp == 0 pixmaps in vram

---
 src/nouveau_exa.c |    6 +++++-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index b8bbb53..4cbcf50 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -358,9 +358,13 @@ nouveau_exa_modify_pixmap_header(PixmapPtr ppix, int width, int height,
 
 	if (!nvpix->bo && nvpix->size) {
 		uint32_t cpp = ppix->drawable.bitsPerPixel >> 3;
-		uint32_t flags = NOUVEAU_BO_VRAM;
+		uint32_t flags = 0;
 		int ret;
 
+		/* Let's not waste vram on useless pixmaps. */
+		if (cpp)
+			flags |= NOUVEAU_BO_VRAM;
+
 		if (pNv->Architecture >= NV_ARCH_50 && cpp) {
 			uint32_t ah;
 			if (height > 47) {
-- 
1.6.2

From e51057b34064fb36d4d4873a85a07ff3d46e752a Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <[email protected]>
Date: Sat, 14 Mar 2009 23:26:46 +0100
Subject: [PATCH 5/6] xv: some fixes

---
 src/nouveau_xv.c |   24 +++++++++++++-----------
 src/nv50_xv.c    |    2 +-
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/src/nouveau_xv.c b/src/nouveau_xv.c
index 246647b..33e49c5 100644
--- a/src/nouveau_xv.c
+++ b/src/nouveau_xv.c
@@ -718,10 +718,8 @@ NV_calculate_pitches_and_mem_size(NVPtr pNv, int action_flags, int *srcPitch,
 {
 	int tmp;
 
-	if (pNv->Architecture >= NV_ARCH_50) {
-		npixels = (npixels + 7) & ~7;
-		nlines = (nlines + 7) & ~7;
-	}
+	if (pNv->Architecture >= NV_ARCH_50)
+		nlines = (nlines + 3) & ~3;
 
 	if (action_flags & IS_YV12) {
 		*srcPitch = (width + 3) & ~3;	/* of luma */
@@ -1252,13 +1250,17 @@ CPU_copy:
 		exaMoveInPixmap(ppix);
 
 		/* check if it made it offscreen */
-#if NOUVEAU_EXA_PIXMAPS
-		if (!pNv->EXADriverPtr->PixmapIsOffscreen(ppix))
-#else
-		if (exaGetPixmapOffset(ppix) >= pNv->EXADriverPtr->memorySize)
-#endif
-			/* we lost, insufficient space probably */
-			return BadAlloc;
+		if (pNv->EXADriverPtr->PixmapIsOffscreen) {
+			if (!pNv->EXADriverPtr->PixmapIsOffscreen(ppix)) {
+				/* we lost, insufficient space probably */
+				return BadAlloc;
+			}
+		} else {
+			if (exaGetPixmapOffset(ppix) >= pNv->EXADriverPtr->memorySize) {
+				/* we lost, insufficient space probably */
+				return BadAlloc;
+			}
+		}
 
 		ExaOffscreenMarkUsed(ppix);
 
diff --git a/src/nv50_xv.c b/src/nv50_xv.c
index 9601326..df35b1a 100644
--- a/src/nv50_xv.c
+++ b/src/nv50_xv.c
@@ -76,7 +76,7 @@ nv50_xv_state_emit(PixmapPtr ppix, int id, struct nouveau_bo *src,
 	case 24: OUT_RING  (chan, NV50TCL_RT_FORMAT_24BPP); break;
 	case 16: OUT_RING  (chan, NV50TCL_RT_FORMAT_16BPP); break;
 	}
-	OUT_RING  (chan, 0);
+	OUT_RING  (chan, nv50_exa_get_tile_mode(ppix));
 	OUT_RING  (chan, 0);
 	BEGIN_RING(chan, tesla, NV50TCL_RT_HORIZ(0), 2);
 	OUT_RING  (chan, ppix->drawable.width);
-- 
1.6.2

From 13f16b41f7d8c703a8d5eec78ed7e64588e014cf Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <[email protected]>
Date: Wed, 18 Mar 2009 09:36:51 +0100
Subject: [PATCH 6/6] NV50: add NV84 wfb support

---
 src/nouveau_exa.c |   58 +++++++++++++++++++++++++++++++++++++++-------------
 src/nv_driver.c   |    2 +-
 src/nv_proto.h    |    2 +-
 3 files changed, 45 insertions(+), 17 deletions(-)

diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
index 4cbcf50..43d9945 100644
--- a/src/nouveau_exa.c
+++ b/src/nouveau_exa.c
@@ -390,11 +390,13 @@ nouveau_exa_modify_pixmap_header(PixmapPtr ppix, int width, int height,
 			devkind = (width * cpp + 63) & ~63;
 			nvpix->size = devkind * ah;
 
-			/* A 128 bytes block is potentially moved 6 positions ahead.
+			/* A 128-byte block is potentially moved up to 6 (NV9x) or 2 (NV84) positions ahead.
 			 * It would be very difficult to predict this, so we overallocate.
 			 */
 			if (pNv->NVArch >= 0x90 && pNv->NVArch < 0xA0)
 				nvpix->size += 768;
+			else if (pNv->NVArch == 0x84)
+				nvpix->size += 256;
 		}
 
 		ret = nouveau_bo_new(pNv->dev, flags, 0, nvpix->size,
@@ -810,16 +812,20 @@ const unsigned int mask_pitch_inv[] = {~0, ~7, ~31, ~63, ~63, ~63, ~63 };
 const unsigned int mask_height[] = {0, 1, 3, 7, 15, 31, 63 };
 const unsigned int mask_pitch[] = {0, 7, 31, 63, 63, 63, 63 };
 
-/* This applies to nv9X hw. They do strange memory rearranging. */
+/* Some cards do strange memory rearrangement. */
 /* 128 bytes blocks (which correspond to 32x4 tiles) are moved forward and backward in a special pattern. */
-static bool nv90_mode;
-const int pattern[] = { 0, 1, 2, 3, /**/ 0, 1, 2, 3, /**/ 1, 2, 3, /**/ 0, 2, 3, /**/
+static bool nv9x_mode;
+static bool nv84_mode;
+const int pattern_nv9x[] = { 0, 1, 2, 3, /**/ 0, 1, 2, 3, /**/ 1, 2, 3, /**/ 0, 2, 3, /**/
 					0, 1, 3, /**/ 0, 1, 2, /**/ 1, 2, 3, /**/ 0, 2, 3, /**/ 0, 1, 3, /**/ 0, 1, 2 /**/};
+const int pattern_nv84[] = { 4, /**/ 5, 5, /**/ 4, 5, /**/ 4, 4, /**/ 5, 5, /**/ 4, 4, /**/ 5, 4, /**/ 5, 5, /**/ 4 };
 const int tile_offset[] = {
 					0, 0, 0, 0, 0, 0, 0, 0, /* type 0 */
 					2, 2, 2, 2, 2, 2, -6, -6, /* type 1 */
 					4, 4, 4, 4, -4, -4, -4, -4, /* type 2 */
-					6, 6, -2, -2, -2, -2, -2, -2 /* type 3 */
+					6, 6, -2, -2, -2, -2, -2, -2, /* type 3 */
+					0, 0, 0, 0, 2, 2, -2, -2, /* type 4 */
+					2, 2, -2, -2, 0, 0, 0, 0, /* type 5 */
 					};
 
 #define X_REMAINDER (x & mask_pitch[tile_height])
@@ -837,8 +843,24 @@ nouveau_exa_wfb_recurse_offset(unsigned int offset, unsigned int tile_height,
 	else
 		offset += (Y_REMAINDER * (1 << tile_pitch[tile_height])) + X_REMAINDER;
 
-	/* Here comes the correction for the unusual memory mapping of NV9X hw. */
-	if (first && nv90_mode) {
+	/* Here comes the correction for the unusual memory mapping of some hw. */
+	if (first && nv84_mode) {
+		unsigned int suboffset, suboffset2, suboffset3;
+
+		/* 128 byte blocks within a larger block of 16384 bytes */
+		suboffset = (offset & 0x3FFF) & ~0x7F;
+		/* now we have 128 blocks */
+		suboffset >>= 7;
+
+		/* now we have 16 rows */
+		suboffset2 = suboffset >> 3;
+		/* tile within row */
+		suboffset3 = suboffset & 0x7;
+
+		offset += tile_offset[suboffset3 + 8*pattern_nv84[suboffset2]] * 128;
+	}
+
+	if (first && nv9x_mode) {
 		unsigned int suboffset, suboffset2, suboffset3;
 
 		/* 128 byte blocks within a larger block of 32768 bytes */
@@ -852,7 +874,7 @@ nouveau_exa_wfb_recurse_offset(unsigned int offset, unsigned int tile_height,
 		/* tile within row */
 		suboffset3 = suboffset & 0x7;
 
-		offset += tile_offset[suboffset3 + 8*pattern[suboffset2]] * 128;
+		offset += tile_offset[suboffset3 + 8*pattern_nv9x[suboffset2]] * 128;
 	}
 
 	return offset;
@@ -945,8 +967,6 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
 {
 	PixmapPtr pPixmap;
 	struct nouveau_pixmap *nvpix;
-	ScrnInfoPtr pScrn = xf86Screens[pDraw->pScreen->myNum];
-	NVPtr pNv = NVPTR(pScrn);
 
 	if (!pRead || !pWrite)
 		return;
@@ -989,10 +1009,6 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
 		wfb_pixmaps[i].tile_height = nvpix->tiling_mode + 1;
 		wfb_pixmaps[i].num_tiles_width = LINEAR_PITCH/(1 << tile_pitch[wfb_pixmaps[i].tile_height]);
 	}
-	if (pNv->NVArch >= 0x90 && pNv->NVArch < 0xA0)
-		nv90_mode = true;
-	else
-		nv90_mode = false;
 
 	*pRead = nouveau_exa_wfb_read_memory;
 	*pWrite = nouveau_exa_wfb_write_memory;
@@ -1024,10 +1040,22 @@ nouveau_exa_wfb_finish_wrap(DrawablePtr pDraw)
 }
 
 void
-nouveau_exa_wfb_init()
+nouveau_exa_wfb_init(ScrnInfoPtr pScrn)
 {
+	NVPtr pNv = NVPTR(pScrn);
 	int i;
 
+	if (pNv->NVArch >= 0x90 && pNv->NVArch < 0xA0) {
+		nv9x_mode = true;
+		nv84_mode = false;
+	} else if (pNv->NVArch == 0x84) {
+		nv9x_mode = false;
+		nv84_mode = true;
+	} else {
+		nv9x_mode = false;
+		nv84_mode = false;
+	}
+
 	for (i = 0; i < 6; i++) {
 		wfb_pixmaps[i].ppix = NULL;
 		wfb_pixmaps[i].start = 0;
diff --git a/src/nv_driver.c b/src/nv_driver.c
index f056c68..b66e510 100644
--- a/src/nv_driver.c
+++ b/src/nv_driver.c
@@ -1530,7 +1530,7 @@ NVPreInit(ScrnInfoPtr pScrn, int flags)
 #if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,0,0)
 	if (!pNv->NoAccel && pNv->exa_driver_pixmaps && pNv->Architecture == NV_ARCH_50) {
 		pNv->wfb_enabled = true;
-		nouveau_exa_wfb_init();
+		nouveau_exa_wfb_init(pScrn);
 		if (xf86LoadSubModule(pScrn, "wfb") == NULL)
 			NVPreInitFail("\n");
 
diff --git a/src/nv_proto.h b/src/nv_proto.h
index 7d820b7..44c8355 100644
--- a/src/nv_proto.h
+++ b/src/nv_proto.h
@@ -78,7 +78,7 @@ void nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead,
 				WriteMemoryProcPtr *pWrite,
 				DrawablePtr pDraw);
 void nouveau_exa_wfb_finish_wrap(DrawablePtr pDraw);
-void nouveau_exa_wfb_init();
+void nouveau_exa_wfb_init(ScrnInfoPtr pScrn);
 
 /* in nv_hw.c */
 void NVCalcStateExt(ScrnInfoPtr,struct _riva_hw_state *,int,int,int,int,int,int);
-- 
1.6.2

_______________________________________________
Nouveau mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/nouveau

Reply via email to