Not much has changed, except that I've been test-running these patches for a few weeks now.
I'm actually surprised I've avoided Out-Of-Memory issues. Patch 2 remains a temporary hack, awaiting a structural fix by darktama. The whole thing works fine, but XSHM is an issue (http://stillunknown.livejournal.com/928.html). With it disabled, most apps are fine, although a few issues remain. As long as rendering isn't too dependent on software rendering (in pixman), things are fine (better?). I will be away for some time in a few weeks, so I'm not sure if I will see the proper solution to patch 2. Maarten.
From 3425f32eb0d5c664cd5a4141812bc002960de795 Mon Sep 17 00:00:00 2001 From: Maarten Maathuis <[email protected]> Date: Sat, 7 Mar 2009 23:49:19 +0100 Subject: [PATCH 1/6] nv50: implement wfb - Only for sufficiently new xserver's and exa_driver_pixmaps. --- src/nouveau_exa.c | 282 +++++++++++++++++++++++++++++++++++++++++++++++++++-- src/nv50_exa.c | 31 +++++- src/nv_driver.c | 52 ++++++++-- src/nv_proto.h | 10 ++ src/nv_type.h | 4 +- 5 files changed, 357 insertions(+), 22 deletions(-) diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c index b7bcc87..74804ec 100644 --- a/src/nouveau_exa.c +++ b/src/nouveau_exa.c @@ -89,7 +89,7 @@ NVAccelDownloadM2MF(PixmapPtr pspix, int x, int y, int w, int h, if (!linear) { BEGIN_RING(chan, m2mf, 0x0200, 7); OUT_RING (chan, 0); - OUT_RING (chan, 0); + OUT_RING (chan, nv50_exa_get_tile_mode(pspix)); OUT_RING (chan, pspix->drawable.width * cpp); OUT_RING (chan, pspix->drawable.height); OUT_RING (chan, 1); @@ -210,7 +210,7 @@ NVAccelUploadM2MF(PixmapPtr pdpix, int x, int y, int w, int h, if (!linear) { BEGIN_RING(chan, m2mf, 0x021c, 7); OUT_RING (chan, 0); - OUT_RING (chan, 0); + OUT_RING (chan, nv50_exa_get_tile_mode(pdpix)); OUT_RING (chan, pdpix->drawable.width * cpp); OUT_RING (chan, pdpix->drawable.height); OUT_RING (chan, 1); @@ -259,7 +259,11 @@ nouveau_exa_mark_sync(ScreenPtr pScreen) static void nouveau_exa_wait_marker(ScreenPtr pScreen, int marker) { - NVSync(xf86Screens[pScreen->myNum]); + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + NVPtr pNv = NVPTR(pScrn); + + if (!pNv->exa_driver_pixmaps) + NVSync(xf86Screens[pScreen->myNum]); } static Bool @@ -351,17 +355,32 @@ nouveau_exa_modify_pixmap_header(PixmapPtr ppix, int width, int height, if (!nvpix->bo && nvpix->size) { uint32_t cpp = ppix->drawable.bitsPerPixel >> 3; - /* At some point we should just keep 1bpp pixmaps in sysram */ uint32_t flags = NOUVEAU_BO_VRAM; int ret; if (pNv->Architecture >= NV_ARCH_50 && cpp) { - uint32_t aw = (width + 7) & ~7; - uint32_t ah = 
(height + 7) & ~7; + uint32_t ah; + if (height > 47) { + ah = (height + 63) & ~63; + nvpix->tiling_mode = 5; + } else if (height > 23) { + ah = (height + 31) & ~31; + nvpix->tiling_mode = 4; + } else if (height > 11) { + ah = (height + 15) & ~15; + nvpix->tiling_mode = 3; + } else if (height > 5) { + ah = (height + 7) & ~7; + nvpix->tiling_mode = 2; + } else { + ah = (height + 3) & ~3; + nvpix->tiling_mode = 1; + } flags |= NOUVEAU_BO_TILED; - devkind = ((aw * cpp) + 63) & ~63; + /* This allignment is very important. */ + devkind = (width * cpp + 63) & ~63; nvpix->size = devkind * ah; } @@ -390,8 +409,11 @@ nouveau_exa_pixmap_is_tiled(PixmapPtr ppix) NVPtr pNv = NVPTR(pScrn); if (pNv->exa_driver_pixmaps) { - if (!nouveau_pixmap_bo(ppix)->tiled) + if (!nouveau_pixmap_bo(ppix)) + return false; + if (nouveau_pixmap_bo(ppix)->tiled == 0) return false; + return true; } else if (pNv->Architecture < NV_ARCH_50 || exaGetPixmapOffset(ppix) < pNv->EXADriverPtr->offScreenBase) @@ -403,10 +425,12 @@ nouveau_exa_pixmap_is_tiled(PixmapPtr ppix) static void * nouveau_exa_pixmap_map(PixmapPtr ppix) { + ScrnInfoPtr pScrn = xf86Screens[ppix->drawable.pScreen->myNum]; + NVPtr pNv = NVPTR(pScrn); struct nouveau_bo *bo = nouveau_pixmap_bo(ppix); unsigned delta = nouveau_pixmap_offset(ppix); - if (bo->tiled) { + if (!pNv->wfb_enabled && bo->tiled) { struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix); nvpix->map_refcount++; @@ -430,9 +454,11 @@ nouveau_exa_pixmap_map(PixmapPtr ppix) static void nouveau_exa_pixmap_unmap(PixmapPtr ppix) { + ScrnInfoPtr pScrn = xf86Screens[ppix->drawable.pScreen->myNum]; + NVPtr pNv = NVPTR(pScrn); struct nouveau_bo *bo = nouveau_pixmap_bo(ppix); - if (bo->tiled) { + if (!pNv->wfb_enabled && bo->tiled) { struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix); if (--nvpix->map_refcount) @@ -682,3 +708,239 @@ nouveau_exa_init(ScreenPtr pScreen) pNv->EXADriverPtr = exa; return TRUE; } + +/* WFB functions. 
*/ + +static inline FbBits +nouveau_exa_wfb_read_memory_linear(const void *src, int size) +{ + FbBits bits = 0; + + memcpy(&bits, src, size); + + return bits; +} + +static inline void +nouveau_exa_wfb_write_memory_linear(void *dst, FbBits value, int size) +{ + memcpy(dst, &value, size); +} + +#define LINEAR_PITCH (pPixmap->devKind) + +/* Wfb related data. */ +static struct { + PixmapPtr ppix; + bool used; + bool tiled; + unsigned long start; + unsigned long end; + uint64_t multiply_factor; + uint8_t cpp; + unsigned int tile_height; + unsigned int num_tiles_width; +} wfb_pixmaps[6]; + +/* height: empty, 2, 4, 8, 16, 32, 64 */ +const unsigned int num_tiles[] = { 0, 0, 4, 2, 1, 1, 1 }; +const unsigned int tile_pitch[] = { 0, 3, 5, 6, 6, 6, 6 }; +const unsigned int mask_height_inv[] = {~0, ~1, ~3, ~7, ~15, ~31, ~63 }; +const unsigned int mask_pitch_inv[] = {~0, ~7, ~31, ~63, ~63, ~63, ~63 }; +const unsigned int mask_height[] = {0, 1, 3, 7, 15, 31, 63 }; +const unsigned int mask_pitch[] = {0, 7, 31, 63, 63, 63, 63 }; + +#define X_REMAINDER (x & mask_pitch[tile_height]) +#define Y_REMAINDER (y & mask_height[tile_height]) + +/* tile_height and tile_pitch are expressed in powers of two */ +static inline unsigned int +nouveau_exa_wfb_recurse_offset(unsigned int offset, unsigned int tile_height, + unsigned int x, unsigned int y, unsigned int num_tiles_width) +{ + offset += (((x & mask_pitch_inv[tile_height]) >> tile_pitch[tile_height]) + (((y & mask_height_inv[tile_height]) >> tile_height) * num_tiles_width)) * (1 << (tile_height + tile_pitch[tile_height])); + + if (tile_height > 1) + offset = nouveau_exa_wfb_recurse_offset(offset, tile_height - 1, X_REMAINDER, Y_REMAINDER, num_tiles[tile_height]); + else + offset += (Y_REMAINDER * (1 << tile_pitch[tile_height])) + X_REMAINDER; + + return offset; +} + +/* Note, we can only expose one read and write function, the linear versions are for internal consumption. 
*/ +static FbBits +nouveau_exa_wfb_read_memory(const void *src, int size) +{ + int i; + uint64_t line_x, line_y; + unsigned long offset = (unsigned long) src, subpixel_offset; + PixmapPtr pPixmap = NULL; + FbBits bits = 0; + void *new_src; + + /* Find the right pixmap. */ + for (i = 0; i < 6; i++) + if (offset >= wfb_pixmaps[i].start && offset < wfb_pixmaps[i].end) { + pPixmap = wfb_pixmaps[i].ppix; + break; + } + + if (!pPixmap || !wfb_pixmaps[i].tiled) + return nouveau_exa_wfb_read_memory_linear(src, size); + + /* Now comes the decoding. */ + offset -= (unsigned long) pPixmap->devPrivate.ptr; + /* Assuming dword alligned offsets. */ + subpixel_offset = offset & (wfb_pixmaps[i].cpp - 1); + offset &= ~(wfb_pixmaps[i].cpp - 1); + + /* Determine the coordinate first. */ + /* Division is too expensive for large numbers, so we precalculate a multiplication factor. */ + line_y = (offset * wfb_pixmaps[i].multiply_factor) >> 32; + line_x = offset - line_y * LINEAR_PITCH; + + new_src = pPixmap->devPrivate.ptr + + nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width) + + subpixel_offset; + + memcpy(&bits, new_src, size); + + return bits; +} + +static void +nouveau_exa_wfb_write_memory(void *dst, FbBits value, int size) +{ + int i; + uint64_t line_x, line_y; + unsigned long offset = (unsigned long) dst, subpixel_offset; + PixmapPtr pPixmap = NULL; + void *new_dst; + + /* Find the right pixmap. */ + for (i = 0; i < 6; i++) + if (offset >= wfb_pixmaps[i].start && offset < wfb_pixmaps[i].end) { + pPixmap = wfb_pixmaps[i].ppix; + break; + } + + if (!pPixmap || !wfb_pixmaps[i].tiled) { + nouveau_exa_wfb_write_memory_linear(dst, value, size); + return; + } + + /* Now comes the decoding. */ + offset -= (unsigned long) pPixmap->devPrivate.ptr; + /* Assuming dword alligned offsets. */ + subpixel_offset = offset & (wfb_pixmaps[i].cpp - 1); + offset &= ~(wfb_pixmaps[i].cpp - 1); + + /* Determine the coordinate first. 
*/ + /* Division is too expensive for large numbers, so we precalculate a multiplication factor. */ + line_y = (offset * wfb_pixmaps[i].multiply_factor) >> 32; + line_x = offset - line_y * LINEAR_PITCH; + + new_dst = pPixmap->devPrivate.ptr + + nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width) + + subpixel_offset; + + memcpy(new_dst, &value, size); +} + +void +nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead, + WriteMemoryProcPtr *pWrite, + DrawablePtr pDraw) +{ + PixmapPtr pPixmap; + struct nouveau_pixmap *nvpix; + + if (!pRead || !pWrite) + return; + + pPixmap = NVGetDrawablePixmap(pDraw); + if (!pPixmap) + return; + + nvpix = nouveau_pixmap(pPixmap); + + int i; + for (i = 0; i < 6; i++) + if (!wfb_pixmaps[i].used) + break; + + if (i == 6) { + ErrorF("More than 6 wraps are setup, what the hell is going on?\n"); + *pRead = NULL; + *pWrite = NULL; + return; + } + + /* We will get a pointer, somewhere in the range of this pixmap. */ + /* Based on linear representation ofcource. */ + wfb_pixmaps[i].ppix = pPixmap; + wfb_pixmaps[i].start = (unsigned long) pPixmap->devPrivate.ptr; + if (!nvpix || !nvpix->bo) + wfb_pixmaps[i].end = wfb_pixmaps[i].start; + else + wfb_pixmaps[i].end = wfb_pixmaps[i].start + nvpix->bo->size; + wfb_pixmaps[i].used = true; + wfb_pixmaps[i].tiled = nouveau_exa_pixmap_is_tiled(pPixmap); + /* Division is too expensive for large numbers, so we precalculate a multiplication factor. 
*/ + wfb_pixmaps[i].multiply_factor = (0xFFFFFFFF/exaGetPixmapPitch(pPixmap)) + 1; + wfb_pixmaps[i].cpp = (pPixmap->drawable.bitsPerPixel >> 3); + if (!nvpix) { + wfb_pixmaps[i].tile_height = 0; + wfb_pixmaps[i].num_tiles_width = 0; + } else { + wfb_pixmaps[i].tile_height = nvpix->tiling_mode + 1; + wfb_pixmaps[i].num_tiles_width = LINEAR_PITCH/(1 << tile_pitch[wfb_pixmaps[i].tile_height]); + } + + *pRead = nouveau_exa_wfb_read_memory; + *pWrite = nouveau_exa_wfb_write_memory; +} + +void +nouveau_exa_wfb_finish_wrap(DrawablePtr pDraw) +{ + PixmapPtr pPixmap; + int i; + + pPixmap = NVGetDrawablePixmap(pDraw); + if (!pPixmap) + return; + + for (i = 0; i < 6; i++) + if (wfb_pixmaps[i].ppix == pPixmap) { + wfb_pixmaps[i].ppix = NULL; + wfb_pixmaps[i].start = 0; + wfb_pixmaps[i].end = 0; + wfb_pixmaps[i].used = false; + wfb_pixmaps[i].tiled = false; + wfb_pixmaps[i].multiply_factor = 0; + wfb_pixmaps[i].cpp = 0; + wfb_pixmaps[i].tile_height = 0; + wfb_pixmaps[i].num_tiles_width = 0; + break; + } +} + +void +nouveau_exa_wfb_init() +{ + int i; + + for (i = 0; i < 6; i++) { + wfb_pixmaps[i].ppix = NULL; + wfb_pixmaps[i].start = 0; + wfb_pixmaps[i].end = 0; + wfb_pixmaps[i].used = false; + wfb_pixmaps[i].tiled = false; + wfb_pixmaps[i].multiply_factor = 0; + wfb_pixmaps[i].cpp = 0; + wfb_pixmaps[i].tile_height = 0; + wfb_pixmaps[i].num_tiles_width = 0; + } +} diff --git a/src/nv50_exa.c b/src/nv50_exa.c index 3831ec3..ac6b6b4 100644 --- a/src/nv50_exa.c +++ b/src/nv50_exa.c @@ -72,6 +72,31 @@ NV50EXABlendOp[] = { /* Add */ { 0, 0, BF( ONE), BF( ONE) }, }; + +uint32_t +nv50_exa_get_tile_mode(PixmapPtr ppix) +{ + struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix); + + if (!nvpix) + return 0x00; + + switch (nvpix->tiling_mode) { + case 1: /* pitch 32, height 4 */ + return 0x00; + case 2: /* pitch 64, height 8 */ + return 0x10; + case 3: /* pitch 64, height 16 */ + return 0x20; + case 4: /* pitch 64, height 32 */ + return 0x30; + case 5: /* pitch 64, height 64 */ + return 
0x40; + default: + return 0x00; + } +} + static Bool NV50EXA2DSurfaceFormat(PixmapPtr ppix, uint32_t *fmt) { @@ -128,7 +153,7 @@ NV50EXAAcquireSurface2D(PixmapPtr ppix, int is_src) BEGIN_RING(chan, eng2d, mthd, 5); OUT_RING (chan, fmt); OUT_RING (chan, 0); - OUT_RING (chan, 0); + OUT_RING (chan, nv50_exa_get_tile_mode(ppix)); OUT_RING (chan, 1); OUT_RING (chan, 0); } @@ -457,7 +482,7 @@ NV50EXARenderTarget(PixmapPtr ppix, PicturePtr ppict) OUT_RELOCh(chan, bo, delta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); OUT_RELOCl(chan, bo, delta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); OUT_RING (chan, format); - OUT_RING (chan, 0); + OUT_RING (chan, nv50_exa_get_tile_mode(ppix)); OUT_RING (chan, 0x00000000); BEGIN_RING(chan, tesla, NV50TCL_RT_HORIZ(0), 2); OUT_RING (chan, ppix->drawable.width); @@ -577,7 +602,7 @@ NV50EXATexture(PixmapPtr ppix, PicturePtr ppict, unsigned unit) NOUVEAU_FALLBACK("invalid picture format, this SHOULD NOT HAPPEN. Expect trouble.\n"); } OUT_RELOCl(chan, bo, delta, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); - OUT_RING (chan, 0xd0005000); + OUT_RING (chan, 0xd0005000 | (nv50_exa_get_tile_mode(ppix) << 18)); OUT_RING (chan, 0x00300000); OUT_RING (chan, ppix->drawable.width); OUT_RING (chan, (1 << NV50TIC_0_5_DEPTH_SHIFT) | ppix->drawable.height); diff --git a/src/nv_driver.c b/src/nv_driver.c index 4f07836..f056c68 100644 --- a/src/nv_driver.c +++ b/src/nv_driver.c @@ -158,6 +158,12 @@ static const char *fbSymbols[] = { NULL }; +static const char *wfbSymbols[] = { + "wfbPictureInit", + "wfbScreenInit", + NULL +}; + static const char *exaSymbols[] = { "exaDriverInit", "exaOffscreenInit", @@ -279,7 +285,7 @@ nouveauSetup(pointer module, pointer opts, int *errmaj, int *errmin) * Tell the loader about symbols from other modules that this module * might refer to. 
*/ - LoaderRefSymLists(vgahwSymbols, exaSymbols, fbSymbols, + LoaderRefSymLists(vgahwSymbols, exaSymbols, fbSymbols, wfbSymbols, shadowSymbols, drmSymbols, i2cSymbols, ddcSymbols, vbeSymbols, int10Symbols, NULL); @@ -1521,10 +1527,23 @@ NVPreInit(ScrnInfoPtr pScrn, int flags) * section. */ - if (xf86LoadSubModule(pScrn, "fb") == NULL) - NVPreInitFail("\n"); +#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,0,0) + if (!pNv->NoAccel && pNv->exa_driver_pixmaps && pNv->Architecture == NV_ARCH_50) { + pNv->wfb_enabled = true; + nouveau_exa_wfb_init(); + if (xf86LoadSubModule(pScrn, "wfb") == NULL) + NVPreInitFail("\n"); - xf86LoaderReqSymLists(fbSymbols, NULL); + xf86LoaderReqSymLists(wfbSymbols, NULL); + } else +#endif + { + pNv->wfb_enabled = false; + if (xf86LoadSubModule(pScrn, "fb") == NULL) + NVPreInitFail("\n"); + + xf86LoaderReqSymLists(fbSymbols, NULL); + } /* Load EXA if needed */ if (!pNv->NoAccel) { @@ -2127,9 +2146,19 @@ NVScreenInit(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) switch (pScrn->bitsPerPixel) { case 16: case 32: - ret = fbScreenInit(pScreen, FBStart, pScrn->virtualX, pScrn->virtualY, - pScrn->xDpi, pScrn->yDpi, - displayWidth, pScrn->bitsPerPixel); +#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,0,0) + if (pNv->wfb_enabled) { + ret = wfbScreenInit(pScreen, FBStart, pScrn->virtualX, pScrn->virtualY, + pScrn->xDpi, pScrn->yDpi, + displayWidth, pScrn->bitsPerPixel, + nouveau_exa_wfb_setup_wrap, nouveau_exa_wfb_finish_wrap); + } else +#endif + { + ret = fbScreenInit(pScreen, FBStart, pScrn->virtualX, pScrn->virtualY, + pScrn->xDpi, pScrn->yDpi, + displayWidth, pScrn->bitsPerPixel); + } break; default: xf86DrvMsg(scrnIndex, X_ERROR, @@ -2154,7 +2183,14 @@ NVScreenInit(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) } } - fbPictureInit (pScreen, 0, 0); +#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,0,0) + if (pNv->wfb_enabled) { + wfbPictureInit(pScreen, 0, 0); + } else +#endif + { + 
fbPictureInit(pScreen, 0, 0); + } xf86SetBlackWhitePixels(pScreen); diff --git a/src/nv_proto.h b/src/nv_proto.h index 1d00e1a..762b22e 100644 --- a/src/nv_proto.h +++ b/src/nv_proto.h @@ -18,6 +18,10 @@ void NVAccelFree(ScrnInfoPtr pScrn); /* in nv_driver.c */ Bool NVI2CInit(ScrnInfoPtr pScrn); +/* We can only include fb.h in normal or wfb mode, + * so we have to declare one ourself. */ +extern Bool wfbPictureInit (ScreenPtr pScreen, + PictFormatPtr formats, int nformats); /* in nv_dri.c */ Bool NVDRIScreenInit(ScrnInfoPtr pScrn); @@ -69,6 +73,11 @@ void NVTakedownDma(ScrnInfoPtr pScrn); Bool nouveau_exa_init(ScreenPtr pScreen); Bool nouveau_exa_pixmap_is_onscreen(PixmapPtr pPixmap); bool nouveau_exa_pixmap_is_tiled(PixmapPtr ppix); +void nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead, + WriteMemoryProcPtr *pWrite, + DrawablePtr pDraw); +void nouveau_exa_wfb_finish_wrap(DrawablePtr pDraw); +void nouveau_exa_wfb_init(); /* in nv_hw.c */ void NVCalcStateExt(ScrnInfoPtr,struct _riva_hw_state *,int,int,int,int,int,int); @@ -225,6 +234,7 @@ int NV40SetTexturePortAttribute(ScrnInfoPtr, Atom, INT32, pointer); Bool NVAccelInitNV50TCL(ScrnInfoPtr pScrn); /* in nv50_exa.c */ +uint32_t nv50_exa_get_tile_mode(PixmapPtr ppix); Bool NV50EXAPrepareSolid(PixmapPtr, int, Pixel, Pixel); void NV50EXASolid(PixmapPtr, int, int, int, int); void NV50EXADoneSolid(PixmapPtr); diff --git a/src/nv_type.h b/src/nv_type.h index f03c198..aafaef6 100644 --- a/src/nv_type.h +++ b/src/nv_type.h @@ -304,7 +304,8 @@ typedef struct _NVRec { uint8_t cur_head; ExaDriverPtr EXADriverPtr; - Bool exa_driver_pixmaps; + Bool exa_driver_pixmaps; + bool wfb_enabled; ScreenBlockHandlerProcPtr BlockHandler; CloseScreenProcPtr CloseScreen; /* Cursor */ @@ -477,6 +478,7 @@ struct nouveau_pixmap { void *linear; unsigned size; int map_refcount; + int tiling_mode; }; static inline struct nouveau_pixmap * -- 1.6.2
From 78457b975acd680469aa82800588540dd74142be Mon Sep 17 00:00:00 2001 From: Maarten Maathuis <[email protected]> Date: Fri, 13 Mar 2009 09:57:12 +0100 Subject: [PATCH 2/6] exa: smarter initial mapping of driver allocated pixmaps - Use a software copy until the first accelerated op, then UTS it. - The path back does not exist. --- src/nouveau_exa.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++----- src/nv04_exa.c | 5 ++++ src/nv10_exa.c | 5 ++++ src/nv30_exa.c | 5 ++++ src/nv40_exa.c | 5 ++++ src/nv50_exa.c | 10 ++++++++ src/nv_proto.h | 1 + src/nv_type.h | 1 + 8 files changed, 89 insertions(+), 7 deletions(-) diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c index 74804ec..72e783e 100644 --- a/src/nouveau_exa.c +++ b/src/nouveau_exa.c @@ -328,6 +328,9 @@ nouveau_exa_destroy_pixmap(ScreenPtr pScreen, void *priv) if (!nvpix) return; + if (nvpix->linear) + xfree(nvpix->linear); + nouveau_bo_ref(NULL, &nvpix->bo); xfree(nvpix); } @@ -422,6 +425,39 @@ nouveau_exa_pixmap_is_tiled(PixmapPtr ppix) return true; } +void +nouveau_exa_pixmap_prepare_for_accel(PixmapPtr ppix) +{ + ScrnInfoPtr pScrn = xf86Screens[ppix->drawable.pScreen->myNum]; + NVPtr pNv = NVPTR(pScrn); + struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix); + + if (!pNv->exa_driver_pixmaps) + return; + + if (nvpix->linear) { + int cpp = ppix->drawable.bitsPerPixel >> 3; + if ((ppix->drawable.width * ppix->drawable.height * cpp) < 16*1024) { + if (pNv->Architecture == NV_ARCH_50) + NV50EXAUploadSIFC(nvpix->linear, ppix->devKind, ppix, 0, 0, ppix->drawable.width, + ppix->drawable.height, cpp); + else + NV04EXAUploadIFC(pScrn, nvpix->linear, ppix->devKind, ppix, 0, 0, ppix->drawable.width, + ppix->drawable.height, cpp); + exaMarkSync(ppix->drawable.pScreen); + } else { + NVAccelUploadM2MF(ppix, 0, 0, ppix->drawable.width, + ppix->drawable.height, nvpix->linear, + ppix->devKind); + } + + xfree(nvpix->linear); + nvpix->linear = NULL; + } + + nvpix->dirty = true; +} + static void * 
nouveau_exa_pixmap_map(PixmapPtr ppix) { @@ -429,15 +465,26 @@ nouveau_exa_pixmap_map(PixmapPtr ppix) NVPtr pNv = NVPTR(pScrn); struct nouveau_bo *bo = nouveau_pixmap_bo(ppix); unsigned delta = nouveau_pixmap_offset(ppix); + struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix); - if (!pNv->wfb_enabled && bo->tiled) { - struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix); + /* Attempt to handle first access with more grace. + * This handles multiple sw accesses (such as trapezoid rasterisation). + * This avoids the first access on potentially uncached memory. + */ + if (!nvpix->dirty) { + if (!nvpix->linear) + nvpix->linear = xalloc(ppix->devKind * ppix->drawable.height); + nouveau_bo_map(bo, NOUVEAU_BO_RDWR); + return nvpix->linear; + } + + if (!pNv->wfb_enabled && bo->tiled && nvpix && nvpix->dirty) { nvpix->map_refcount++; if (nvpix->linear) return nvpix->linear; - nvpix->linear = xcalloc(1, ppix->devKind * ppix->drawable.height); + nvpix->linear = xalloc(ppix->devKind * ppix->drawable.height); NVAccelDownloadM2MF(ppix, 0, 0, ppix->drawable.width, ppix->drawable.height, nvpix->linear, @@ -457,10 +504,9 @@ nouveau_exa_pixmap_unmap(PixmapPtr ppix) ScrnInfoPtr pScrn = xf86Screens[ppix->drawable.pScreen->myNum]; NVPtr pNv = NVPTR(pScrn); struct nouveau_bo *bo = nouveau_pixmap_bo(ppix); + struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix); - if (!pNv->wfb_enabled && bo->tiled) { - struct nouveau_pixmap *nvpix = nouveau_pixmap(ppix); - + if (!pNv->wfb_enabled && bo->tiled && nvpix && nvpix->dirty) { if (--nvpix->map_refcount) return; @@ -489,6 +535,8 @@ nouveau_exa_download_from_screen(PixmapPtr pspix, int x, int y, int w, int h, cpp = pspix->drawable.bitsPerPixel >> 3; offset = (y * src_pitch) + (x * cpp); + nouveau_exa_pixmap_prepare_for_accel(pspix); + if (pNv->GART) { if (NVAccelDownloadM2MF(pspix, x, y, w, h, dst, dst_pitch)) return TRUE; @@ -516,6 +564,8 @@ nouveau_exa_upload_to_screen(PixmapPtr pdpix, int x, int y, int w, int h, dst_pitch = 
exaGetPixmapPitch(pdpix); cpp = pdpix->drawable.bitsPerPixel >> 3; + nouveau_exa_pixmap_prepare_for_accel(pdpix); + /* try hostdata transfer */ if (w * h * cpp < 16*1024) /* heuristic */ { @@ -886,7 +936,7 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead, else wfb_pixmaps[i].end = wfb_pixmaps[i].start + nvpix->bo->size; wfb_pixmaps[i].used = true; - wfb_pixmaps[i].tiled = nouveau_exa_pixmap_is_tiled(pPixmap); + wfb_pixmaps[i].tiled = nouveau_exa_pixmap_is_tiled(pPixmap) && !nvpix->linear; /* Division is too expensive for large numbers, so we precalculate a multiplication factor. */ wfb_pixmaps[i].multiply_factor = (0xFFFFFFFF/exaGetPixmapPitch(pPixmap)) + 1; wfb_pixmaps[i].cpp = (pPixmap->drawable.bitsPerPixel >> 3); diff --git a/src/nv04_exa.c b/src/nv04_exa.c index de5da67..a7e9b16 100644 --- a/src/nv04_exa.c +++ b/src/nv04_exa.c @@ -83,6 +83,8 @@ NV04EXAPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg) unsigned delta = nouveau_pixmap_offset(pPixmap); unsigned int fmt, pitch, color; + nouveau_exa_pixmap_prepare_for_accel(pPixmap); + WAIT_RING(chan, 64); planemask |= ~0 << pPixmap->drawable.bitsPerPixel; @@ -190,6 +192,9 @@ NV04EXAPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int dx, int dy, unsigned dst_delta = nouveau_pixmap_offset(pDstPixmap); int fmt; + nouveau_exa_pixmap_prepare_for_accel(pSrcPixmap); + nouveau_exa_pixmap_prepare_for_accel(pDstPixmap); + WAIT_RING(chan, 64); if (pSrcPixmap->drawable.bitsPerPixel != diff --git a/src/nv10_exa.c b/src/nv10_exa.c index 291c2da..bef7d40 100644 --- a/src/nv10_exa.c +++ b/src/nv10_exa.c @@ -626,6 +626,11 @@ Bool NV10EXAPrepareComposite(int op, NVPtr pNv = NVPTR(pScrn); struct nouveau_channel *chan = pNv->chan; + nouveau_exa_pixmap_prepare_for_accel(pSrc); + if (pMask) + nouveau_exa_pixmap_prepare_for_accel(pMask); + nouveau_exa_pixmap_prepare_for_accel(pDst); + WAIT_RING(chan, 128); if (NV10Check_A8plusA8_Feasability(pSrcPicture,pMaskPicture,pDstPicture,op)) diff --git 
a/src/nv30_exa.c b/src/nv30_exa.c index d3f83d3..58e5f27 100644 --- a/src/nv30_exa.c +++ b/src/nv30_exa.c @@ -447,6 +447,11 @@ NV30EXAPrepareComposite(int op, PicturePtr psPict, int fpid = NV30EXA_FPID_PASS_COL0; NV30EXA_STATE; + nouveau_exa_pixmap_prepare_for_accel(psPix); + if (pmPix) + nouveau_exa_pixmap_prepare_for_accel(pmPix); + nouveau_exa_pixmap_prepare_for_accel(pdPix); + WAIT_RING(chan, 128); blend = NV30_GetPictOpRec(op); diff --git a/src/nv40_exa.c b/src/nv40_exa.c index 68da331..d10d93b 100644 --- a/src/nv40_exa.c +++ b/src/nv40_exa.c @@ -406,6 +406,11 @@ NV40EXAPrepareComposite(int op, PicturePtr psPict, int fpid = NV40EXA_FPID_PASS_COL0; NV40EXA_STATE; + nouveau_exa_pixmap_prepare_for_accel(psPix); + if (pmPix) + nouveau_exa_pixmap_prepare_for_accel(pmPix); + nouveau_exa_pixmap_prepare_for_accel(pdPix); + WAIT_RING(chan, 128); blend = NV40_GetPictOpRec(op); diff --git a/src/nv50_exa.c b/src/nv50_exa.c index ac6b6b4..8b3dab7 100644 --- a/src/nv50_exa.c +++ b/src/nv50_exa.c @@ -250,6 +250,8 @@ NV50EXAPrepareSolid(PixmapPtr pdpix, int alu, Pixel planemask, Pixel fg) NV50EXA_LOCALS(pdpix); uint32_t fmt; + nouveau_exa_pixmap_prepare_for_accel(pdpix); + WAIT_RING(chan, 64); if (!NV50EXA2DSurfaceFormat(pdpix, &fmt)) @@ -311,6 +313,9 @@ NV50EXAPrepareCopy(PixmapPtr pspix, PixmapPtr pdpix, int dx, int dy, { NV50EXA_LOCALS(pdpix); + nouveau_exa_pixmap_prepare_for_accel(pspix); + nouveau_exa_pixmap_prepare_for_accel(pdpix); + WAIT_RING(chan, 64); if (!NV50EXAAcquireSurface2D(pspix, 1)) @@ -769,6 +774,11 @@ NV50EXAPrepareComposite(int op, NV50EXA_LOCALS(pspix); const unsigned shd_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; + nouveau_exa_pixmap_prepare_for_accel(pspix); + if (pmpix) + nouveau_exa_pixmap_prepare_for_accel(pmpix); + nouveau_exa_pixmap_prepare_for_accel(pdpix); + WAIT_RING (chan, 128); BEGIN_RING(chan, eng2d, 0x0110, 1); OUT_RING (chan, 0); diff --git a/src/nv_proto.h b/src/nv_proto.h index 762b22e..7d820b7 100644 --- a/src/nv_proto.h +++ 
b/src/nv_proto.h @@ -73,6 +73,7 @@ void NVTakedownDma(ScrnInfoPtr pScrn); Bool nouveau_exa_init(ScreenPtr pScreen); Bool nouveau_exa_pixmap_is_onscreen(PixmapPtr pPixmap); bool nouveau_exa_pixmap_is_tiled(PixmapPtr ppix); +void nouveau_exa_pixmap_prepare_for_accel(PixmapPtr ppix); void nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead, WriteMemoryProcPtr *pWrite, DrawablePtr pDraw); diff --git a/src/nv_type.h b/src/nv_type.h index aafaef6..5cee6b3 100644 --- a/src/nv_type.h +++ b/src/nv_type.h @@ -479,6 +479,7 @@ struct nouveau_pixmap { unsigned size; int map_refcount; int tiling_mode; + bool dirty; }; static inline struct nouveau_pixmap * -- 1.6.2
From af38223fe187797d2fbd94f59d549b1e039a4a0f Mon Sep 17 00:00:00 2001 From: Maarten Maathuis <[email protected]> Date: Sat, 14 Mar 2009 18:07:31 +0100 Subject: [PATCH 3/6] nv50: support NV9X hw with wfb - NVAX hardware seems to have the NV5X and NV8X beheaviour (based on one sample). --- src/nouveau_exa.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 files changed, 46 insertions(+), 4 deletions(-) diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c index 72e783e..b8bbb53 100644 --- a/src/nouveau_exa.c +++ b/src/nouveau_exa.c @@ -385,6 +385,12 @@ nouveau_exa_modify_pixmap_header(PixmapPtr ppix, int width, int height, /* This allignment is very important. */ devkind = (width * cpp + 63) & ~63; nvpix->size = devkind * ah; + + /* A 128 bytes block is potentially moved 6 positions ahead. + * It would be very difficult to predict this, so we overallocate. + */ + if (pNv->NVArch >= 0x90 && pNv->NVArch < 0xA0) + nvpix->size += 768; } ret = nouveau_bo_new(pNv->dev, flags, 0, nvpix->size, @@ -800,21 +806,51 @@ const unsigned int mask_pitch_inv[] = {~0, ~7, ~31, ~63, ~63, ~63, ~63 }; const unsigned int mask_height[] = {0, 1, 3, 7, 15, 31, 63 }; const unsigned int mask_pitch[] = {0, 7, 31, 63, 63, 63, 63 }; +/* This applies to nv9X hw. They do strange memory rearranging. */ +/* 128 bytes blocks (which correspond to 32x4 tiles) are moved forward and backward in a special pattern. 
*/ +static bool nv90_mode; +const int pattern[] = { 0, 1, 2, 3, /**/ 0, 1, 2, 3, /**/ 1, 2, 3, /**/ 0, 2, 3, /**/ + 0, 1, 3, /**/ 0, 1, 2, /**/ 1, 2, 3, /**/ 0, 2, 3, /**/ 0, 1, 3, /**/ 0, 1, 2 /**/}; +const int tile_offset[] = { + 0, 0, 0, 0, 0, 0, 0, 0, /* type 0 */ + 2, 2, 2, 2, 2, 2, -6, -6, /* type 1 */ + 4, 4, 4, 4, -4, -4, -4, -4, /* type 2 */ + 6, 6, -2, -2, -2, -2, -2, -2 /* type 3 */ + }; + #define X_REMAINDER (x & mask_pitch[tile_height]) #define Y_REMAINDER (y & mask_height[tile_height]) /* tile_height and tile_pitch are expressed in powers of two */ static inline unsigned int nouveau_exa_wfb_recurse_offset(unsigned int offset, unsigned int tile_height, - unsigned int x, unsigned int y, unsigned int num_tiles_width) + unsigned int x, unsigned int y, unsigned int num_tiles_width, bool first) { offset += (((x & mask_pitch_inv[tile_height]) >> tile_pitch[tile_height]) + (((y & mask_height_inv[tile_height]) >> tile_height) * num_tiles_width)) * (1 << (tile_height + tile_pitch[tile_height])); if (tile_height > 1) - offset = nouveau_exa_wfb_recurse_offset(offset, tile_height - 1, X_REMAINDER, Y_REMAINDER, num_tiles[tile_height]); + offset = nouveau_exa_wfb_recurse_offset(offset, tile_height - 1, X_REMAINDER, Y_REMAINDER, num_tiles[tile_height], false); else offset += (Y_REMAINDER * (1 << tile_pitch[tile_height])) + X_REMAINDER; + /* Here comes the correction for the unusual memory mapping of NV9X hw. */ + if (first && nv90_mode) { + unsigned int suboffset, suboffset2, suboffset3; + + /* 128 byte blocks within a larger block of 32768 bytes */ + /* the first block misses the first "0, 1, 2, 3" sequence, so we add an extra 4096 bytes offset. 
*/ + suboffset = ((offset + 4096) & 0x7FFF) & ~0x7F; + /* now we have 256 blocks */ + suboffset >>= 7; + + /* now we have 32 rows */ + suboffset2 = suboffset >> 3; + /* tile within row */ + suboffset3 = suboffset & 0x7; + + offset += tile_offset[suboffset3 + 8*pattern[suboffset2]] * 128; + } + return offset; } @@ -851,7 +887,7 @@ nouveau_exa_wfb_read_memory(const void *src, int size) line_x = offset - line_y * LINEAR_PITCH; new_src = pPixmap->devPrivate.ptr + - nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width) + + nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width, true) + subpixel_offset; memcpy(&bits, new_src, size); @@ -892,7 +928,7 @@ nouveau_exa_wfb_write_memory(void *dst, FbBits value, int size) line_x = offset - line_y * LINEAR_PITCH; new_dst = pPixmap->devPrivate.ptr + - nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width) + + nouveau_exa_wfb_recurse_offset(0, wfb_pixmaps[i].tile_height, line_x, line_y, wfb_pixmaps[i].num_tiles_width, true) + subpixel_offset; memcpy(new_dst, &value, size); @@ -905,6 +941,8 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead, { PixmapPtr pPixmap; struct nouveau_pixmap *nvpix; + ScrnInfoPtr pScrn = xf86Screens[pDraw->pScreen->myNum]; + NVPtr pNv = NVPTR(pScrn); if (!pRead || !pWrite) return; @@ -947,6 +985,10 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead, wfb_pixmaps[i].tile_height = nvpix->tiling_mode + 1; wfb_pixmaps[i].num_tiles_width = LINEAR_PITCH/(1 << tile_pitch[wfb_pixmaps[i].tile_height]); } + if (pNv->NVArch >= 0x90 && pNv->NVArch < 0xA0) + nv90_mode = true; + else + nv90_mode = false; *pRead = nouveau_exa_wfb_read_memory; *pWrite = nouveau_exa_wfb_write_memory; -- 1.6.2
From 882b51660c3ff04e3c55f23e5eb112c0ebe23773 Mon Sep 17 00:00:00 2001 From: Maarten Maathuis <[email protected]> Date: Sat, 14 Mar 2009 19:38:09 +0100 Subject: [PATCH 4/6] exa: don't put cpp == 0 pixmaps in vram --- src/nouveau_exa.c | 6 +++++- 1 files changed, 5 insertions(+), 1 deletions(-) diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c index b8bbb53..4cbcf50 100644 --- a/src/nouveau_exa.c +++ b/src/nouveau_exa.c @@ -358,9 +358,13 @@ nouveau_exa_modify_pixmap_header(PixmapPtr ppix, int width, int height, if (!nvpix->bo && nvpix->size) { uint32_t cpp = ppix->drawable.bitsPerPixel >> 3; - uint32_t flags = NOUVEAU_BO_VRAM; + uint32_t flags = 0; int ret; + /* Let's not waste vram on useless pixmaps. */ + if (cpp) + flags |= NOUVEAU_BO_VRAM; + if (pNv->Architecture >= NV_ARCH_50 && cpp) { uint32_t ah; if (height > 47) { -- 1.6.2
From e51057b34064fb36d4d4873a85a07ff3d46e752a Mon Sep 17 00:00:00 2001 From: Maarten Maathuis <[email protected]> Date: Sat, 14 Mar 2009 23:26:46 +0100 Subject: [PATCH 5/6] xv: some fixes --- src/nouveau_xv.c | 24 +++++++++++++----------- src/nv50_xv.c | 2 +- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/nouveau_xv.c b/src/nouveau_xv.c index 246647b..33e49c5 100644 --- a/src/nouveau_xv.c +++ b/src/nouveau_xv.c @@ -718,10 +718,8 @@ NV_calculate_pitches_and_mem_size(NVPtr pNv, int action_flags, int *srcPitch, { int tmp; - if (pNv->Architecture >= NV_ARCH_50) { - npixels = (npixels + 7) & ~7; - nlines = (nlines + 7) & ~7; - } + if (pNv->Architecture >= NV_ARCH_50) + nlines = (nlines + 3) & ~3; if (action_flags & IS_YV12) { *srcPitch = (width + 3) & ~3; /* of luma */ @@ -1252,13 +1250,17 @@ CPU_copy: exaMoveInPixmap(ppix); /* check if it made it offscreen */ -#if NOUVEAU_EXA_PIXMAPS - if (!pNv->EXADriverPtr->PixmapIsOffscreen(ppix)) -#else - if (exaGetPixmapOffset(ppix) >= pNv->EXADriverPtr->memorySize) -#endif - /* we lost, insufficient space probably */ - return BadAlloc; + if (pNv->EXADriverPtr->PixmapIsOffscreen) { + if (!pNv->EXADriverPtr->PixmapIsOffscreen(ppix)) { + /* we lost, insufficient space probably */ + return BadAlloc; + } + } else { + if (exaGetPixmapOffset(ppix) >= pNv->EXADriverPtr->memorySize) { + /* we lost, insufficient space probably */ + return BadAlloc; + } + } ExaOffscreenMarkUsed(ppix); diff --git a/src/nv50_xv.c b/src/nv50_xv.c index 9601326..df35b1a 100644 --- a/src/nv50_xv.c +++ b/src/nv50_xv.c @@ -76,7 +76,7 @@ nv50_xv_state_emit(PixmapPtr ppix, int id, struct nouveau_bo *src, case 24: OUT_RING (chan, NV50TCL_RT_FORMAT_24BPP); break; case 16: OUT_RING (chan, NV50TCL_RT_FORMAT_16BPP); break; } - OUT_RING (chan, 0); + OUT_RING (chan, nv50_exa_get_tile_mode(ppix)); OUT_RING (chan, 0); BEGIN_RING(chan, tesla, NV50TCL_RT_HORIZ(0), 2); OUT_RING (chan, ppix->drawable.width); -- 1.6.2
From 13f16b41f7d8c703a8d5eec78ed7e64588e014cf Mon Sep 17 00:00:00 2001 From: Maarten Maathuis <[email protected]> Date: Wed, 18 Mar 2009 09:36:51 +0100 Subject: [PATCH 6/6] NV50: add NV84 wfb support --- src/nouveau_exa.c | 58 +++++++++++++++++++++++++++++++++++++++------------- src/nv_driver.c | 2 +- src/nv_proto.h | 2 +- 3 files changed, 45 insertions(+), 17 deletions(-) diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c index 4cbcf50..43d9945 100644 --- a/src/nouveau_exa.c +++ b/src/nouveau_exa.c @@ -390,11 +390,13 @@ nouveau_exa_modify_pixmap_header(PixmapPtr ppix, int width, int height, devkind = (width * cpp + 63) & ~63; nvpix->size = devkind * ah; - /* A 128 bytes block is potentially moved 6 positions ahead. + /* A 128 bytes block is potentially moved 6/2 positions ahead. * It would be very difficult to predict this, so we overallocate. */ if (pNv->NVArch >= 0x90 && pNv->NVArch < 0xA0) nvpix->size += 768; + else if (pNv->NVArch == 0x84) + nvpix->size += 256; } ret = nouveau_bo_new(pNv->dev, flags, 0, nvpix->size, @@ -810,16 +812,20 @@ const unsigned int mask_pitch_inv[] = {~0, ~7, ~31, ~63, ~63, ~63, ~63 }; const unsigned int mask_height[] = {0, 1, 3, 7, 15, 31, 63 }; const unsigned int mask_pitch[] = {0, 7, 31, 63, 63, 63, 63 }; -/* This applies to nv9X hw. They do strange memory rearranging. */ +/* Some cards do strange memory rearrangement. */ /* 128 bytes blocks (which correspond to 32x4 tiles) are moved forward and backward in a special pattern. 
*/ -static bool nv90_mode; -const int pattern[] = { 0, 1, 2, 3, /**/ 0, 1, 2, 3, /**/ 1, 2, 3, /**/ 0, 2, 3, /**/ +static bool nv9x_mode; +static bool nv84_mode; +const int pattern_nv9x[] = { 0, 1, 2, 3, /**/ 0, 1, 2, 3, /**/ 1, 2, 3, /**/ 0, 2, 3, /**/ 0, 1, 3, /**/ 0, 1, 2, /**/ 1, 2, 3, /**/ 0, 2, 3, /**/ 0, 1, 3, /**/ 0, 1, 2 /**/}; +const int pattern_nv84[] = { 4, /**/ 5, 5, /**/ 4, 5, /**/ 4, 4, /**/ 5, 5, /**/ 4, 4, /**/ 5, 4, /**/ 5, 5, /**/ 4 }; const int tile_offset[] = { 0, 0, 0, 0, 0, 0, 0, 0, /* type 0 */ 2, 2, 2, 2, 2, 2, -6, -6, /* type 1 */ 4, 4, 4, 4, -4, -4, -4, -4, /* type 2 */ - 6, 6, -2, -2, -2, -2, -2, -2 /* type 3 */ + 6, 6, -2, -2, -2, -2, -2, -2, /* type 3 */ + 0, 0, 0, 0, 2, 2, -2, -2, /* type 4 */ + 2, 2, -2, -2, 0, 0, 0, 0, /* type 5 */ }; #define X_REMAINDER (x & mask_pitch[tile_height]) @@ -837,8 +843,24 @@ nouveau_exa_wfb_recurse_offset(unsigned int offset, unsigned int tile_height, else offset += (Y_REMAINDER * (1 << tile_pitch[tile_height])) + X_REMAINDER; - /* Here comes the correction for the unusual memory mapping of NV9X hw. */ - if (first && nv90_mode) { + /* Here comes the correction for the unusual memory mapping of some hw. 
*/ + if (first && nv84_mode) { + unsigned int suboffset, suboffset2, suboffset3; + + /* 128 byte blocks within a larger block of 16384 bytes */ + suboffset = (offset & 0x3FFF) & ~0x7F; + /* now we have 128 blocks */ + suboffset >>= 7; + + /* now we have 16 rows */ + suboffset2 = suboffset >> 3; + /* tile within row */ + suboffset3 = suboffset & 0x7; + + offset += tile_offset[suboffset3 + 8*pattern_nv84[suboffset2]] * 128; + } + + if (first && nv9x_mode) { unsigned int suboffset, suboffset2, suboffset3; /* 128 byte blocks within a larger block of 32768 bytes */ @@ -852,7 +874,7 @@ nouveau_exa_wfb_recurse_offset(unsigned int offset, unsigned int tile_height, /* tile within row */ suboffset3 = suboffset & 0x7; - offset += tile_offset[suboffset3 + 8*pattern[suboffset2]] * 128; + offset += tile_offset[suboffset3 + 8*pattern_nv9x[suboffset2]] * 128; } return offset; @@ -945,8 +967,6 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead, { PixmapPtr pPixmap; struct nouveau_pixmap *nvpix; - ScrnInfoPtr pScrn = xf86Screens[pDraw->pScreen->myNum]; - NVPtr pNv = NVPTR(pScrn); if (!pRead || !pWrite) return; @@ -989,10 +1009,6 @@ nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead, wfb_pixmaps[i].tile_height = nvpix->tiling_mode + 1; wfb_pixmaps[i].num_tiles_width = LINEAR_PITCH/(1 << tile_pitch[wfb_pixmaps[i].tile_height]); } - if (pNv->NVArch >= 0x90 && pNv->NVArch < 0xA0) - nv90_mode = true; - else - nv90_mode = false; *pRead = nouveau_exa_wfb_read_memory; *pWrite = nouveau_exa_wfb_write_memory; @@ -1024,10 +1040,22 @@ nouveau_exa_wfb_finish_wrap(DrawablePtr pDraw) } void -nouveau_exa_wfb_init() +nouveau_exa_wfb_init(ScrnInfoPtr pScrn) { + NVPtr pNv = NVPTR(pScrn); int i; + if (pNv->NVArch >= 0x90 && pNv->NVArch < 0xA0) { + nv9x_mode = true; + nv84_mode = false; + } else if (pNv->NVArch == 0x84) { + nv9x_mode = false; + nv84_mode = true; + } else { + nv9x_mode = false; + nv84_mode = false; + } + for (i = 0; i < 6; i++) { wfb_pixmaps[i].ppix = NULL; wfb_pixmaps[i].start = 0; 
diff --git a/src/nv_driver.c b/src/nv_driver.c index f056c68..b66e510 100644 --- a/src/nv_driver.c +++ b/src/nv_driver.c @@ -1530,7 +1530,7 @@ NVPreInit(ScrnInfoPtr pScrn, int flags) #if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,6,99,0,0) if (!pNv->NoAccel && pNv->exa_driver_pixmaps && pNv->Architecture == NV_ARCH_50) { pNv->wfb_enabled = true; - nouveau_exa_wfb_init(); + nouveau_exa_wfb_init(pScrn); if (xf86LoadSubModule(pScrn, "wfb") == NULL) NVPreInitFail("\n"); diff --git a/src/nv_proto.h b/src/nv_proto.h index 7d820b7..44c8355 100644 --- a/src/nv_proto.h +++ b/src/nv_proto.h @@ -78,7 +78,7 @@ void nouveau_exa_wfb_setup_wrap(ReadMemoryProcPtr *pRead, WriteMemoryProcPtr *pWrite, DrawablePtr pDraw); void nouveau_exa_wfb_finish_wrap(DrawablePtr pDraw); -void nouveau_exa_wfb_init(); +void nouveau_exa_wfb_init(ScrnInfoPtr pScrn); /* in nv_hw.c */ void NVCalcStateExt(ScrnInfoPtr,struct _riva_hw_state *,int,int,int,int,int,int); -- 1.6.2
_______________________________________________ Nouveau mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/nouveau
