> On Sat, 2009-08-22 at 04:18 -0700, Dave Airlie wrote: > > src/radeon_exa.c | 3 +-- > > src/radeon_exa_funcs.c | 35 +++++++++++++++++++++++++++++++++++ > > 2 files changed, 36 insertions(+), 2 deletions(-) > > > > New commits: > > commit bac224912c750dc1c85ff2d9b8526dad6c23b572 > > Author: Dave Airlie <[email protected]> > > Date: Sat Aug 22 21:17:59 2009 +1000 > > > > radeon: don't spec any initial placement for pixmaps. > > > > allow the first use to decide placement. > > > > diff --git a/src/radeon_exa.c b/src/radeon_exa.c > > index ec65722..3f3c9ba 100644 > > --- a/src/radeon_exa.c > > +++ b/src/radeon_exa.c > > @@ -391,8 +391,7 @@ void *RADEONEXACreatePixmap(ScreenPtr pScreen, int > > size, int align) > > return new_priv; > > > > new_priv->bo = radeon_bo_open(info->bufmgr, 0, size, > > - align, RADEON_GEM_DOMAIN_VRAM | > > - RADEON_GEM_DOMAIN_GTT, 0); > > + align, 0, 0); > > if (!new_priv->bo) { > > xfree(new_priv); > > ErrorF("Failed to alloc memory\n"); > > IME this has a significant impact on 2D performance (e.g. x11perf > -aa10text drops from about 325K/s to about 225K/s, and this is > noticeable in real apps), until the kernel side avoids unnecessary BO > copies.
The problem is the API is specced as initial_domain, not domains, so its incorrect. I'm writing a TTM patch at the moment to avoid BO copies to VRAM where nothing has yet mapped or touched the BO. Dave. > > Also, current EXA (with EXA_MIXED_PIXMAPS) actually only calls the > driver CreatePixmap(2) hook just before the first accelerated operation > on the pixmap. So I'd suggest setting RADEON_GEM_DOMAIN_VRAM | > RADEON_GEM_DOMAIN_GTT in both hooks. > > > > commit 77f98717d825162da106c6898cdbcbdf5c984ae6 > > Author: Dave Airlie <[email protected]> > > Date: Sat Aug 22 21:16:25 2009 +1000 > > > > exa/cs: add DFS from GTT optimisation > > > > This uses the new libdrm busy interface, once I had this in place > > I added a error if this happened and it does on my desktop here, > > so may as well add the optimisation that used to be in my old KMS tree. > > > > Signed-off-by: Dave Airlie <[email protected]> > > > > diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c > > index e48317f..f937c4e 100644 > > --- a/src/radeon_exa_funcs.c > > +++ b/src/radeon_exa_funcs.c > > @@ -507,6 +507,33 @@ out: > > } > > > > static Bool > > +RADEONDownloadFromScreenGTT(PixmapPtr pSrc, int x, int y, int w, > > + int h, char *dst, int dst_pitch) > > +{ > > + struct radeon_exa_pixmap_priv *driver_priv; > > + int src_pitch = exaGetPixmapPitch(pSrc); > > + int bpp = pSrc->drawable.bitsPerPixel; > > + int src_offset; > > + int r; > > + > > + driver_priv = exaGetPixmapDriverPrivate(pSrc); > > + r = radeon_bo_map(driver_priv->bo, 0); > > + if (r) > > + return FALSE; > > + > > + src_offset = (x * bpp / 8) + (y * src_pitch); > > + w *= bpp / 8; > > + > > + while (h--) { > > + memcpy(dst, driver_priv->bo->ptr + src_offset, w); > > + src_offset += src_pitch; > > + dst += dst_pitch; > > + } > > + radeon_bo_unmap(driver_priv->bo); > > + return TRUE; > > +} > > + > > +static Bool > > RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w, > > int h, char *dst, int dst_pitch) > > { > > @@ -519,11 +546,19 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int > > y, int w, > > unsigned bpp = pSrc->drawable.bitsPerPixel; > > uint32_t scratch_pitch = (w * bpp / 8 + 63) & ~63; > > Bool r; > > + uint32_t src_domain; > > + int busy; > > > > if (bpp < 8) > > return FALSE; > > > > driver_priv = exaGetPixmapDriverPrivate(pSrc); > > + > > + busy = radeon_bo_is_busy(driver_priv->bo, &src_domain); > > + > > + if (src_domain == RADEON_GEM_DOMAIN_GTT) > > + return RADEONDownloadFromScreenGTT(pSrc, x, y, w, h, > > + dst, dst_pitch); > > RADEONDownloadFromScreenGTT is superfluous, just return FALSE from > RADEONDownloadFromScreenCS. Also, this doesn't take into account any > unflushed operations involving the BO. > > Below is what I've been testing for DFS / UTS. > > > diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c > index bada4fb..abb8be3 100644 > --- a/src/radeon_exa_funcs.c > +++ b/src/radeon_exa_funcs.c > @@ -459,6 +459,7 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int > w, int h, > struct radeon_bo *scratch; > unsigned size; > uint32_t datatype = 0; > + uint32_t dst_domain; > uint32_t dst_pitch_offset; > unsigned bpp = pDst->drawable.bitsPerPixel; > uint32_t scratch_pitch = (w * bpp / 8 + 63) & ~63; > @@ -470,6 +471,18 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int > w, int h, > > driver_priv = exaGetPixmapDriverPrivate(pDst); > > + /* If we know the BO won't be busy, don't bother */ > + if (driver_priv->bo->cref == 1 && > + !radeon_bo_is_busy(driver_priv->bo, &dst_domain)) > +{ > + static Bool printed; > + if (!printed) { > + ErrorF("%s: Bailing because BO is idle\n", __func__); > + printed = TRUE; > + } > + return FALSE; > +} > + > size = scratch_pitch * h; > scratch = radeon_bo_open(info->bufmgr, 0, size, 0, > RADEON_GEM_DOMAIN_GTT, 0); > if (scratch == NULL) { > @@ -519,6 +532,7 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, > int w, > struct radeon_bo *scratch; > unsigned size; > uint32_t datatype = 0; > + uint32_t src_domain = 0; > uint32_t src_pitch_offset; > unsigned bpp = pSrc->drawable.bitsPerPixel; > uint32_t scratch_pitch = (w * bpp / 8 + 63) & ~63; > @@ -529,6 +543,30 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, > int w, > > driver_priv = exaGetPixmapDriverPrivate(pSrc); > > + /* If we know the BO will end up in GTT anyway, don't bother */ > + if (driver_priv->bo->cref > 1) { > + src_domain = driver_priv->bo->space_accounted & 0xffff; > + if (!src_domain) > + src_domain = driver_priv->bo->space_accounted >> 16; > + > + if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == > + (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) > + src_domain = 0; > + } > + > + if (!src_domain) > + radeon_bo_is_busy(driver_priv->bo, &src_domain); > + > + if (src_domain == RADEON_GEM_DOMAIN_GTT) > +{ > + static Bool printed; > + if (!printed) { > + ErrorF("%s: Bailing because BO is in GTT\n", __func__); > + printed = TRUE; > + } > + return FALSE; > +} > + > size = scratch_pitch * h; > scratch = radeon_bo_open(info->bufmgr, 0, size, 0, > RADEON_GEM_DOMAIN_GTT, 0); > if (scratch == NULL) { > > > _______________________________________________ xorg-driver-ati mailing list [email protected] http://lists.x.org/mailman/listinfo/xorg-driver-ati
