This patch is likely to make a big difference in the EXA codepath
I benchmarked, which is heavily used by the OLPC GTK theme.
The mini-benchmark attached to #1837 mimics it by drawing shapes with
rounded edges using Cairo.
Aleph, a hacker from Italy, is also trying to improve rendering
performance on the OLPC. I asked him to experiment with this
patch and tell us what happens.
-------- Original Message --------
Subject: [PATCH] Avoid an unwanted pixmap migration in EXA when compositing
trapezoids
Date: Wed, 15 Aug 2007 19:58:56 +0200
From: Fredrik Höglund <[EMAIL PROTECTED]>
Reply-To: [EMAIL PROTECTED]
To: [EMAIL PROTECTED]
Hi,
This patch solves the trapezoid migration problem Michel Dänzer described
in a recent email on this mailing list.
miTrapezoids creates an alpha pixmap and initializes the contents using
PolyFillRect, which causes the pixmap to be moved in for acceleration.
The subsequent call(s) to RasterizeTrapezoid won't be accelerated by
EXA, which forces the pixmap to be migrated out again.
This patch avoids the problem by wrapping Trapezoids in EXA, and using
ExaCheckPolyFillRect to initialize the pixmap contents.
The code for exaTrapezoids and exaCreateAlphaPicture is copied from
miTrapezoids and miCreateAlphaPicture. The only change is the call to
ExaCheckPolyFillRect.
I'd like to commit this patch as a temporary solution for the 1.4 release.
I expect it to be replaced later by the generic solution Michel is
working on.
Here are some numbers when compositing a single 256x256 trapezoid
10000 times on a pixmap before and after the patch is applied.
This is with the master branch of the ATI driver and an R200.
Before:
--------------------------------------------------------------
10000 trapezoids, 37.835 seconds, 264.306 trapezoids / second.
10000 trapezoids, 37.819 seconds, 264.417 trapezoids / second.
10000 trapezoids, 37.828 seconds, 264.354 trapezoids / second.
After:
--------------------------------------------------------------
10000 trapezoids, 3.841 seconds, 2603.49 trapezoids / second.
10000 trapezoids, 3.796 seconds, 2634.35 trapezoids / second.
10000 trapezoids, 3.792 seconds, 2637.13 trapezoids / second.
Regards,
Fredrik
--
// Bernardo Innocenti
\X/ http://www.codewiz.org/
commit 302aab23833f8da4c59672a6a32c730ac5b9937c
Author: Fredrik Höglund <[EMAIL PROTECTED]>
Date: Wed Aug 15 19:19:11 2007 +0200
EXA: Wrap Trapezoids to prevent excessive migration of the alpha pixmap.
miTrapezoids creates an alpha pixmap and initializes the contents
using PolyFillRect, which causes the pixmap to be moved in for
acceleration. The subsequent call to RasterizeTrapezoid won't be
accelerated by EXA, which causes the pixmap to be moved back out
again.
By wrapping Trapezoids and using ExaCheckPolyFillRect instead of
PolyFillRect to initialize the pixmap, we avoid this roundtrip.
diff --git a/exa/exa.c b/exa/exa.c
index 8e22b89..aa42b92 100644
--- a/exa/exa.c
+++ b/exa/exa.c
@@ -526,6 +526,7 @@ exaCloseScreen(int i, ScreenPtr pScreen)
if (ps) {
ps->Composite = pExaScr->SavedComposite;
ps->Glyphs = pExaScr->SavedGlyphs;
+ ps->Trapezoids = pExaScr->SavedTrapezoids;
}
#endif
@@ -684,6 +685,9 @@ exaDriverInit (ScreenPtr pScreen,
pExaScr->SavedGlyphs = ps->Glyphs;
ps->Glyphs = exaGlyphs;
+
+ pExaScr->SavedTrapezoids = ps->Trapezoids;
+ ps->Trapezoids = exaTrapezoids;
}
#endif
diff --git a/exa/exa_priv.h b/exa/exa_priv.h
index a6d98cd..bab8aa2 100644
--- a/exa/exa_priv.h
+++ b/exa/exa_priv.h
@@ -108,6 +108,7 @@ typedef struct {
RasterizeTrapezoidProcPtr SavedRasterizeTrapezoid;
AddTrianglesProcPtr SavedAddTriangles;
GlyphsProcPtr SavedGlyphs;
+ TrapezoidsProcPtr SavedTrapezoids;
#endif
Bool swappedOut;
enum ExaMigrationHeuristic migration;
@@ -393,6 +394,11 @@ exaComposite(CARD8 op,
CARD16 height);
void
+exaTrapezoids (CARD8 op, PicturePtr pSrc, PicturePtr pDst,
+ PictFormatPtr maskFormat, INT16 xSrc, INT16 ySrc,
+ int ntrap, xTrapezoid *traps);
+
+void
exaRasterizeTrapezoid (PicturePtr pPicture, xTrapezoid *trap,
int x_off, int y_off);
diff --git a/exa/exa_render.c b/exa/exa_render.c
index 5e7c67f..2dd3fc1 100644
--- a/exa/exa_render.c
+++ b/exa/exa_render.c
@@ -749,6 +749,132 @@ done:
}
#endif
+/**
+ * Same as miCreateAlphaPicture, except it uses ExaCheckPolyFillRect instead
+ * of PolyFillRect to initialize the pixmap after creating it, to prevent
+ * the pixmap from being migrated.
+ *
+ * See the comments about exaTrapezoids.
+ *
+ * Creates a width x height pixmap on pScreen with the depth of the chosen
+ * picture format, fills all of it through ExaCheckPolyFillRect using a
+ * scratch GC, marks the whole pixmap dirty with exaPixmapDirty, and wraps
+ * it in a picture created with CreatePicture.
+ *
+ * If pPictFormat is null, a format is looked up with PictureMatchFormat:
+ * depth 1 / PICT_a1 when pDst->polyEdge is PolyEdgeSharp, depth 8 /
+ * PICT_a8 otherwise. pDst is only consulted for this polyEdge check.
+ *
+ * Returns 0 when width or height exceeds 32767, when no picture format
+ * could be matched, or when the pixmap or the scratch GC could not be
+ * obtained. Otherwise returns whatever CreatePicture returned; that result
+ * is not checked here, so callers must test it.
+ *
+ * NOTE(review): the scratch GC is validated but never modified here, so the
+ * fill presumably uses the GC's default foreground (i.e. clears the pixmap)
+ * -- confirm.
+ */
+static PicturePtr
+exaCreateAlphaPicture (ScreenPtr pScreen,
+ PicturePtr pDst,
+ PictFormatPtr pPictFormat,
+ CARD16 width,
+ CARD16 height)
+{
+ PixmapPtr pPixmap;
+ PicturePtr pPicture;
+ GCPtr pGC;
+ int error;
+ xRectangle rect;
+
+ if (width > 32767 || height > 32767) /* reject oversized requests up front */
+ return 0;
+
+ if (!pPictFormat)
+ {
+ if (pDst->polyEdge == PolyEdgeSharp)
+ pPictFormat = PictureMatchFormat (pScreen, 1, PICT_a1);
+ else
+ pPictFormat = PictureMatchFormat (pScreen, 8, PICT_a8);
+ if (!pPictFormat)
+ return 0;
+ }
+
+ pPixmap = (*pScreen->CreatePixmap) (pScreen, width, height,
+ pPictFormat->depth);
+ if (!pPixmap)
+ return 0;
+ pGC = GetScratchGC (pPixmap->drawable.depth, pScreen);
+ if (!pGC)
+ {
+ (*pScreen->DestroyPixmap) (pPixmap); /* do not leak the pixmap on failure */
+ return 0;
+ }
+ ValidateGC (&pPixmap->drawable, pGC);
+ rect.x = 0;
+ rect.y = 0;
+ rect.width = width;
+ rect.height = height;
+ ExaCheckPolyFillRect (&pPixmap->drawable, pGC, 1, &rect); /* used instead of PolyFillRect so the fill does not migrate the pixmap */
+ exaPixmapDirty (pPixmap, 0, 0, width, height); /* the fill touched the full width x height area */
+ FreeScratchGC (pGC);
+ pPicture = CreatePicture (0, &pPixmap->drawable, pPictFormat,
+ 0, 0, serverClient, &error);
+ (*pScreen->DestroyPixmap) (pPixmap); /* NOTE(review): presumably only drops the local reference while the picture keeps its own -- confirm */
+ return pPicture;
+}
+
+/**
+ * exaTrapezoids is essentially a copy of miTrapezoids that uses
+ * exaCreateAlphaPicture instead of miCreateAlphaPicture.
+ *
+ * The problem with miCreateAlphaPicture is that it calls PolyFillRect
+ * to initialize the contents after creating the pixmap, which
+ * causes the pixmap to be moved in for acceleration. The subsequent
+ * call to RasterizeTrapezoid won't be accelerated however, which
+ * forces the pixmap to be moved out again.
+ *
+ * exaCreateAlphaPicture avoids this roundtrip by using ExaCheckPolyFillRect
+ * to initialize the contents.
+ *
+ * The function takes one of three paths:
+ *
+ *  - op is PictOpAdd and miIsSolidAlpha (pSrc) holds: every trapezoid is
+ *    rasterized straight into pDst with offsets 0, 0; no mask is created.
+ *  - maskFormat is non-null: the bounds of all trapezoids are computed,
+ *    an alpha picture of that size is created, all trapezoids are
+ *    rasterized into it shifted by -bounds.x1, -bounds.y1, and the result
+ *    is used as the mask of a single CompositePicture call onto pDst.
+ *    Nothing is drawn if the bounds are empty or the alpha picture could
+ *    not be created.
+ *  - maskFormat is null: a format is chosen from pDst->polyEdge (a1 for
+ *    PolyEdgeSharp, a8 otherwise) and the function calls itself once per
+ *    trapezoid with that format.
+ *
+ * NOTE(review): in the last path the result of PictureMatchFormat is not
+ * checked. If it were null, the recursive call would take the same path
+ * again without making progress -- confirm that the a1/a8 formats are
+ * always available on screens that reach this code.
+ */
+void
+exaTrapezoids (CARD8 op, PicturePtr pSrc, PicturePtr pDst,
+ PictFormatPtr maskFormat, INT16 xSrc, INT16 ySrc,
+ int ntrap, xTrapezoid *traps)
+{
+ ScreenPtr pScreen = pDst->pDrawable->pScreen;
+ PictureScreenPtr ps = GetPictureScreen(pScreen);
+
+ /*
+ * Check for solid alpha add
+ */
+ if (op == PictOpAdd && miIsSolidAlpha (pSrc))
+ {
+ for (; ntrap; ntrap--, traps++)
+ (*ps->RasterizeTrapezoid) (pDst, traps, 0, 0);
+ }
+ else if (maskFormat)
+ {
+ PicturePtr pPicture;
+ BoxRec bounds;
+ INT16 xDst, yDst;
+ INT16 xRel, yRel;
+
+ xDst = traps[0].left.p1.x >> 16; /* drops the low 16 bits; presumably the integer part of a 16.16 fixed-point value -- confirm */
+ yDst = traps[0].left.p1.y >> 16;
+
+ miTrapezoidBounds (ntrap, traps, &bounds);
+ if (bounds.y1 >= bounds.y2 || bounds.x1 >= bounds.x2) /* empty bounds: nothing to draw */
+ return;
+ pPicture = exaCreateAlphaPicture (pScreen, pDst, maskFormat,
+ bounds.x2 - bounds.x1,
+ bounds.y2 - bounds.y1);
+ if (!pPicture)
+ return;
+ for (; ntrap; ntrap--, traps++)
+ (*ps->RasterizeTrapezoid) (pPicture, traps,
+ -bounds.x1, -bounds.y1); /* shift so bounds.x1, bounds.y1 lands at the mask origin */
+ xRel = bounds.x1 + xSrc - xDst; /* source position matching the bounds origin, relative to the first trapezoid's left.p1 */
+ yRel = bounds.y1 + ySrc - yDst;
+ CompositePicture (op, pSrc, pPicture, pDst,
+ xRel, yRel, 0, 0, bounds.x1, bounds.y1,
+ bounds.x2 - bounds.x1,
+ bounds.y2 - bounds.y1);
+ FreePicture (pPicture, 0);
+ }
+ else
+ {
+ if (pDst->polyEdge == PolyEdgeSharp)
+ maskFormat = PictureMatchFormat (pScreen, 1, PICT_a1);
+ else
+ maskFormat = PictureMatchFormat (pScreen, 8, PICT_a8);
+ for (; ntrap; ntrap--, traps++)
+ exaTrapezoids (op, pSrc, pDst, maskFormat, xSrc, ySrc, 1, traps); /* one call, and thus one mask, per trapezoid */
+ }
+}
+
#define NeedsComponent(f) (PICT_FORMAT_A(f) != 0 && PICT_FORMAT_RGB(f) != 0)
/**
_______________________________________________
Devel mailing list
[email protected]
http://lists.laptop.org/listinfo/devel