This patch is likely to make a big difference in the EXA codepath
I benchmarked, which is heavily used by the OLPC GTK theme.

The mini-benchmark attached to #1837 mimics it by drawing rounded
edges shapes with Cairo.

Aleph, a hacker from Italy, is also trying to improve rendering
performance on the OLPC.  I asked him to experiment with this
patch and tell us what happens.

-------- Original Message --------
Subject: [PATCH] Avoid an unwanted pixmap migration in EXA when compositing 
trapezoids
Date: Wed, 15 Aug 2007 19:58:56 +0200
From: Fredrik Höglund <[EMAIL PROTECTED]>
Reply-To: [EMAIL PROTECTED]
To: [EMAIL PROTECTED]

Hi,

This patch solves the trapezoid migration problem Michel Dänzer described
in a recent email on this mailing list.

miTrapezoids creates an alpha pixmap and initializes the contents using
PolyFillRect, which causes the pixmap to be moved in for acceleration.
The subsequent call(s) to RasterizeTrapezoid won't be accelerated by
EXA, which forces the pixmap to be migrated out again.

This patch avoids the problem by wrapping Trapezoids in EXA, and using
ExaCheckPolyFillRect to initialize the pixmap contents.

The code for exaTrapezoids and exaCreateAlphaPicture is copied from
miTrapezoids and miCreateAlphaPicture. The only change is the call to
ExaCheckPolyFillRect.

I'd like to commit this patch as a temporary solution for the 1.4 release.
I expect it to be replaced later by the generic solution Michel is
working on.

Here are some numbers when compositing a single 256x256 trapezoid
10000 times on a pixmap before and after the patch is applied.
This is with the master branch of the ATI driver and an R200.

Before:
--------------------------------------------------------------
10000 trapezoids, 37.835 seconds, 264.306 trapezoids / second.
10000 trapezoids, 37.819 seconds, 264.417 trapezoids / second.
10000 trapezoids, 37.828 seconds, 264.354 trapezoids / second.

After:
--------------------------------------------------------------
10000 trapezoids, 3.841 seconds, 2603.49 trapezoids / second.
10000 trapezoids, 3.796 seconds, 2634.35 trapezoids / second.
10000 trapezoids, 3.792 seconds, 2637.13 trapezoids / second.

Regards,
Fredrik



--
  // Bernardo Innocenti
\X/  http://www.codewiz.org/
commit 302aab23833f8da4c59672a6a32c730ac5b9937c
Author: Fredrik Höglund <[EMAIL PROTECTED]>
Date:   Wed Aug 15 19:19:11 2007 +0200

    EXA: Wrap Trapezoids to prevent excessive migration of the alpha pixmap.
    
    miTrapezoids creates an alpha pixmap and initializes the contents
    using PolyFillRect, which causes the pixmap to be moved in for
    acceleration. The subsequent call to RasterizeTrapezoid won't be
    accelerated by EXA, which causing the pixmap to be moved back out
    again.
    
    By wrapping Trapezoids and using ExaCheckPolyFillRect instead of
    PolyFillRect to initialize the pixmap, we avoid this roundtrip.

diff --git a/exa/exa.c b/exa/exa.c
index 8e22b89..aa42b92 100644
--- a/exa/exa.c
+++ b/exa/exa.c
@@ -526,6 +526,7 @@ exaCloseScreen(int i, ScreenPtr pScreen)
     if (ps) {
 	ps->Composite = pExaScr->SavedComposite;
 	ps->Glyphs = pExaScr->SavedGlyphs;
+	ps->Trapezoids = pExaScr->SavedTrapezoids;
     }
 #endif
 
@@ -684,6 +685,9 @@ exaDriverInit (ScreenPtr		pScreen,
 
 	pExaScr->SavedGlyphs = ps->Glyphs;
 	ps->Glyphs = exaGlyphs;
+
+	pExaScr->SavedTrapezoids = ps->Trapezoids;
+	ps->Trapezoids = exaTrapezoids;
     }
 #endif
 
diff --git a/exa/exa_priv.h b/exa/exa_priv.h
index a6d98cd..bab8aa2 100644
--- a/exa/exa_priv.h
+++ b/exa/exa_priv.h
@@ -108,6 +108,7 @@ typedef struct {
     RasterizeTrapezoidProcPtr	 SavedRasterizeTrapezoid;
     AddTrianglesProcPtr		 SavedAddTriangles;
     GlyphsProcPtr                SavedGlyphs;
+    TrapezoidsProcPtr            SavedTrapezoids;
 #endif
     Bool			 swappedOut;
     enum ExaMigrationHeuristic	 migration;
@@ -393,6 +394,11 @@ exaComposite(CARD8	op,
 	     CARD16	height);
 
 void
+exaTrapezoids (CARD8 op, PicturePtr pSrc, PicturePtr pDst,
+               PictFormatPtr maskFormat, INT16 xSrc, INT16 ySrc,
+               int ntrap, xTrapezoid *traps);
+
+void
 exaRasterizeTrapezoid (PicturePtr pPicture, xTrapezoid  *trap,
 		       int x_off, int y_off);
 
diff --git a/exa/exa_render.c b/exa/exa_render.c
index 5e7c67f..2dd3fc1 100644
--- a/exa/exa_render.c
+++ b/exa/exa_render.c
@@ -749,6 +749,132 @@ done:
 }
 #endif
 
+/**
+ * Same as miCreateAlphaPicture, except it uses ExaCheckPolyFillRect instead
+ * of PolyFillRect to initialize the pixmap after creating it, to prevent
+ * the pixmap from being migrated.
+ *
+ * See the comments about exaTrapezoids.
+ */
+static PicturePtr
+exaCreateAlphaPicture (ScreenPtr     pScreen,
+                       PicturePtr    pDst,
+                       PictFormatPtr pPictFormat,
+                       CARD16        width,
+                       CARD16        height)
+{
+    PixmapPtr	    pPixmap;
+    PicturePtr	    pPicture;
+    GCPtr	    pGC;
+    int		    error;
+    xRectangle	    rect;
+
+    if (width > 32767 || height > 32767)
+	return 0;
+
+    if (!pPictFormat)
+    {
+	if (pDst->polyEdge == PolyEdgeSharp)
+	    pPictFormat = PictureMatchFormat (pScreen, 1, PICT_a1);
+	else
+	    pPictFormat = PictureMatchFormat (pScreen, 8, PICT_a8);
+	if (!pPictFormat)
+	    return 0;
+    }
+
+    pPixmap = (*pScreen->CreatePixmap) (pScreen, width, height,
+					pPictFormat->depth);
+    if (!pPixmap)
+	return 0;
+    pGC = GetScratchGC (pPixmap->drawable.depth, pScreen);
+    if (!pGC)
+    {
+	(*pScreen->DestroyPixmap) (pPixmap);
+	return 0;
+    }
+    ValidateGC (&pPixmap->drawable, pGC);
+    rect.x = 0;
+    rect.y = 0;
+    rect.width = width;
+    rect.height = height;
+    ExaCheckPolyFillRect (&pPixmap->drawable, pGC, 1, &rect);
+    exaPixmapDirty (pPixmap, 0, 0, width, height);
+    FreeScratchGC (pGC);
+    pPicture = CreatePicture (0, &pPixmap->drawable, pPictFormat,
+			      0, 0, serverClient, &error);
+    (*pScreen->DestroyPixmap) (pPixmap);
+    return pPicture;
+}
+
+/**
+ * exaTrapezoids is essentially a copy of miTrapezoids that uses
+ * exaCreateAlphaPicture instead of miCreateAlphaPicture.
+ *
+ * The problem with miCreateAlphaPicture is that it calls PolyFillRect
+ * to initialize the contents after creating the pixmap, which
+ * causes the pixmap to be moved in for acceleration. The subsequent
+ * call to RasterizeTrapezoid won't be accelerated however, which
+ * forces the pixmap to be moved out again.
+ *
+ * exaCreateAlphaPicture avoids this roundtrip by using ExaCheckPolyFillRect
+ * to initialize the contents.
+ */
+void
+exaTrapezoids (CARD8 op, PicturePtr pSrc, PicturePtr pDst,
+               PictFormatPtr maskFormat, INT16 xSrc, INT16 ySrc,
+               int ntrap, xTrapezoid *traps)
+{
+    ScreenPtr		pScreen = pDst->pDrawable->pScreen;
+    PictureScreenPtr    ps = GetPictureScreen(pScreen);
+
+    /*
+     * Check for solid alpha add
+     */
+    if (op == PictOpAdd && miIsSolidAlpha (pSrc))
+    {
+	for (; ntrap; ntrap--, traps++)
+	    (*ps->RasterizeTrapezoid) (pDst, traps, 0, 0);
+    }
+    else if (maskFormat)
+    {
+	PicturePtr	pPicture;
+	BoxRec		bounds;
+	INT16		xDst, yDst;
+	INT16		xRel, yRel;
+
+	xDst = traps[0].left.p1.x >> 16;
+	yDst = traps[0].left.p1.y >> 16;
+
+	miTrapezoidBounds (ntrap, traps, &bounds);
+	if (bounds.y1 >= bounds.y2 || bounds.x1 >= bounds.x2)
+	    return;
+	pPicture = exaCreateAlphaPicture (pScreen, pDst, maskFormat,
+	                                  bounds.x2 - bounds.x1,
+	                                  bounds.y2 - bounds.y1);
+	if (!pPicture)
+	    return;
+	for (; ntrap; ntrap--, traps++)
+	    (*ps->RasterizeTrapezoid) (pPicture, traps,
+				       -bounds.x1, -bounds.y1);
+	xRel = bounds.x1 + xSrc - xDst;
+	yRel = bounds.y1 + ySrc - yDst;
+	CompositePicture (op, pSrc, pPicture, pDst,
+			  xRel, yRel, 0, 0, bounds.x1, bounds.y1,
+			  bounds.x2 - bounds.x1,
+			  bounds.y2 - bounds.y1);
+	FreePicture (pPicture, 0);
+    }
+    else
+    {
+	if (pDst->polyEdge == PolyEdgeSharp)
+	    maskFormat = PictureMatchFormat (pScreen, 1, PICT_a1);
+	else
+	    maskFormat = PictureMatchFormat (pScreen, 8, PICT_a8);
+	for (; ntrap; ntrap--, traps++)
+	    exaTrapezoids (op, pSrc, pDst, maskFormat, xSrc, ySrc, 1, traps);
+    }
+}
+
 #define NeedsComponent(f) (PICT_FORMAT_A(f) != 0 && PICT_FORMAT_RGB(f) != 0)
 
 /**

_______________________________________________
Devel mailing list
[email protected]
http://lists.laptop.org/listinfo/devel

Reply via email to