Optimization: saves one comparison per DWORD in the common case where the BCI queue has ample space for the bitmap data.

Changelog:
* EXA: use memcpy instead of loop for UploadToScreen operation

--
perl -e '$x=2.4;print sprintf("%.0f + %.0f = %.0f\n",$x,$x,$x+$x);'

From 70c9a579041f1ec588a580647966dff1e17e26d6 Mon Sep 17 00:00:00 2001
From: Alex Villacís Lasso <[email protected]>
Date: Tue, 30 Dec 2008 01:24:42 -0500
Subject: [PATCH] EXA: Optimization to use one memcpy per scanline instead of a conditional inside a loop for every dword.

---
 src/savage_exa.c |   20 ++++++++++++++------
 1 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/savage_exa.c b/src/savage_exa.c
index 7c6efb3..538e000 100644
--- a/src/savage_exa.c
+++ b/src/savage_exa.c
@@ -495,13 +495,21 @@ SavageUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, char *src, int
     dwords = (((w * Bpp) + 3) >> 2);
     for (i = 0; i < h; i++) {
 	srcp = (CARD32 *)src;
-	for (j = 0; j < dwords; j++) {
-	    if (queue < 4) {
-		BCI_RESET;
-		queue = 120 * 1024;
+
+	if (4 * dwords <= queue) {
+	    /* WARNING: breaking BCI_PTR abstraction here */
+	    memcpy(bci_ptr, srcp, 4 * dwords);
+	    bci_ptr += dwords;
+	    queue -= 4 * dwords;
+	} else {
+	    for (j = 0; j < dwords; j++) {
+		if (queue < 4) {
+		    BCI_RESET;
+		    queue = 120 * 1024;
+		}
+		BCI_SEND(*srcp++);
+		queue -= 4;
 	    }
-	    BCI_SEND(*srcp++);
-	    queue -= 4;
 	}
 	src += src_pitch;
     }
-- 
1.6.0.6

_______________________________________________
xorg mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/xorg

Reply via email to