Module Name:    xsrc
Committed By:   macallan
Date:           Fri Dec  3 19:43:22 UTC 2021

Modified Files:
        xsrc/external/mit/xf86-video-suncg14/dist/src: cg14_accel.c

Log Message:
Start optimizing Copy8() operations:
- only copy right to left if srcY == dstY within the same pixmap
- special-case copies where the source and destination X have the same alignment
- special-case the above further when we don't need to read dst
Scrolling should now be about as fast as SX can go.


To generate a diff of this commit:
cvs rdiff -u -r1.18 -r1.19 \
    xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c
diff -u xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c:1.18 xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c:1.19
--- xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c:1.18	Fri Dec  3 16:54:26 2021
+++ xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c	Fri Dec  3 19:43:22 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: cg14_accel.c,v 1.18 2021/12/03 16:54:26 macallan Exp $ */
+/* $NetBSD: cg14_accel.c,v 1.19 2021/12/03 19:43:22 macallan Exp $ */
 /*
  * Copyright (c) 2013 Michael Lorenz
  * All rights reserved.
@@ -98,8 +98,8 @@ CG14PrepareCopy(PixmapPtr pSrcPixmap, Pi
 	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
 
 	ENTER;
-	DPRINTF(X_ERROR, "bits per pixel: %d\n",
-	    pSrcPixmap->drawable.bitsPerPixel);
+	xf86Msg(X_ERROR, "bits per pixel: %d rop %x\n",
+	    pSrcPixmap->drawable.bitsPerPixel, alu);
 
 	if (planemask != p->last_mask) {
 		CG14Wait(p);
@@ -305,6 +305,106 @@ CG14Copy32(PixmapPtr pDstPixmap,
 	exaMarkSync(pDstPixmap->drawable.pScreen);
 }
 
+/* Copy a w x h block of 8-bit pixels left to right with no ROP, for the case
+ * where srcstart and dststart have the same alignment within a 32-bit word
+ * (checked by the caller); the destination is only stored to, never loaded. */
+static void
+CG14Copy8_aligned_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch)
+{
+	int saddr, daddr, pre, cnt, wrds;
+
+	ENTER;
+	/* pre = bytes before srcstart reaches a 4-byte boundary, capped at w */
+	pre = srcstart & 3;
+	if (pre != 0) pre = 4 - pre;
+	pre = min(pre, w);
+
+	while (h > 0) {	/* one pass per scanline */
+		saddr = srcstart;
+		daddr = dststart;
+		cnt = w;	/* bytes still to copy on this line */
+		if (pre > 0) {	/* unaligned head, moved byte by byte */
+			write_sx_io(p, saddr & ~7, SX_LDB(8, pre - 1, saddr & 7));
+			write_sx_io(p, daddr & ~7, SX_STB(8, pre - 1, daddr & 7));
+			saddr += pre;
+			daddr += pre;
+			cnt -= pre;
+			if (cnt == 0) goto next;	/* the head was the whole line */
+		}
+		while (cnt > 3) {	/* aligned middle, at most 32 words per pass */
+			wrds = min(32, cnt >> 2);
+			write_sx_io(p, saddr & ~7, SX_LD(8, wrds - 1, saddr & 7));
+			write_sx_io(p, daddr & ~7, SX_ST(8, wrds - 1, daddr & 7));
+			saddr += wrds << 2;	/* wrds words == wrds * 4 bytes */
+			daddr += wrds << 2;
+			cnt -= wrds << 2;
+		}
+		if (cnt > 0) {	/* tail: the 1-3 bytes left after the word loop */
+			write_sx_io(p, saddr & ~7, SX_LDB(8, cnt - 1, saddr & 7));
+			write_sx_io(p, daddr & ~7, SX_STB(8, cnt - 1, daddr & 7));
+		}
+next:
+		srcstart += srcpitch;	/* advance both start addresses one line */
+		dststart += dstpitch;
+		h--;
+	}
+}
+
+/* Copy a w x h block of 8-bit pixels left to right with a ROP, for srcstart and
+ * dststart with the same alignment within a 32-bit word (caller checks this):
+ * src -> regs 8.., dst -> regs 40.., SX_ROP result -> regs 72.. -> dst. */
+static void
+CG14Copy8_aligned_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch)
+{
+	int saddr, daddr, pre, cnt, wrds;
+
+	ENTER;
+	/* pre = bytes before srcstart reaches a 4-byte boundary, capped at w */
+	pre = srcstart & 3;
+	if (pre != 0) pre = 4 - pre;
+	pre = min(pre, w);
+
+	while (h > 0) {	/* one pass per scanline */
+		saddr = srcstart;
+		daddr = dststart;
+		cnt = w;	/* bytes still to process on this line */
+		if (pre > 0) {	/* unaligned head, handled byte by byte */
+			write_sx_io(p, saddr & ~7, SX_LDB(8, pre - 1, saddr & 7));
+			write_sx_io(p, daddr & ~7, SX_LDB(40, pre - 1, daddr & 7));
+			write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, pre - 1));
+			write_sx_io(p, daddr & ~7, SX_STB(72, pre - 1, daddr & 7));
+			saddr += pre;
+			daddr += pre;
+			cnt -= pre;
+			if (cnt == 0) goto next;	/* the head was the whole line */
+		}
+		while (cnt > 3) {	/* aligned middle, at most 32 words per pass */
+			wrds = min(32, cnt >> 2);
+			write_sx_io(p, saddr & ~7, SX_LD(8, wrds - 1, saddr & 7));
+			write_sx_io(p, daddr & ~7, SX_LD(40, wrds - 1, daddr & 7));
+			if (wrds > 16) {	/* split on registers loaded, not bytes left */
+				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, 15));
+				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(24, 56, 88, wrds - 17));	/* all three operands move up 16 */
+			} else
+				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, wrds - 1));
+			write_sx_io(p, daddr & ~7, SX_ST(72, wrds - 1, daddr & 7));
+			saddr += wrds << 2;	/* wrds words == wrds * 4 bytes */
+			daddr += wrds << 2;
+			cnt -= wrds << 2;
+		}
+		if (cnt > 0) {	/* tail: the 1-3 bytes left after the word loop */
+			write_sx_io(p, saddr & ~7, SX_LDB(8, cnt - 1, saddr & 7));
+			write_sx_io(p, daddr & ~7, SX_LDB(40, cnt - 1, daddr & 7));
+			write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, cnt - 1));
+			write_sx_io(p, daddr & ~7, SX_STB(72, cnt - 1, daddr & 7));
+		}
+next:
+		srcstart += srcpitch;	/* advance both start addresses one line */
+		dststart += dstpitch;
+		h--;
+	}
+}
+
 static void
 CG14Copy8(PixmapPtr pDstPixmap,
          int srcX, int srcY, int dstX, int dstY, int w, int h)
@@ -327,19 +427,13 @@ CG14Copy8(PixmapPtr pDstPixmap,
 	srcstart = srcX + (srcpitch * srcY) + srcoff;
 	dststart = dstX + (dstpitch * dstY) + dstoff;
 
-	/*
-	 * we always copy up to 32 pixels at a time so direction doesn't
-	 * matter if w<=32
-	 */
-	if (w > 32) {
-		if (p->xdir < 0) {
-			srcstart += (w - 32);
-			dststart += (w - 32);
-			xinc = -32;
-		} else
-			xinc = 32;
+	if ((p->xdir < 0) && (srcoff == dstoff) && (srcY == dstY)) {
+		srcstart += (w - 32);
+		dststart += (w - 32);
+		xinc = -32;
 	} else
 		xinc = 32;
+
 	if (p->ydir < 0) {
 		srcstart += (h - 1) * srcpitch;
 		dststart += (h - 1) * dstpitch;
@@ -349,6 +443,16 @@ CG14Copy8(PixmapPtr pDstPixmap,
 		srcinc = srcpitch;
 		dstinc = dstpitch;
 	}
+	if (((srcstart & 3) == (dststart & 3)) && (xinc > 0)) {
+		switch (p->last_rop) {
+			case 0xcc:
+				CG14Copy8_aligned_norop(p, srcstart, dststart, w, h, srcinc, dstinc);
+				break;
+			default:
+				CG14Copy8_aligned_rop(p, srcstart, dststart, w, h, srcinc, dstinc);
+		}
+		return;
+	}
 	if (p->last_rop == 0xcc) {
 		/* plain old copy */
 		if ( xinc > 0) {

Reply via email to