Module Name:    xsrc
Committed By:   macallan
Date:           Fri Dec 24 04:41:40 UTC 2021

Modified Files:
        xsrc/external/mit/xf86-video-suncg14/dist/src: cg14.h cg14_accel.c
            cg14_render.c

Log Message:
add macros to simplify issuing SX instructions, hide the address alignment /
displacement shenanigans SX needs, and make the code more closely resemble the
inline assembler source it arguably is


To generate a diff of this commit:
cvs rdiff -u -r1.14 -r1.15 \
    xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h
cvs rdiff -u -r1.26 -r1.27 \
    xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c
cvs rdiff -u -r1.13 -r1.14 \
    xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h
diff -u xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h:1.14 xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h:1.15
--- xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h:1.14	Wed Jul 24 16:07:59 2019
+++ xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h	Fri Dec 24 04:41:40 2021
@@ -135,6 +135,9 @@ write_sx_io(Cg14Ptr p, int reg, uint32_t
 	p->queuecount++;
 }
 
+#define sxi(inst) write_sx_reg(p, SX_INSTRUCTIONS, (inst))
+#define sxm(inst, addr, reg, count) write_sx_io(p, (addr) & ~7, inst((reg), (count), (addr) & 7))
+
 Bool CG14SetupCursor(ScreenPtr);
 Bool CG14InitAccel(ScreenPtr);
 

Index: xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c
diff -u xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c:1.26 xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c:1.27
--- xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c:1.26	Sun Dec 19 04:50:27 2021
+++ xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c	Fri Dec 24 04:41:40 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: cg14_accel.c,v 1.26 2021/12/19 04:50:27 macallan Exp $ */
+/* $NetBSD: cg14_accel.c,v 1.27 2021/12/24 04:41:40 macallan Exp $ */
 /*
  * Copyright (c) 2013 Michael Lorenz
  * All rights reserved.
@@ -184,10 +184,8 @@ CG14Copy32(PixmapPtr pDstPixmap,
 				d = dststart;
 				while ( count < w) {
 					num = min(32, w - count);
-					write_sx_io(p, s,
-					    SX_LD(10, num - 1, s & 7));
-					write_sx_io(p, d,
-					    SX_STM(10, num - 1, d & 7));
+					sxm(SX_LD, s, 10, num - 1);
+					sxm(SX_STM, d, 10, num - 1);
 					s += xinc;
 					d += xinc;
 					count += 32;
@@ -203,10 +201,8 @@ CG14Copy32(PixmapPtr pDstPixmap,
 				d = dststart;
 				count = w;
 				for (i = 0; i < chunks; i++) {
-					write_sx_io(p, s,
-					    SX_LD(10, 31, s & 7));
-					write_sx_io(p, d,
-					    SX_STM(10, 31, d & 7));
+					sxm(SX_LD, s, 10, 31);
+					sxm(SX_STM, d, 10, 31);
 					s -= 128;
 					d -= 128;
 					count -= 32;
@@ -215,10 +211,8 @@ CG14Copy32(PixmapPtr pDstPixmap,
 				if (count > 0) {
 					s += (32 - count) << 2;
 					d += (32 - count) << 2;
-					write_sx_io(p, s,
-					    SX_LD(10, count - 1, s & 7));
-					write_sx_io(p, d,
-					    SX_STM(10, count - 1, d & 7));
+					sxm(SX_LD, s, 10, count - 1);
+					sxm(SX_STM, d, 10, count - 1);
 				}
 				srcstart += srcinc;
 				dststart += dstinc;
@@ -234,21 +228,15 @@ CG14Copy32(PixmapPtr pDstPixmap,
 				d = dststart;
 				while ( count < w) {
 					num = min(32, w - count);
-					write_sx_io(p, s,
-					    SX_LD(10, num - 1, s & 7));
-					write_sx_io(p, d,
-					    SX_LD(42, num - 1, d & 7));
+					sxm(SX_LD, s, 10, num - 1);
+					sxm(SX_LD, d, 42, num - 1);
 					if (num > 16) {
-						write_sx_reg(p, SX_INSTRUCTIONS,
-					    	 SX_ROP(10, 42, 74, 15));
-						write_sx_reg(p, SX_INSTRUCTIONS,
-					    	 SX_ROP(26, 58, 90, num - 17));
+						sxi(SX_ROP(10, 42, 74, 15));
+						sxi(SX_ROP(26, 58, 90, num - 17));
 					} else {
-						write_sx_reg(p, SX_INSTRUCTIONS,
-					    	 SX_ROP(10, 42, 74, num - 1));
+						sxi(SX_ROP(10, 42, 74, num - 1));
 					}
-					write_sx_io(p, d,
-					    SX_STM(74, num - 1, d & 7));
+					sxm(SX_STM, d, 74, num - 1);
 					s += xinc;
 					d += xinc;
 					count += 32;
@@ -264,14 +252,11 @@ CG14Copy32(PixmapPtr pDstPixmap,
 				d = dststart;
 				count = w;
 				for (i = 0; i < chunks; i++) {
-					write_sx_io(p, s, SX_LD(10, 31, s & 7));
-					write_sx_io(p, d, SX_LD(42, 31, d & 7));
-					write_sx_reg(p, SX_INSTRUCTIONS,
-				    	    SX_ROP(10, 42, 74, 15));
-					write_sx_reg(p, SX_INSTRUCTIONS,
-				    	    SX_ROP(26, 58, 90, 15));
-					write_sx_io(p, d,
-					    SX_STM(74, 31, d & 7));
+					sxm(SX_LD, s, 10, 31);
+					sxm(SX_LD, d, 42, 31);
+					sxi(SX_ROP(10, 42, 74, 15));
+					sxi(SX_ROP(26, 58, 90, 15));
+					sxm(SX_STM, d, 74, 31);
 					s -= 128;
 					d -= 128;
 					count -= 32;
@@ -280,22 +265,15 @@ CG14Copy32(PixmapPtr pDstPixmap,
 				if (count > 0) {
 					s += (32 - count) << 2;
 					d += (32 - count) << 2;
-					write_sx_io(p, s,
-					    SX_LD(10, count - 1, s & 7));
-					write_sx_io(p, d,
-					    SX_LD(42, count - 1, d & 7));
+					sxm(SX_LD, s, 10, count - 1);
+					sxm(SX_LD, d, 42, count - 1);
 					if (count > 16) {
-						write_sx_reg(p, SX_INSTRUCTIONS,
-					    	    SX_ROP(10, 42, 74, 15));
-						write_sx_reg(p, SX_INSTRUCTIONS,
-					    	 SX_ROP(26, 58, 90, count - 17));
+						sxi(SX_ROP(10, 42, 74, 15));
+						sxi(SX_ROP(26, 58, 90, count - 17));
 					} else {
-						write_sx_reg(p, SX_INSTRUCTIONS,
-					    	 SX_ROP(10, 42, 74, count - 1));
+						sxi(SX_ROP(10, 42, 74, count - 1));
 					}
-					
-					write_sx_io(p, d,
-					    SX_STM(74, count - 1, d & 7));
+					sxm(SX_STM, d, 74, count - 1);
 				}
 				srcstart += srcinc;
 				dststart += dstinc;
@@ -309,7 +287,8 @@ CG14Copy32(PixmapPtr pDstPixmap,
  * copy with same alignment, left to right, no ROP
  */
 static void
-CG14Copy8_aligned_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch)
+CG14Copy8_aligned_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h,
+    int srcpitch, int dstpitch)
 {
 	int saddr, daddr, pre, cnt, wrds;
 
@@ -324,8 +303,8 @@ CG14Copy8_aligned_norop(Cg14Ptr p, int s
 		daddr = dststart;
 		cnt = w;
 		if (pre > 0) {
-			write_sx_io(p, saddr & ~7, SX_LDB(8, pre - 1, saddr & 7));
-			write_sx_io(p, daddr & ~7, SX_STB(8, pre - 1, daddr & 7));
+			sxm(SX_LDB, saddr, 8, pre - 1);
+			sxm(SX_STB, daddr, 8, pre - 1);
 			saddr += pre;
 			daddr += pre;
 			cnt -= pre;
@@ -333,15 +312,15 @@ CG14Copy8_aligned_norop(Cg14Ptr p, int s
 		}
 		while (cnt > 3) {
 			wrds = min(32, cnt >> 2);
-			write_sx_io(p, saddr & ~7, SX_LD(8, wrds - 1, saddr & 7));
-			write_sx_io(p, daddr & ~7, SX_ST(8, wrds - 1, daddr & 7));
+			sxm(SX_LD, saddr, 8, wrds - 1);
+			sxm(SX_ST, daddr, 8, wrds - 1);
 			saddr += wrds << 2;
 			daddr += wrds << 2;
 			cnt -= wrds << 2;
 		}
 		if (cnt > 0) {
-			write_sx_io(p, saddr & ~7, SX_LDB(8, cnt - 1, saddr & 7));
-			write_sx_io(p, daddr & ~7, SX_STB(8, cnt - 1, daddr & 7));
+			sxm(SX_LDB, saddr, 8, cnt - 1);
+			sxm(SX_STB, daddr, 8, cnt - 1);
 		}
 next:
 		srcstart += srcpitch;
@@ -354,7 +333,8 @@ next:
  * copy with same alignment, left to right, ROP
  */
 static void
-CG14Copy8_aligned_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch)
+CG14Copy8_aligned_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h,
+    int srcpitch, int dstpitch)
 {
 	int saddr, daddr, pre, cnt, wrds;
 
@@ -369,10 +349,10 @@ CG14Copy8_aligned_rop(Cg14Ptr p, int src
 		daddr = dststart;
 		cnt = w;
 		if (pre > 0) {
-			write_sx_io(p, saddr & ~7, SX_LDB(8, pre - 1, saddr & 7));
-			write_sx_io(p, daddr & ~7, SX_LDB(40, pre - 1, daddr & 7));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, pre - 1));
-			write_sx_io(p, daddr & ~7, SX_STB(72, pre - 1, daddr & 7));
+			sxm(SX_LDB, saddr, 8, pre - 1);
+			sxm(SX_LDB, daddr, 40, pre - 1);
+			sxi(SX_ROP(8, 40, 72, pre - 1));
+			sxm(SX_STB, daddr, 72, pre - 1);
 			saddr += pre;
 			daddr += pre;
 			cnt -= pre;
@@ -380,23 +360,23 @@ CG14Copy8_aligned_rop(Cg14Ptr p, int src
 		}
 		while (cnt > 3) {
 			wrds = min(32, cnt >> 2);
-			write_sx_io(p, saddr & ~7, SX_LD(8, wrds - 1, saddr & 7));
-			write_sx_io(p, daddr & ~7, SX_LD(40, wrds - 1, daddr & 7));
+			sxm(SX_LD, saddr, 8, wrds - 1);
+			sxm(SX_LD, daddr, 40, wrds - 1);
 			if (cnt > 16) {
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, 15));
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 56, 88, wrds - 17));
+				sxi(SX_ROP(8, 40, 72, 15));
+				sxi(SX_ROP(8, 56, 88, wrds - 17));
 			} else
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, wrds - 1));
-			write_sx_io(p, daddr & ~7, SX_ST(72, wrds - 1, daddr & 7));
+				sxi(SX_ROP(8, 40, 72, wrds - 1));
+			sxm(SX_ST, daddr, 72, wrds - 1);
 			saddr += wrds << 2;
 			daddr += wrds << 2;
 			cnt -= wrds << 2;
 		}
 		if (cnt > 0) {
-			write_sx_io(p, saddr & ~7, SX_LDB(8, cnt - 1, saddr & 7));
-			write_sx_io(p, daddr & ~7, SX_LDB(40, cnt - 1, daddr & 7));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, cnt - 1));
-			write_sx_io(p, daddr & ~7, SX_STB(72, cnt - 1, daddr & 7));
+			sxm(SX_LDB, saddr, 8, cnt - 1);
+			sxm(SX_LDB, daddr, 40, cnt - 1);
+			sxi(SX_ROP(8, 40, 72, cnt - 1));
+			sxm(SX_STB, daddr, 72, cnt - 1);
 		}
 next:
 		srcstart += srcpitch;
@@ -459,38 +439,38 @@ CG14Copy8_short_rop(Cg14Ptr p, int srcst
 	daddr = dststart & ~3;
 
 	while (h > 0) {
-		write_sx_io(p, daddr & ~7, SX_LD(80, wrds - 1, daddr & 7));
-		write_sx_io(p, saddr & ~7, SX_LD(sreg, swrds - 1, saddr & 7));
+		sxm(SX_LD, daddr, 80, wrds - 1);
+		sxm(SX_LD, saddr, sreg, swrds - 1);
 		if (wrds > 15) {
 			if (dist != 0) {
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, 15));
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(24, dist, 56, wrds - 16));
+				sxi(SX_FUNNEL_I(8, dist, 40, 15));
+				sxi(SX_FUNNEL_I(24, dist, 56, wrds - 16));
 				/* shifted source pixels are now at register 40+ */
 				ssreg = 40;
 			} else ssreg = 8;
 			if (pre != 0) {
 				/* mask out leading junk */
 				write_sx_reg(p, SX_QUEUED(R_MASK), lmask);
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 0));
+				sxi(SX_ROPB(ssreg, 80, 8, 0));
 				write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff);
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 1, 81, 9, 14));	
+				sxi(SX_ROPB(ssreg + 1, 81, 9, 14));	
 			} else {
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 15));
+				sxi(SX_ROPB(ssreg, 80, 8, 15));
 			}
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 16, 96, 24, wrds - 16));
+			sxi(SX_ROPB(ssreg + 16, 96, 24, wrds - 16));
 		} else {
 			if (dist != 0) {
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, wrds));
+				sxi(SX_FUNNEL_I(8, dist, 40, wrds));
 				ssreg = 40;
 			} else ssreg = 8;
 			if (pre != 0) {
 				/* mask out leading junk */
 				write_sx_reg(p, SX_QUEUED(R_MASK), lmask);
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 0));
+				sxi(SX_ROPB(ssreg, 80, 8, 0));
 				write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff);
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 1, 81, 9, wrds));
+				sxi(SX_ROPB(ssreg + 1, 81, 9, wrds));
 			} else {
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, wrds));
+				sxi(SX_ROPB(ssreg, 80, 8, wrds));
 			}
 		}
 		if (post != 0) {
@@ -502,15 +482,15 @@ CG14Copy8_short_rop(Cg14Ptr p, int srcst
 			 * the left end but it's less annoying this way and
 			 * the instruction count is the same
 			 */
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(7 + wrds, 7, 5, 0));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(79 + wrds, 6, 4, 0));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(5, 4, 7 + wrds, 0));
+			sxi(SX_ANDS(7 + wrds, 7, 5, 0));
+			sxi(SX_ANDS(79 + wrds, 6, 4, 0));
+			sxi(SX_ORS(5, 4, 7 + wrds, 0));
 		}
 #ifdef DEBUG
-		write_sx_io(p, taddr & ~7, SX_ST(40, wrds - 1, taddr & 7));
+		sxm(SX_ST, taddr, 40, wrds - 1);
 		taddr += dstpitch;
 #endif
-		write_sx_io(p, daddr & ~7, SX_ST(8, wrds - 1, daddr & 7));
+		sxm(SX_ST, daddr, 8, wrds - 1);
 		saddr += srcpitch;
 		daddr += dstpitch;
 		h--;
@@ -519,7 +499,8 @@ CG14Copy8_short_rop(Cg14Ptr p, int srcst
 
 /* up to 124 pixels so direction doesn't matter, unaligned, straight copy */
 static void
-CG14Copy8_short_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch)
+CG14Copy8_short_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h,
+    int srcpitch, int dstpitch)
 {
 	int saddr, daddr, pre, dist, wrds, swrds, spre, sreg, restaddr, post;
 	int ssreg;
@@ -571,30 +552,30 @@ CG14Copy8_short_norop(Cg14Ptr p, int src
 	daddr = dststart & ~3;
 	
 	while (h > 0) {
-		write_sx_io(p, saddr & ~7, SX_LD(sreg, swrds - 1, saddr & 7));
+		sxm(SX_LD, saddr, sreg, swrds - 1);
 		if (wrds > 15) {
 			if (dist != 0) {
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, 15));
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(24, dist, 56, wrds - 16));
-				/* shifted source pixels are now at register 40+ */
+				sxi(SX_FUNNEL_I(8, dist, 40, 15));
+				sxi(SX_FUNNEL_I(24, dist, 56, wrds - 16));
+				/* shifted source pixels are now at reg 40+ */
 				ssreg = 40;
 			} else ssreg = 8;
 			if (pre != 0) {
 				/* read only the first word */
-				write_sx_io(p, daddr & ~7, SX_LD(80, 0, daddr & 7));
+				sxm(SX_LD, daddr, 80, 0);
 				/* mask out leading junk */
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, ssreg, 0));
+				sxi(SX_ROPB(ssreg, 80, ssreg, 0));
 			}
 		} else {
 			if (dist != 0) {
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, wrds));
+				sxi(SX_FUNNEL_I(8, dist, 40, wrds));
 				ssreg = 40;
 			} else ssreg = 8;
 			if (pre != 0) {
 				/* read only the first word */
-				write_sx_io(p, daddr & ~7, SX_LD(80, 0, daddr & 7));
+				sxm(SX_LD, daddr, 80, 0);
 				/* mask out leading junk */
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, ssreg, 0));
+				sxi(SX_ROPB(ssreg, 80, ssreg, 0));
 			}
 		}
 		if (post != 0) {
@@ -607,16 +588,16 @@ CG14Copy8_short_norop(Cg14Ptr p, int src
 			 * the left end but it's less annoying this way and
 			 * the instruction count is the same
 			 */
-			write_sx_io(p, laddr & ~7, SX_LD(81, 0, laddr & 7));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(ssreg + wrds - 1, 7, 5, 0));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(81, 6, 4, 0));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(5, 4, ssreg + wrds - 1, 0));
+			sxm(SX_LD, laddr, 81, 0);
+			sxi(SX_ANDS(ssreg + wrds - 1, 7, 5, 0));
+			sxi(SX_ANDS(81, 6, 4, 0));
+			sxi(SX_ORS(5, 4, ssreg + wrds - 1, 0));
 		}
 #ifdef DEBUG
-		write_sx_io(p, taddr & ~7, SX_ST(40, wrds - 1, taddr & 7));
+		sxm(SX_ST, taddr, 40, wrds - 1);
 		taddr += dstpitch;
 #endif
-		write_sx_io(p, daddr & ~7, SX_ST(ssreg, wrds - 1, daddr & 7));
+		sxm(SX_ST, daddr, ssreg, wrds - 1);
 		saddr += srcpitch;
 		daddr += dstpitch;
 		h--;
@@ -663,10 +644,12 @@ CG14Copy8(PixmapPtr pDstPixmap,
 	if ((w < 125) && (w > 8)) {
 		switch (p->last_rop) {
 			case 0xcc:
-				CG14Copy8_short_norop(p, srcstart, dststart, w, h, srcinc, dstinc);
+				CG14Copy8_short_norop(p,
+				    srcstart, dststart, w, h, srcinc, dstinc);
 				break;
 			default:
-				CG14Copy8_short_rop(p, srcstart, dststart, w, h, srcinc, dstinc);
+				CG14Copy8_short_rop(p,
+				    srcstart, dststart, w, h, srcinc, dstinc);
 		}
 		return;
 	}
@@ -687,10 +670,12 @@ CG14Copy8(PixmapPtr pDstPixmap,
 	if (((srcstart & 3) == (dststart & 3)) && (xinc > 0)) {
 		switch (p->last_rop) {
 			case 0xcc:
-				CG14Copy8_aligned_norop(p, srcstart, dststart, w, h, srcinc, dstinc);
+				CG14Copy8_aligned_norop(p,
+				    srcstart, dststart, w, h, srcinc, dstinc);
 				break;
 			default:
-				CG14Copy8_aligned_rop(p, srcstart, dststart, w, h, srcinc, dstinc);
+				CG14Copy8_aligned_rop(p,
+				    srcstart, dststart, w, h, srcinc, dstinc);
 		}
 		return;
 	}
@@ -706,18 +691,22 @@ CG14Copy8(PixmapPtr pDstPixmap,
 	 */
 	if (w > 8) {
 		int next, wi, end = dststart + w;
-		DPRINTF(X_ERROR, "%s %08x %08x %d\n", __func__, srcstart, dststart, w);
+		DPRINTF(X_ERROR, "%s %08x %08x %d\n",
+		    __func__, srcstart, dststart, w);
 		if ((p->xdir < 0) && (srcoff == dstoff)) {		
 			srcstart += w;
 			next = max((end - 120) & ~3, dststart);
 			wi = end - next;
 			srcstart -= wi;
 			while (wi > 0) {
-				DPRINTF(X_ERROR, "%s RL %08x %08x %d\n", __func__, srcstart, next, wi);
+				DPRINTF(X_ERROR, "%s RL %08x %08x %d\n",
+				    __func__, srcstart, next, wi);
 				if (p->last_rop == 0xcc) {
-					CG14Copy8_short_norop(p, srcstart, next, wi, h, srcinc, dstinc);
+					CG14Copy8_short_norop(p, srcstart,
+					    next, wi, h, srcinc, dstinc);
 				} else
-					CG14Copy8_short_rop(p, srcstart, next, wi, h, srcinc, dstinc);
+					CG14Copy8_short_rop(p, srcstart,
+					    next, wi, h, srcinc, dstinc);
 				end = next;
 				/*
 				 * avoid extremely narrow copies so I don't
@@ -736,11 +725,16 @@ CG14Copy8(PixmapPtr pDstPixmap,
 			next = min(end, (dststart + 124) & ~3);
 			wi = next - dststart;
 			while (wi > 0) {
-				DPRINTF(X_ERROR, "%s LR %08x %08x %d\n", __func__, srcstart, next, wi);
+				DPRINTF(X_ERROR, "%s LR %08x %08x %d\n",
+				    __func__, srcstart, next, wi);
 				if (p->last_rop == 0xcc) {
-					CG14Copy8_short_norop(p, srcstart, dststart, wi, h, srcinc, dstinc);
+					CG14Copy8_short_norop(p, 
+					    srcstart, dststart, wi, h,
+					    srcinc, dstinc);
 				} else
-					CG14Copy8_short_rop(p, srcstart, dststart, wi, h, srcinc, dstinc);
+					CG14Copy8_short_rop(p,
+					    srcstart, dststart, wi, h,
+					    srcinc, dstinc);
 				srcstart += wi;
 				dststart = next;
 				if ((end - dststart) < 140) {
@@ -769,10 +763,8 @@ CG14Copy8(PixmapPtr pDstPixmap,
 				d = dststart;
 				while ( count < w) {
 					num = min(32, w - count);
-					write_sx_io(p, s,
-					    SX_LDB(10, num - 1, s & 7));
-					write_sx_io(p, d,
-					    SX_STBM(10, num - 1, d & 7));
+					sxm(SX_LDB, s, 10, num - 1);
+					sxm(SX_STBM, d, 10, num - 1);
 					s += xinc;
 					d += xinc;
 					count += 32;
@@ -788,10 +780,8 @@ CG14Copy8(PixmapPtr pDstPixmap,
 				d = dststart;
 				count = w;
 				for (i = 0; i < chunks; i++) {
-					write_sx_io(p, s,
-					    SX_LDB(10, 31, s & 7));
-					write_sx_io(p, d,
-					    SX_STBM(10, 31, d & 7));
+					sxm(SX_LDB, s, 10, 31);
+					sxm(SX_STBM, d, 10, 31);
 					s -= 32;
 					d -= 32;
 					count -= 32;
@@ -800,10 +790,8 @@ CG14Copy8(PixmapPtr pDstPixmap,
 				if (count > 0) {
 					s += (32 - count);
 					d += (32 - count);
-					write_sx_io(p, s,
-					    SX_LDB(10, count - 1, s & 7));
-					write_sx_io(p, d,
-					    SX_STBM(10, count - 1, d & 7));
+					sxm(SX_LDB, s, 10, count - 1);
+					sxm(SX_STBM, d, 10, count - 1);
 				}
 				srcstart += srcinc;
 				dststart += dstinc;
@@ -819,21 +807,15 @@ CG14Copy8(PixmapPtr pDstPixmap,
 				d = dststart;
 				while ( count < w) {
 					num = min(32, w - count);
-					write_sx_io(p, s,
-					    SX_LDB(10, num - 1, s & 7));
-					write_sx_io(p, d,
-					    SX_LDB(42, num - 1, d & 7));
+					sxm(SX_LDB, s, 10, num - 1);
+					sxm(SX_LDB, d, 42, num - 1);
 					if (num > 16) {
-						write_sx_reg(p, SX_INSTRUCTIONS,
-					    	 SX_ROP(10, 42, 74, 15));
-						write_sx_reg(p, SX_INSTRUCTIONS,
-					    	 SX_ROP(26, 58, 90, num - 17));
+						sxi(SX_ROP(10, 42, 74, 15));
+						sxi(SX_ROP(26, 58, 90, num - 17));
 					} else {
-						write_sx_reg(p, SX_INSTRUCTIONS,
-					    	 SX_ROP(10, 42, 74, num - 1));
+						sxi(SX_ROP(10, 42, 74, num - 1));
 					}
-					write_sx_io(p, d,
-					    SX_STBM(74, num - 1, d & 7));
+					sxm(SX_STBM, d, 74, num - 1);
 					s += xinc;
 					d += xinc;
 					count += 32;
@@ -849,14 +831,11 @@ CG14Copy8(PixmapPtr pDstPixmap,
 				d = dststart;
 				count = w;
 				for (i = 0; i < chunks; i++) {
-					write_sx_io(p, s, SX_LDB(10, 31, s & 7));
-					write_sx_io(p, d, SX_LDB(42, 31, d & 7));
-					write_sx_reg(p, SX_INSTRUCTIONS,
-				    	    SX_ROP(10, 42, 74, 15));
-					write_sx_reg(p, SX_INSTRUCTIONS,
-				    	    SX_ROP(26, 58, 90, 15));
-					write_sx_io(p, d,
-					    SX_STBM(74, 31, d & 7));
+					sxm(SX_LDB, s, 10, 31);
+					sxm(SX_LDB, d, 42, 31);
+					sxi(SX_ROP(10, 42, 74, 15));
+					sxi(SX_ROP(26, 58, 90, 15));
+					sxm(SX_STBM, d, 74, 31);
 					s -= 128;
 					d -= 128;
 					count -= 32;
@@ -865,22 +844,15 @@ CG14Copy8(PixmapPtr pDstPixmap,
 				if (count > 0) {
 					s += (32 - count);
 					d += (32 - count);
-					write_sx_io(p, s,
-					    SX_LDB(10, count - 1, s & 7));
-					write_sx_io(p, d,
-					    SX_LDB(42, count - 1, d & 7));
+					sxm(SX_LDB, s, 10, count - 1);
+					sxm(SX_LDB, d, 42, count - 1);
 					if (count > 16) {
-						write_sx_reg(p, SX_INSTRUCTIONS,
-					    	    SX_ROP(10, 42, 74, 15));
-						write_sx_reg(p, SX_INSTRUCTIONS,
-					    	 SX_ROP(26, 58, 90, count - 17));
+						sxi(SX_ROP(10, 42, 74, 15));
+						sxi(SX_ROP(26, 58, 90, count - 17));
 					} else {
-						write_sx_reg(p, SX_INSTRUCTIONS,
-					    	 SX_ROP(10, 42, 74, count - 1));
+						sxi(SX_ROP(10, 42, 74, count - 1));
 					}
-					
-					write_sx_io(p, d,
-					    SX_STBM(74, count - 1, d & 7));
+					sxm(SX_STBM, d, 74, count - 1);
 				}
 				srcstart += srcinc;
 				dststart += dstinc;
@@ -956,8 +928,7 @@ CG14Solid32(Cg14Ptr p, uint32_t start, u
 			while (x < w) {
 				ptr = start + (x << 2);
 				num = min(32, w - x);
-				write_sx_io(p, ptr,
-				    SX_STS(8, num - 1, ptr & 7));
+				sxm(SX_STS, ptr, 8, num - 1);
 				x += 32;
 			}
 			start += pitch;
@@ -969,8 +940,7 @@ CG14Solid32(Cg14Ptr p, uint32_t start, u
 		/* alright, let's do actual ROP stuff */
 
 		/* first repeat the fill colour into 16 registers */
-		write_sx_reg(p, SX_INSTRUCTIONS,
-		    SX_SELECT_S(8, 8, 10, 15));
+		sxi(SX_SELECT_S(8, 8, 10, 15));
 
 		for (line = 0; line < h; line++) {
 			x = 0;
@@ -978,24 +948,19 @@ CG14Solid32(Cg14Ptr p, uint32_t start, u
 				ptr = start + (x << 2);
 				num = min(32, w - x);
 				/* now suck fb data into registers */
-				write_sx_io(p, ptr,
-				    SX_LD(42, num - 1, ptr & 7));
+				sxm(SX_LD, ptr, 42, num - 1);
 				/*
 				 * ROP them with the fill data we left in 10
 				 * non-memory ops can only have counts up to 16
 				 */
 				if (num <= 16) {
-					write_sx_reg(p, SX_INSTRUCTIONS,
-					    SX_ROP(10, 42, 74, num - 1));
+					sxi(SX_ROP(10, 42, 74, num - 1));
 				} else {
-					write_sx_reg(p, SX_INSTRUCTIONS,
-					    SX_ROP(10, 42, 74, 15));
-					write_sx_reg(p, SX_INSTRUCTIONS,
-					    SX_ROP(10, 58, 90, num - 17));
+					sxi(SX_ROP(10, 42, 74, 15));
+					sxi(SX_ROP(10, 58, 90, num - 17));
 				}
 				/* and write the result back into memory */
-				write_sx_io(p, ptr,
-				    SX_ST(74, num - 1, ptr & 7));
+				sxm(SX_ST, ptr, 74, num - 1);
 				x += 32;
 			}
 			start += pitch;
@@ -1020,7 +985,7 @@ CG14Solid8(Cg14Ptr p, uint32_t start, ui
 			cnt = w;
 			pre = min(pre, cnt);
 			if (pre) {
-				write_sx_io(p, ptr & ~7, SX_STBS(8, pre - 1, ptr & 7));
+				sxm(SX_STBS, ptr, 8, pre - 1);
 				ptr += pre;
 				cnt -= pre;
 				if (cnt == 0) goto next;
@@ -1029,13 +994,13 @@ CG14Solid8(Cg14Ptr p, uint32_t start, ui
 			if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr);
 			while(cnt > 3) {
 				num = min(32, cnt >> 2);
-				write_sx_io(p, ptr & ~7, SX_STS(8, num - 1, ptr & 7));
+				sxm(SX_STS, ptr, 8, num - 1);
 				ptr += num << 2;
 				cnt -= num << 2;
 			}
 			if (cnt > 3) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt);
 			if (cnt > 0) {
-				write_sx_io(p, ptr & ~7, SX_STBS(8, cnt - 1, ptr & 7));
+				sxm(SX_STBS, ptr, 8, cnt - 1);
 			}
 			if ((ptr + cnt) != (start + w)) xf86Msg(X_ERROR, "%s %x vs %x\n", __func__, ptr + cnt, start + w);
 next:
@@ -1048,17 +1013,16 @@ next:
 		/* alright, let's do actual ROP stuff */
 
 		/* first repeat the fill colour into 16 registers */
-		write_sx_reg(p, SX_INSTRUCTIONS,
-		    SX_SELECT_S(8, 8, 10, 15));
+		sxi(SX_SELECT_S(8, 8, 10, 15));
 
 		for (line = 0; line < h; line++) {
 			ptr = start;
 			cnt = w;
 			pre = min(pre, cnt);
 			if (pre) {
-				write_sx_io(p, ptr & ~7, SX_LDB(26, pre - 1, ptr & 7));
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(10, 26, 42, pre - 1));
-				write_sx_io(p, ptr & ~7, SX_STB(42, pre - 1, ptr & 7));
+				sxm(SX_LDB, ptr, 26, pre - 1);
+				sxi(SX_ROP(10, 26, 42, pre - 1));
+				sxm(SX_STB, ptr, 42, pre - 1);
 				ptr += pre;
 				cnt -= pre;
 				if (cnt == 0) goto next2;
@@ -1067,25 +1031,22 @@ next:
 			if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr);
 			while(cnt > 3) {
 				num = min(32, cnt >> 2);
-				write_sx_io(p, ptr & ~7, SX_LD(26, num - 1, ptr & 7));
+				sxm(SX_LD, ptr, 26, num - 1);
 				if (num <= 16) {
-					write_sx_reg(p, SX_INSTRUCTIONS,
-					    SX_ROP(10, 26, 58, num - 1));
+					sxi(SX_ROP(10, 26, 58, num - 1));
 				} else {
-					write_sx_reg(p, SX_INSTRUCTIONS,
-					    SX_ROP(10, 26, 58, 15));
-					write_sx_reg(p, SX_INSTRUCTIONS,
-					    SX_ROP(10, 42, 74, num - 17));
+					sxi(SX_ROP(10, 26, 58, 15));
+					sxi(SX_ROP(10, 42, 74, num - 17));
 				}
-				write_sx_io(p, ptr & ~7, SX_ST(58, num - 1, ptr & 7));
+				sxm(SX_ST, ptr, 58, num - 1);
 				ptr += num << 2;
 				cnt -= num << 2;
 			}
 			if (cnt > 3) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt);
 			if (cnt > 0) {
-				write_sx_io(p, ptr & ~7, SX_LDB(26, cnt - 1, ptr & 7));
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(10, 26, 42, cnt - 1));
-				write_sx_io(p, ptr & ~7, SX_STB(42, cnt - 1, ptr & 7));
+				sxm(SX_LDB, ptr, 26, cnt - 1);
+				sxi(SX_ROP(10, 26, 42, cnt - 1));
+				sxm(SX_STB, ptr, 42, cnt - 1);
 			}
 			if ((ptr + cnt) != (start + w)) xf86Msg(X_ERROR, "%s %x vs %x\n", __func__, ptr + cnt, start + w);
 next2:

Index: xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c
diff -u xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c:1.13 xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c:1.14
--- xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c:1.13	Wed Jul 24 16:07:59 2019
+++ xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c	Fri Dec 24 04:41:40 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: cg14_render.c,v 1.13 2019/07/24 16:07:59 macallan Exp $ */
+/* $NetBSD: cg14_render.c,v 1.14 2021/12/24 04:41:40 macallan Exp $ */
 /*
  * Copyright (c) 2013 Michael Lorenz
  * All rights reserved.
@@ -75,37 +75,30 @@ void CG14Comp_Over32Solid(Cg14Ptr p,
 		for (x = 0; x < width; x += 4) {
 			rest = width - x;
 			/* fetch 4 mask values */
-			write_sx_io(p, mskx, SX_LDUQ0(12, 3, mskx & 7));
+			sxm(SX_LDUQ0, mskx, 12, 3);
 			/* fetch destination pixels */
-			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
+			sxm(SX_LDUQ0, dstx, 60, 3);
 			/* duplicate them for all channels */
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2));
+			sxi(SX_ORS(0, 12, 13, 2));
+			sxi(SX_ORS(0, 16, 17, 2));
+			sxi(SX_ORS(0, 20, 21, 2));
+			sxi(SX_ORS(0, 24, 25, 2));
 			/* generate inverted alpha */
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_XORS(12, 8, 28, 15));
+			sxi(SX_XORS(12, 8, 28, 15));
 			/* multiply source */
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_MUL16X16SR8(8, 12, 44, 3));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_MUL16X16SR8(8, 16, 48, 3));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_MUL16X16SR8(8, 20, 52, 3));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_MUL16X16SR8(8, 24, 56, 3));
+			sxi(SX_MUL16X16SR8(8, 12, 44, 3));
+			sxi(SX_MUL16X16SR8(8, 16, 48, 3));
+			sxi(SX_MUL16X16SR8(8, 20, 52, 3));
+			sxi(SX_MUL16X16SR8(8, 24, 56, 3));
 			/* multiply dest */
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_MUL16X16SR8(28, 60, 76, 15));
+			sxi(SX_MUL16X16SR8(28, 60, 76, 15));
 			/* add up */
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(44, 76, 92, 15));
+			sxi(SX_ADDV(44, 76, 92, 15));
 			/* write back */
 			if (rest < 4) {
-				write_sx_io(p, dstx, SX_STUQ0C(92, rest - 1, dstx & 7));
+				sxm(SX_STUQ0C, dstx, 92, rest - 1);
 			} else {
-				write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
+				sxm(SX_STUQ0C, dstx, 92, 3);
 			}
 			dstx += 16;
 			mskx += 16;
@@ -118,7 +111,7 @@ void CG14Comp_Over32Solid(Cg14Ptr p,
 				/* nothing to do - all transparent */
 			} else if (m == 0xff) {
 				/* all opaque */
-				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
+				sxm(SX_STUQ0, dstx, 8, 0);
 			} else {
 				/* fetch alpha value, stick it into scam */
 				/* mask is in R[12:15] */
@@ -126,28 +119,22 @@ void CG14Comp_Over32Solid(Cg14Ptr p,
 				    SX_LDUQ0(12, 0, mskx & 7));*/
 				write_sx_reg(p, SX_QUEUED(12), m);
 				/* fetch dst pixel */
-				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ORV(12, 0, R_SCAM, 0));
+				sxm(SX_LDUQ0, dstx, 20, 0);
+				sxi(SX_ORV(12, 0, R_SCAM, 0));
 				/*
 				 * src * alpha + R0
 				 * R[9:11] * SCAM + R0 -> R[17:19]
 				 */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(9, 0, 17, 2));
+				sxi(SX_SAXP16X16SR8(9, 0, 17, 2));
 			
 				/* invert SCAM */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_XORV(12, 8, R_SCAM, 0));
+				sxi(SX_XORV(12, 8, R_SCAM, 0));
 #ifdef SX_DEBUG
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_XORV(12, 8, 13, 0));
+				sxi(SX_XORV(12, 8, 13, 0));
 #endif
 				/* dst * (1 - alpha) + R[13:15] */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(21, 17, 25, 2));
-				write_sx_io(p, dstx,
-				    SX_STUQ0C(24, 0, dstx & 7));
+				sxi(SX_SAXP16X16SR8(21, 17, 25, 2));
+				sxm(SX_STUQ0C, dstx, 24, 0);
 			}
 			dstx += 4;
 			mskx += 4;
@@ -181,37 +168,30 @@ void CG14Comp_Over8Solid(Cg14Ptr p,
 		for (x = 0; x < width; x += 4) {
 			rest = width - x;			
 			/* fetch 4 mask values */
-			write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7));
+			sxm(SX_LDB, mskx, 12, 3);
 			/* fetch destination pixels */
-			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
+			sxm(SX_LDUQ0, dstx, 60, 3);
 			/* duplicate them for all channels */
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 13, 16, 3));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 14, 20, 3));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 15, 24, 3));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
+			sxi(SX_ORS(0, 13, 16, 3));
+			sxi(SX_ORS(0, 14, 20, 3));
+			sxi(SX_ORS(0, 15, 24, 3));
+			sxi(SX_ORS(0, 12, 13, 2));
 			/* generate inverted alpha */
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_XORS(12, 8, 28, 15));
+			sxi(SX_XORS(12, 8, 28, 15));
 			/* multiply source */
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_MUL16X16SR8(8, 12, 44, 3));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_MUL16X16SR8(8, 16, 48, 3));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_MUL16X16SR8(8, 20, 52, 3));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_MUL16X16SR8(8, 24, 56, 3));
+			sxi(SX_MUL16X16SR8(8, 12, 44, 3));
+			sxi(SX_MUL16X16SR8(8, 16, 48, 3));
+			sxi(SX_MUL16X16SR8(8, 20, 52, 3));
+			sxi(SX_MUL16X16SR8(8, 24, 56, 3));
 			/* multiply dest */
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_MUL16X16SR8(28, 60, 76, 15));
+			sxi(SX_MUL16X16SR8(28, 60, 76, 15));
 			/* add up */
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(44, 76, 92, 15));
+			sxi(SX_ADDV(44, 76, 92, 15));
 			/* write back */
 			if (rest < 4) {
-				write_sx_io(p, dstx, SX_STUQ0C(92, rest - 1, dstx & 7));
+				sxm(SX_STUQ0C, dstx, 92, rest - 1);
 			} else {
-				write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
+				sxm(SX_STUQ0C, dstx, 92, 3);
 			}
 			dstx += 16;
 			mskx += 4;
@@ -226,7 +206,7 @@ void CG14Comp_Over8Solid(Cg14Ptr p,
 				/* nothing to do - all transparent */
 			} else if (m == 0xff) {
 				/* all opaque */
-				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
+				sxm(SX_STUQ0, dstx, 8, 0);
 			} else {
 				/* fetch alpha value, stick it into scam */
 				/* mask is in R[12:15] */
@@ -234,28 +214,22 @@ void CG14Comp_Over8Solid(Cg14Ptr p,
 				    SX_LDB(12, 0, mskx & 7));*/
 				write_sx_reg(p, SX_QUEUED(12), m);
 				/* fetch dst pixel */
-				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ORV(12, 0, R_SCAM, 0));
+				sxm(SX_LDUQ0, dstx, 20, 0);
+				sxi(SX_ORV(12, 0, R_SCAM, 0));
 				/*
 				 * src * alpha + R0
 				 * R[9:11] * SCAM + R0 -> R[17:19]
 				 */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(9, 0, 17, 2));
+				sxi(SX_SAXP16X16SR8(9, 0, 17, 2));
 			
 				/* invert SCAM */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_XORV(12, 8, R_SCAM, 0));
+				sxi(SX_XORV(12, 8, R_SCAM, 0));
 #ifdef SX_DEBUG
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_XORV(12, 8, 13, 0));
+				sxi(SX_XORV(12, 8, 13, 0));
 #endif
 				/* dst * (1 - alpha) + R[13:15] */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(21, 17, 25, 2));
-				write_sx_io(p, dstx,
-				    SX_STUQ0C(24, 0, dstx & 7));
+				sxi(SX_SAXP16X16SR8(21, 17, 25, 2));
+				sxm(SX_STUQ0C, dstx, 24, 0);
 			}
 			dstx += 4;
 			mskx += 1;
@@ -287,30 +261,25 @@ void CG14Comp_Add32(Cg14Ptr p,
 		srcx = src;
 		dstx = dst;
 		for (x = 0; x < full; x++) {
-			write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7));
-			write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(8, 40, 72, 15));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(24, 56, 88, 15));
-			write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7));
+			sxm(SX_LDUQ0, srcx, 8, 31);
+			sxm(SX_LDUQ0, dstx, 40, 31);
+			sxi(SX_ADDV(8, 40, 72, 15));
+			sxi(SX_ADDV(24, 56, 88, 15));
+			sxm(SX_STUQ0, dstx, 72, 31);
 			srcx += 128;
 			dstx += 128;
 		}
 
 		/* do leftovers */
-		write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7));
-		write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7));
+		sxm(SX_LDUQ0, srcx, 8, part - 1);
+		sxm(SX_LDUQ0, dstx, 40, part - 1);
 		if (part & 16) {
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(8, 40, 72, 15));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(24, 56, 88, part - 17));
+			sxi(SX_ADDV(8, 40, 72, 15));
+			sxi(SX_ADDV(24, 56, 88, part - 17));
 		} else {
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(8, 40, 72, part - 1));
+			sxi(SX_ADDV(8, 40, 72, part - 1));
 		}
-		write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7));
+		sxm(SX_STUQ0, dstx, 72, part - 1);
 		
 		/* next line */
 		src += srcpitch;
@@ -355,10 +324,8 @@ void CG14Comp_Add8(Cg14Ptr p,
 		for (x = 0; x < full; x++) {
 			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
 			write_sx_io(p, dstx, SX_LDB(40, 31, dstoff));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(8, 40, 72, 15));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(24, 56, 88, 15));
+			sxi(SX_ADDV(8, 40, 72, 15));
+			sxi(SX_ADDV(24, 56, 88, 15));
 			write_sx_io(p, dstx, SX_STBC(72, 31, dstoff));
 			srcx += 32;
 			dstx += 32;
@@ -369,13 +336,10 @@ void CG14Comp_Add8(Cg14Ptr p,
 			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
 			write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff));
 			if (part > 16) {
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ADDV(8, 40, 72, 15));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ADDV(24, 56, 88, part - 17));
+				sxi(SX_ADDV(8, 40, 72, 15));
+				sxi(SX_ADDV(24, 56, 88, part - 17));
 			} else {
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ADDV(8, 40, 72, part - 1));
+				sxi(SX_ADDV(8, 40, 72, part - 1));
 			}
 			write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff));
 		}
@@ -426,10 +390,8 @@ void CG14Comp_Add8_32(Cg14Ptr p,
 			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
 			/* load alpha from destination */
 			write_sx_io(p, dstx, SX_LDUC0(40, 31, dstoff));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(8, 40, 72, 15));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(24, 56, 88, 15));
+			sxi(SX_ADDV(8, 40, 72, 15));
+			sxi(SX_ADDV(24, 56, 88, 15));
 			/* write clamped values back into dest alpha */
 			write_sx_io(p, dstx, SX_STUC0C(72, 31, dstoff));
 			srcx += 32;
@@ -441,13 +403,10 @@ void CG14Comp_Add8_32(Cg14Ptr p,
 			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
 			write_sx_io(p, dstx, SX_LDUC0(40, part - 1, dstoff));
 			if (part > 16) {
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ADDV(8, 40, 72, 15));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ADDV(24, 56, 88, part - 17));
+				sxi(SX_ADDV(8, 40, 72, 15));
+				sxi(SX_ADDV(24, 56, 88, part - 17));
 			} else {
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ADDV(8, 40, 72, part - 1));
+				sxi(SX_ADDV(8, 40, 72, part - 1));
 			}
 			write_sx_io(p, dstx, SX_STUC0C(72, part - 1, dstoff));
 		}
@@ -488,31 +447,24 @@ void CG14Comp_Over32(Cg14Ptr p,
 				continue;
 			}
 			/* fetch source pixels */
-			write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7));
+			sxm(SX_LDUQ0, srcx, 12, num - 1);
 			if (flip) {
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_GATHER(13, 4, 40, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_GATHER(15, 4, 44, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SCATTER(40, 4, 15, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SCATTER(44, 4, 13, num - 1));
+				sxi(SX_GATHER(13, 4, 40, num - 1));
+				sxi(SX_GATHER(15, 4, 44, num - 1));
+				sxi(SX_SCATTER(40, 4, 15, num - 1));
+				sxi(SX_SCATTER(44, 4, 13, num - 1));
 			}
 			/* fetch dst pixels */
-			write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7));
+			sxm(SX_LDUQ0, dstx, 44, num - 1);
 			/* now process up to 4 pixels */
 			for (i = 0; i < num; i++) {
 				int ii = i << 2;
 				/* write inverted alpha into SCAM */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_XORS(12 + ii, 8, R_SCAM, 0));
+				sxi(SX_XORS(12 + ii, 8, R_SCAM, 0));
 				/* dst * (1 - alpha) + src */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(44 + ii, 12 + ii, 76 + ii, 3));
+				sxi(SX_SAXP16X16SR8(44 + ii, 12 + ii, 76 + ii, 3));
 			}
-			write_sx_io(p, dstx,
-			    SX_STUQ0C(76, num - 1, dstx & 7));
+			sxm(SX_STUQ0C, dstx, 76, num - 1);
 			srcx += 16;
 			dstx += 16;
 		}
@@ -546,39 +498,30 @@ void CG14Comp_Over32Mask(Cg14Ptr p,
 				continue;
 			}
 			/* fetch source pixels */
-			write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7));
+			sxm(SX_LDUQ0, srcx, 12, num - 1);
 			if (flip) {
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_GATHER(13, 4, 40, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_GATHER(15, 4, 44, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SCATTER(40, 4, 15, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SCATTER(44, 4, 13, num - 1));
+				sxi(SX_GATHER(13, 4, 40, num - 1));
+				sxi(SX_GATHER(15, 4, 44, num - 1));
+				sxi(SX_SCATTER(40, 4, 15, num - 1));
+				sxi(SX_SCATTER(44, 4, 13, num - 1));
 			}
 			/* fetch mask */
-			write_sx_io(p, mskx, SX_LDB(28, num - 1, mskx & 7));
+			sxm(SX_LDB, mskx, 28, num - 1);
 			/* fetch dst pixels */
-			write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7));
+			sxm(SX_LDUQ0, dstx, 44, num - 1);
 			/* now process up to 4 pixels */
 			for (i = 0; i < num; i++) {
 				int ii = i << 2;
 				/* mask alpha to SCAM */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ORS(28 + i, 0, R_SCAM, 0));
+				sxi(SX_ORS(28 + i, 0, R_SCAM, 0));
 				/* src * alpha */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
+				sxi(SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
 				/* write inverted alpha into SCAM */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_XORS(28 + i, 8, R_SCAM, 0));
+				sxi(SX_XORS(28 + i, 8, R_SCAM, 0));
 				/* dst * (1 - alpha) + R[60:] */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
+				sxi(SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
 			}
-			write_sx_io(p, dstx,
-			    SX_STUQ0C(76, num - 1, dstx & 7));
+			sxm(SX_STUQ0C, dstx, 76, num - 1);
 			srcx += 16;
 			mskx += 4;
 			dstx += 16;
@@ -602,7 +545,7 @@ void CG14Comp_Over32Mask_noalpha(Cg14Ptr
 
 	write_sx_reg(p, SX_QUEUED(8), 0xff);
 	write_sx_reg(p, SX_QUEUED(9), 0xff);
-	write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(8, 0, 10, 1));
+	sxi(SX_ORS(8, 0, 10, 1));
 	for (line = 0; line < height; line++) {
 		srcx = src;
 		mskx = msk;
@@ -616,42 +559,32 @@ void CG14Comp_Over32Mask_noalpha(Cg14Ptr
 				continue;
 			}
 			/* fetch source pixels */
-			write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7));
+			sxm(SX_LDUQ0, srcx, 12, num - 1);
 			if (flip) {
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_GATHER(13, 4, 40, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_GATHER(15, 4, 44, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SCATTER(40, 4, 15, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SCATTER(44, 4, 13, num - 1));
+				sxi(SX_GATHER(13, 4, 40, num - 1));
+				sxi(SX_GATHER(15, 4, 44, num - 1));
+				sxi(SX_SCATTER(40, 4, 15, num - 1));
+				sxi(SX_SCATTER(44, 4, 13, num - 1));
 			}
 			/* fetch mask */
-			write_sx_io(p, mskx, SX_LDB(28, num - 1, mskx & 7));
+			sxm(SX_LDB, mskx, 28, num - 1);
 			/* fetch dst pixels */
-			write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7));
+			sxm(SX_LDUQ0, dstx, 44, num - 1);
 			/* set src alpha to 0xff */			
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_SCATTER(8, 4, 12, num - 1));
+			sxi(SX_SCATTER(8, 4, 12, num - 1));
 			/* now process up to 4 pixels */
 			for (i = 0; i < num; i++) {
 				int ii = i << 2;
 				/* mask alpha to SCAM */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ORS(28 + i, 0, R_SCAM, 0));
+				sxi(SX_ORS(28 + i, 0, R_SCAM, 0));
 				/* src * alpha */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
+				sxi(SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
 				/* write inverted alpha into SCAM */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_XORS(28 + i, 8, R_SCAM, 0));
+				sxi(SX_XORS(28 + i, 8, R_SCAM, 0));
 				/* dst * (1 - alpha) + R[60:] */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
+				sxi(SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
 			}
-			write_sx_io(p, dstx,
-			    SX_STUQ0C(76, num - 1, dstx & 7));
+			sxm(SX_STUQ0C, dstx, 76, num - 1);
 			srcx += 16;
 			mskx += 4;
 			dstx += 16;
@@ -675,7 +608,7 @@ void CG14Comp_Over32Mask32_noalpha(Cg14P
 
 	write_sx_reg(p, SX_QUEUED(8), 0xff);
 	write_sx_reg(p, SX_QUEUED(9), 0xff);
-	write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(8, 0, 10, 1));
+	sxi(SX_ORS(8, 0, 10, 1));
 	for (line = 0; line < height; line++) {
 		srcx = src;
 		mskx = msk;
@@ -689,42 +622,32 @@ void CG14Comp_Over32Mask32_noalpha(Cg14P
 				continue;
 			}
 			/* fetch source pixels */
-			write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7));
+			sxm(SX_LDUQ0, srcx, 12, num - 1);
 			if (flip) {
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_GATHER(13, 4, 40, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_GATHER(15, 4, 44, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SCATTER(40, 4, 15, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SCATTER(44, 4, 13, num - 1));
+				sxi(SX_GATHER(13, 4, 40, num - 1));
+				sxi(SX_GATHER(15, 4, 44, num - 1));
+				sxi(SX_SCATTER(40, 4, 15, num - 1));
+				sxi(SX_SCATTER(44, 4, 13, num - 1));
 			}
 			/* fetch mask */
-			write_sx_io(p, mskx, SX_LDUQ0(28, num - 1, mskx & 7));
+			sxm(SX_LDUQ0, mskx, 28, num - 1);
 			/* fetch dst pixels */
-			write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7));
+			sxm(SX_LDUQ0, dstx, 44, num - 1);
 			/* set src alpha to 0xff */			
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_SCATTER(8, 4, 12, num - 1));
+			sxi(SX_SCATTER(8, 4, 12, num - 1));
 			/* now process up to 4 pixels */
 			for (i = 0; i < num; i++) {
 				int ii = i << 2;
 				/* mask alpha to SCAM */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ORS(28 + ii, 0, R_SCAM, 0));
+				sxi(SX_ORS(28 + ii, 0, R_SCAM, 0));
 				/* src * alpha */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
+				sxi(SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
 				/* write inverted alpha into SCAM */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_XORS(28 + ii, 8, R_SCAM, 0));
+				sxi(SX_XORS(28 + ii, 8, R_SCAM, 0));
 				/* dst * (1 - alpha) + R[60:] */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
+				sxi(SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
 			}
-			write_sx_io(p, dstx,
-			    SX_STUQ0C(76, num - 1, dstx & 7));
+			sxm(SX_STUQ0C, dstx, 76, num - 1);
 			srcx += 16;
 			mskx += 16;
 			dstx += 16;

Reply via email to