Module Name:	xsrc
Committed By:	macallan
Date:		Fri Dec 24 04:41:40 UTC 2021
Modified Files:
	xsrc/external/mit/xf86-video-suncg14/dist/src: cg14.h cg14_accel.c
	    cg14_render.c

Log Message:
add macros to simplify issuing SX instructions, hide the address alignment /
displacement shenanigans SX needs, and make the code more resemble the
inline assembler source it arguably is


To generate a diff of this commit:
cvs rdiff -u -r1.14 -r1.15 \
    xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h
cvs rdiff -u -r1.26 -r1.27 \
    xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c
cvs rdiff -u -r1.13 -r1.14 \
    xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
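To make the log message concrete, here is a minimal sketch (illustration
only, not part of the diff below; like every call site in cg14_accel.c it
assumes a Cg14Ptr variable named "p" in scope) of how the two new macros
change a typical load/ROP/store sequence:

	/* the two macros added to cg14.h */
	#define sxi(inst) write_sx_reg(p, SX_INSTRUCTIONS, (inst))
	#define sxm(inst, addr, reg, count) \
		write_sx_io(p, (addr) & ~7, inst((reg), (count), (addr) & 7))

	/* before: 8-byte alignment and displacement handled by hand */
	write_sx_io(p, saddr & ~7, SX_LD(8, wrds - 1, saddr & 7));
	write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, wrds - 1));
	write_sx_io(p, daddr & ~7, SX_ST(72, wrds - 1, daddr & 7));

	/* after: reads like the SX instruction sequence it issues */
	sxm(SX_LD, saddr, 8, wrds - 1);   /* load words at saddr into r8+ */
	sxi(SX_ROP(8, 40, 72, wrds - 1)); /* r8+ ROP r40+ -> r72+ */
	sxm(SX_ST, daddr, 72, wrds - 1);  /* store r72+ back at daddr */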
Modified files:

Index: xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h
diff -u xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h:1.14 xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h:1.15
--- xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h:1.14	Wed Jul 24 16:07:59 2019
+++ xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h	Fri Dec 24 04:41:40 2021
@@ -135,6 +135,9 @@ write_sx_io(Cg14Ptr p, int reg, uint32_t
 	p->queuecount++;
 }
 
+#define sxi(inst) write_sx_reg(p, SX_INSTRUCTIONS, (inst))
+#define sxm(inst, addr, reg, count) write_sx_io(p, (addr) & ~7, inst((reg), (count), (addr) & 7))
+
 Bool CG14SetupCursor(ScreenPtr);
 Bool CG14InitAccel(ScreenPtr);
 

Index: xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c
diff -u xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c:1.26 xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c:1.27
--- xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c:1.26	Sun Dec 19 04:50:27 2021
+++ xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c	Fri Dec 24 04:41:40 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: cg14_accel.c,v 1.26 2021/12/19 04:50:27 macallan Exp $ */
+/* $NetBSD: cg14_accel.c,v 1.27 2021/12/24 04:41:40 macallan Exp $ */
 /*
  * Copyright (c) 2013 Michael Lorenz
  * All rights reserved.
@@ -184,10 +184,8 @@ CG14Copy32(PixmapPtr pDstPixmap,
 			d = dststart;
 			while ( count < w) {
 				num = min(32, w - count);
-				write_sx_io(p, s,
-				    SX_LD(10, num - 1, s & 7));
-				write_sx_io(p, d,
-				    SX_STM(10, num - 1, d & 7));
+				sxm(SX_LD, s, 10, num - 1);
+				sxm(SX_STM, d, 10, num - 1);
 				s += xinc;
 				d += xinc;
 				count += 32;
@@ -203,10 +201,8 @@ CG14Copy32(PixmapPtr pDstPixmap,
 			d = dststart;
 			count = w;
 			for (i = 0; i < chunks; i++) {
-				write_sx_io(p, s,
-				    SX_LD(10, 31, s & 7));
-				write_sx_io(p, d,
-				    SX_STM(10, 31, d & 7));
+				sxm(SX_LD, s, 10, 31);
+				sxm(SX_STM, d, 10, 31);
 				s -= 128;
 				d -= 128;
 				count -= 32;
@@ -215,10 +211,8 @@ CG14Copy32(PixmapPtr pDstPixmap,
 			if (count > 0) {
 				s += (32 - count) << 2;
 				d += (32 - count) << 2;
-				write_sx_io(p, s,
-				    SX_LD(10, count - 1, s & 7));
-				write_sx_io(p, d,
-				    SX_STM(10, count - 1, d & 7));
+				sxm(SX_LD, s, 10, count - 1);
+				sxm(SX_STM, d, 10, count - 1);
 			}
 			srcstart += srcinc;
 			dststart += dstinc;
@@ -234,21 +228,15 @@ CG14Copy32(PixmapPtr pDstPixmap,
 			d = dststart;
 			while ( count < w) {
 				num = min(32, w - count);
-				write_sx_io(p, s,
-				    SX_LD(10, num - 1, s & 7));
-				write_sx_io(p, d,
-				    SX_LD(42, num - 1, d & 7));
+				sxm(SX_LD, s, 10, num - 1);
+				sxm(SX_LD, d, 42, num - 1);
 				if (num > 16) {
-					write_sx_reg(p, SX_INSTRUCTIONS,
-					    SX_ROP(10, 42, 74, 15));
-					write_sx_reg(p, SX_INSTRUCTIONS,
-					    SX_ROP(26, 58, 90, num - 17));
+					sxi(SX_ROP(10, 42, 74, 15));
+					sxi(SX_ROP(26, 58, 90, num - 17));
 				} else {
-					write_sx_reg(p, SX_INSTRUCTIONS,
-					    SX_ROP(10, 42, 74, num - 1));
+					sxi(SX_ROP(10, 42, 74, num - 1));
 				}
-				write_sx_io(p, d,
-				    SX_STM(74, num - 1, d & 7));
+				sxm(SX_STM, d, 74, num - 1);
 				s += xinc;
 				d += xinc;
 				count += 32;
@@ -264,14 +252,11 @@ CG14Copy32(PixmapPtr pDstPixmap,
 			d = dststart;
 			count = w;
 			for (i = 0; i < chunks; i++) {
-				write_sx_io(p, s, SX_LD(10, 31, s & 7));
-				write_sx_io(p, d, SX_LD(42, 31, d & 7));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ROP(10, 42, 74, 15));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ROP(26, 58, 90, 15));
-				write_sx_io(p, d,
-				    SX_STM(74, 31, d & 7));
+				sxm(SX_LD, s, 10, 31);
+				sxm(SX_LD, d, 42, 31);
+				sxi(SX_ROP(10, 42, 74, 15));
+				sxi(SX_ROP(26, 58, 90, 15));
+				sxm(SX_STM, d, 74, 31);
 				s -= 128;
 				d -= 128;
 				count -= 32;
@@ -280,22 +265,15 @@ CG14Copy32(PixmapPtr pDstPixmap,
 			if (count > 0) {
 				s += (32 - count) << 2;
 				d += (32 - count) << 2;
-				write_sx_io(p, s,
-				    SX_LD(10, count - 1, s & 7));
-				write_sx_io(p, d,
-				    SX_LD(42, count - 1, d & 7));
+				sxm(SX_LD, s, 10, count - 1);
+				sxm(SX_LD, d, 42, count - 1);
 				if (count > 16) {
-					write_sx_reg(p, SX_INSTRUCTIONS,
-					    SX_ROP(10, 42, 74, 15));
-					write_sx_reg(p, SX_INSTRUCTIONS,
-					    SX_ROP(26, 58, 90, count - 17));
+					sxi(SX_ROP(10, 42, 74, 15));
+					sxi(SX_ROP(26, 58, 90, count - 17));
 				} else {
-					write_sx_reg(p, SX_INSTRUCTIONS,
-					    SX_ROP(10, 42, 74, count - 1));
+					sxi(SX_ROP(10, 42, 74, count - 1));
 				}
-
-				write_sx_io(p, d,
-				    SX_STM(74, count - 1, d & 7));
+				sxm(SX_STM, d, 74, count - 1);
 			}
 			srcstart += srcinc;
 			dststart += dstinc;
@@ -309,7 +287,8 @@ CG14Copy32(PixmapPtr pDstPixmap,
  * copy with same alignment, left to right, no ROP
  */
 static void
-CG14Copy8_aligned_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch)
+CG14Copy8_aligned_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h,
+    int srcpitch, int dstpitch)
 {
 	int saddr, daddr, pre, cnt, wrds;
 
@@ -324,8 +303,8 @@ CG14Copy8_aligned_norop(Cg14Ptr p, int s
 		daddr = dststart;
 		cnt = w;
 		if (pre > 0) {
-			write_sx_io(p, saddr & ~7, SX_LDB(8, pre - 1, saddr & 7));
-			write_sx_io(p, daddr & ~7, SX_STB(8, pre - 1, daddr & 7));
+			sxm(SX_LDB, saddr, 8, pre - 1);
+			sxm(SX_STB, daddr, 8, pre - 1);
 			saddr += pre;
 			daddr += pre;
 			cnt -= pre;
@@ -333,15 +312,15 @@ CG14Copy8_aligned_norop(Cg14Ptr p, int s
 		}
 		while (cnt > 3) {
 			wrds = min(32, cnt >> 2);
-			write_sx_io(p, saddr & ~7, SX_LD(8, wrds - 1, saddr & 7));
-			write_sx_io(p, daddr & ~7, SX_ST(8, wrds - 1, daddr & 7));
+			sxm(SX_LD, saddr, 8, wrds - 1);
+			sxm(SX_ST, daddr, 8, wrds - 1);
 			saddr += wrds << 2;
 			daddr += wrds << 2;
 			cnt -= wrds << 2;
 		}
 		if (cnt > 0) {
-			write_sx_io(p, saddr & ~7, SX_LDB(8, cnt - 1, saddr & 7));
-			write_sx_io(p, daddr & ~7, SX_STB(8, cnt - 1, daddr & 7));
+			sxm(SX_LDB, saddr, 8, cnt - 1);
+			sxm(SX_STB, daddr, 8, cnt - 1);
 		}
next:
 		srcstart += srcpitch;
@@ -354,7 +333,8 @@ next:
  * copy with same alignment, left to right, ROP
  */
 static void
-CG14Copy8_aligned_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch)
+CG14Copy8_aligned_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h,
+    int srcpitch, int dstpitch)
 {
 	int saddr, daddr, pre, cnt, wrds;
 
@@ -369,10 +349,10 @@ CG14Copy8_aligned_rop(Cg14Ptr p, int src
 		daddr = dststart;
 		cnt = w;
 		if (pre > 0) {
-			write_sx_io(p, saddr & ~7, SX_LDB(8, pre - 1, saddr & 7));
-			write_sx_io(p, daddr & ~7, SX_LDB(40, pre - 1, daddr & 7));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, pre - 1));
-			write_sx_io(p, daddr & ~7, SX_STB(72, pre - 1, daddr & 7));
+			sxm(SX_LDB, saddr, 8, pre - 1);
+			sxm(SX_LDB, daddr, 40, pre - 1);
+			sxi(SX_ROP(8, 40, 72, pre - 1));
+			sxm(SX_STB, daddr, 72, pre - 1);
 			saddr += pre;
 			daddr += pre;
 			cnt -= pre;
@@ -380,23 +360,23 @@ CG14Copy8_aligned_rop(Cg14Ptr p, int src
 		}
 		while (cnt > 3) {
 			wrds = min(32, cnt >> 2);
-			write_sx_io(p, saddr & ~7, SX_LD(8, wrds - 1, saddr & 7));
-			write_sx_io(p, daddr & ~7, SX_LD(40, wrds - 1, daddr & 7));
+			sxm(SX_LD, saddr, 8, wrds - 1);
+			sxm(SX_LD, daddr, 40, wrds - 1);
 			if (cnt > 16) {
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, 15));
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 56, 88, wrds - 17));
+				sxi(SX_ROP(8, 40, 72, 15));
+				sxi(SX_ROP(8, 56, 88, wrds - 17));
 			} else
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, wrds - 1));
-			write_sx_io(p, daddr & ~7, SX_ST(72, wrds - 1, daddr & 7));
+				sxi(SX_ROP(8, 40, 72, wrds - 1));
+			sxm(SX_ST, daddr, 72, wrds - 1);
 			saddr += wrds << 2;
 			daddr += wrds << 2;
 			cnt -= wrds << 2;
 		}
 		if (cnt > 0) {
-			write_sx_io(p, saddr & ~7, SX_LDB(8, cnt - 1, saddr & 7));
-			write_sx_io(p, daddr & ~7, SX_LDB(40, cnt - 1, daddr & 7));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, cnt - 1));
-			write_sx_io(p, daddr & ~7, SX_STB(72, cnt - 1, daddr & 7));
+			sxm(SX_LDB, saddr, 8, cnt - 1);
+			sxm(SX_LDB, daddr, 40, cnt - 1);
+			sxi(SX_ROP(8, 40, 72, cnt - 1));
+			sxm(SX_STB, daddr, 72, cnt - 1);
 		}
next:
 		srcstart += srcpitch;
@@ -459,38 +439,38 @@ CG14Copy8_short_rop(Cg14Ptr p, int srcst
 	daddr = dststart & ~3;
 
 	while (h > 0) {
-		write_sx_io(p, daddr & ~7, SX_LD(80, wrds - 1, daddr & 7));
-		write_sx_io(p, saddr & ~7, SX_LD(sreg, swrds - 1, saddr & 7));
+		sxm(SX_LD, daddr, 80, wrds - 1);
+		sxm(SX_LD, saddr, sreg, swrds - 1);
 		if (wrds > 15) {
 			if (dist != 0) {
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, 15));
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(24, dist, 56, wrds - 16));
+				sxi(SX_FUNNEL_I(8, dist, 40, 15));
+				sxi(SX_FUNNEL_I(24, dist, 56, wrds - 16));
 				/* shifted source pixels are now at register 40+ */
 				ssreg = 40;
 			} else ssreg = 8;
 			if (pre != 0) {
 				/* mask out leading junk */
 				write_sx_reg(p, SX_QUEUED(R_MASK), lmask);
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 0));
+				sxi(SX_ROPB(ssreg, 80, 8, 0));
 				write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff);
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 1, 81, 9, 14));
+				sxi(SX_ROPB(ssreg + 1, 81, 9, 14));
 			} else {
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 15));
+				sxi(SX_ROPB(ssreg, 80, 8, 15));
 			}
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 16, 96, 24, wrds - 16));
+			sxi(SX_ROPB(ssreg + 16, 96, 24, wrds - 16));
 		} else {
 			if (dist != 0) {
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, wrds));
+				sxi(SX_FUNNEL_I(8, dist, 40, wrds));
 				ssreg = 40;
 			} else ssreg = 8;
 			if (pre != 0) {
 				/* mask out leading junk */
 				write_sx_reg(p, SX_QUEUED(R_MASK), lmask);
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 0));
+				sxi(SX_ROPB(ssreg, 80, 8, 0));
 				write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff);
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 1, 81, 9, wrds));
+				sxi(SX_ROPB(ssreg + 1, 81, 9, wrds));
 			} else {
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, wrds));
+				sxi(SX_ROPB(ssreg, 80, 8, wrds));
 			}
 		}
 		if (post != 0) {
@@ -502,15 +482,15 @@ CG14Copy8_short_rop(Cg14Ptr p, int srcst
 		 * the left end but it's less annoying this way and
 		 * the instruction count is the same
 		 */
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(7 + wrds, 7, 5, 0));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(79 + wrds, 6, 4, 0));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(5, 4, 7 + wrds, 0));
+			sxi(SX_ANDS(7 + wrds, 7, 5, 0));
+			sxi(SX_ANDS(79 + wrds, 6, 4, 0));
+			sxi(SX_ORS(5, 4, 7 + wrds, 0));
 		}
#ifdef DEBUG
-		write_sx_io(p, taddr & ~7, SX_ST(40, wrds - 1, taddr & 7));
+		sxm(SX_ST, taddr, 40, wrds - 1);
 		taddr += dstpitch;
#endif
-		write_sx_io(p, daddr & ~7, SX_ST(8, wrds - 1, daddr & 7));
+		sxm(SX_ST, daddr, 8, wrds - 1);
 		saddr += srcpitch;
 		daddr += dstpitch;
 		h--;
@@ -519,7 +499,8 @@ CG14Copy8_short_rop(Cg14Ptr p, int srcst
 
/* up to 124 pixels so direction doesn't matter, unaligned, straight copy */
 static void
-CG14Copy8_short_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch)
+CG14Copy8_short_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h,
+    int srcpitch, int dstpitch)
 {
 	int saddr, daddr, pre, dist, wrds, swrds, spre, sreg, restaddr, post;
 	int ssreg;
@@ -571,30 +552,30 @@ CG14Copy8_short_norop(Cg14Ptr p, int src
 	daddr = dststart & ~3;
 
 	while (h > 0) {
-		write_sx_io(p, saddr & ~7, SX_LD(sreg, swrds - 1, saddr & 7));
+		sxm(SX_LD, saddr, sreg, swrds - 1);
 		if (wrds > 15) {
 			if (dist != 0) {
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, 15));
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(24, dist, 56, wrds - 16));
-				/* shifted source pixels are now at register 40+ */
+				sxi(SX_FUNNEL_I(8, dist, 40, 15));
+				sxi(SX_FUNNEL_I(24, dist, 56, wrds - 16));
+				/* shifted source pixels are now at reg 40+ */
 				ssreg = 40;
 			} else ssreg = 8;
 			if (pre != 0) {
 				/* read only the first word */
-				write_sx_io(p, daddr & ~7, SX_LD(80, 0, daddr & 7));
+				sxm(SX_LD, daddr, 80, 0);
 				/* mask out leading junk */
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, ssreg, 0));
+				sxi(SX_ROPB(ssreg, 80, ssreg, 0));
 			}
 		} else {
 			if (dist != 0) {
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, wrds));
+				sxi(SX_FUNNEL_I(8, dist, 40, wrds));
 				ssreg = 40;
 			} else ssreg = 8;
 			if (pre != 0) {
 				/* read only the first word */
-				write_sx_io(p, daddr & ~7, SX_LD(80, 0, daddr & 7));
+				sxm(SX_LD, daddr, 80, 0);
 				/* mask out leading junk */
-				write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, ssreg, 0));
+				sxi(SX_ROPB(ssreg, 80, ssreg, 0));
 			}
 		}
 		if (post != 0) {
@@ -607,16 +588,16 @@ CG14Copy8_short_norop(Cg14Ptr p, int src
 		 * the left end but it's less annoying this way and
 		 * the instruction count is the same
 		 */
-			write_sx_io(p, laddr & ~7, SX_LD(81, 0, laddr & 7));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(ssreg + wrds - 1, 7, 5, 0));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(81, 6, 4, 0));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(5, 4, ssreg + wrds - 1, 0));
+			sxm(SX_LD, laddr, 81, 0);
+			sxi(SX_ANDS(ssreg + wrds - 1, 7, 5, 0));
+			sxi(SX_ANDS(81, 6, 4, 0));
+			sxi(SX_ORS(5, 4, ssreg + wrds - 1, 0));
 		}
#ifdef DEBUG
-		write_sx_io(p, taddr & ~7, SX_ST(40, wrds - 1, taddr & 7));
+		sxm(SX_ST, taddr, 40, wrds - 1);
 		taddr += dstpitch;
#endif
-		write_sx_io(p, daddr & ~7, SX_ST(ssreg, wrds - 1, daddr & 7));
+		sxm(SX_ST, daddr, ssreg, wrds - 1);
 		saddr += srcpitch;
 		daddr += dstpitch;
 		h--;
@@ -663,10 +644,12 @@ CG14Copy8(PixmapPtr pDstPixmap,
 	if ((w < 125) && (w > 8)) {
 		switch (p->last_rop) {
 			case 0xcc:
-				CG14Copy8_short_norop(p, srcstart, dststart, w, h, srcinc, dstinc);
+				CG14Copy8_short_norop(p,
+				    srcstart, dststart, w, h, srcinc, dstinc);
 				break;
 			default:
-				CG14Copy8_short_rop(p, srcstart, dststart, w, h, srcinc, dstinc);
+				CG14Copy8_short_rop(p,
+				    srcstart, dststart, w, h, srcinc, dstinc);
 		}
 		return;
 	}
@@ -687,10 +670,12 @@ CG14Copy8(PixmapPtr pDstPixmap,
 	if (((srcstart & 3) == (dststart & 3)) && (xinc > 0)) {
 		switch (p->last_rop) {
 			case 0xcc:
-				CG14Copy8_aligned_norop(p, srcstart, dststart, w, h, srcinc, dstinc);
+				CG14Copy8_aligned_norop(p,
+				    srcstart, dststart, w, h, srcinc, dstinc);
 				break;
 			default:
-				CG14Copy8_aligned_rop(p, srcstart, dststart, w, h, srcinc, dstinc);
+				CG14Copy8_aligned_rop(p,
+				    srcstart, dststart, w, h, srcinc, dstinc);
 		}
 		return;
 	}
@@ -706,18 +691,22 @@ CG14Copy8(PixmapPtr pDstPixmap,
 	 */
 	if (w > 8) {
 		int next, wi, end = dststart + w;
-		DPRINTF(X_ERROR, "%s %08x %08x %d\n", __func__, srcstart, dststart, w);
+		DPRINTF(X_ERROR, "%s %08x %08x %d\n",
+		    __func__, srcstart, dststart, w);
 		if ((p->xdir < 0) && (srcoff == dstoff)) {
 			srcstart += w;
 			next = max((end - 120) & ~3, dststart);
 			wi = end - next;
 			srcstart -= wi;
 			while (wi > 0) {
-				DPRINTF(X_ERROR, "%s RL %08x %08x %d\n", __func__, srcstart, next, wi);
+				DPRINTF(X_ERROR, "%s RL %08x %08x %d\n",
+				    __func__, srcstart, next, wi);
 				if (p->last_rop == 0xcc) {
-					CG14Copy8_short_norop(p, srcstart, next, wi, h, srcinc, dstinc);
+					CG14Copy8_short_norop(p, srcstart,
+					    next, wi, h, srcinc, dstinc);
 				} else
-					CG14Copy8_short_rop(p, srcstart, next, wi, h, srcinc, dstinc);
+					CG14Copy8_short_rop(p, srcstart,
+					    next, wi, h, srcinc, dstinc);
 				end = next;
 				/*
 				 * avoid extremely narrow copies so I don't
@@ -736,11 +725,16 @@ CG14Copy8(PixmapPtr pDstPixmap,
 			next = min(end, (dststart + 124) & ~3);
 			wi = next - dststart;
 			while (wi > 0) {
-				DPRINTF(X_ERROR, "%s LR %08x %08x %d\n", __func__, srcstart, next, wi);
+				DPRINTF(X_ERROR, "%s LR %08x %08x %d\n",
+				    __func__, srcstart, next, wi);
 				if (p->last_rop == 0xcc) {
-					CG14Copy8_short_norop(p, srcstart, dststart, wi, h, srcinc, dstinc);
+					CG14Copy8_short_norop(p,
+					    srcstart, dststart, wi, h,
+					    srcinc, dstinc);
 				} else
-					CG14Copy8_short_rop(p, srcstart, dststart, wi, h, srcinc, dstinc);
+					CG14Copy8_short_rop(p,
+					    srcstart, dststart, wi, h,
+					    srcinc, dstinc);
 				srcstart += wi;
 				dststart = next;
 				if ((end - dststart) < 140) {
@@ -769,10 +763,8 @@ CG14Copy8(PixmapPtr pDstPixmap,
 			d = dststart;
 			while ( count < w) {
 				num = min(32, w - count);
-				write_sx_io(p, s,
-				    SX_LDB(10, num - 1, s & 7));
-				write_sx_io(p, d,
-				    SX_STBM(10, num - 1, d & 7));
+				sxm(SX_LDB, s, 10, num - 1);
+				sxm(SX_STBM, d, 10, num - 1);
 				s += xinc;
 				d += xinc;
 				count += 32;
@@ -788,10 +780,8 @@ CG14Copy8(PixmapPtr pDstPixmap,
 			d = dststart;
 			count = w;
 			for (i = 0; i < chunks; i++) {
-				write_sx_io(p, s,
-				    SX_LDB(10, 31, s & 7));
-				write_sx_io(p, d,
-				    SX_STBM(10, 31, d & 7));
+				sxm(SX_LDB, s, 10, 31);
+				sxm(SX_STBM, d, 10, 31);
 				s -= 32;
 				d -= 32;
 				count -= 32;
@@ -800,10 +790,8 @@ CG14Copy8(PixmapPtr pDstPixmap,
 			if (count > 0) {
 				s += (32 - count);
 				d += (32 - count);
-				write_sx_io(p, s,
-				    SX_LDB(10, count - 1, s & 7));
-				write_sx_io(p, d,
-				    SX_STBM(10, count - 1, d & 7));
+				sxm(SX_LDB, s, 10, count - 1);
+				sxm(SX_STBM, d, 10, count - 1);
 			}
 			srcstart += srcinc;
 			dststart += dstinc;
@@ -819,21 +807,15 @@ CG14Copy8(PixmapPtr pDstPixmap,
 			d = dststart;
 			while ( count < w) {
 				num = min(32, w - count);
-				write_sx_io(p, s,
-				    SX_LDB(10, num - 1, s & 7));
-				write_sx_io(p, d,
-				    SX_LDB(42, num - 1, d & 7));
+				sxm(SX_LDB, s, 10, num - 1);
+				sxm(SX_LDB, d, 42, num - 1);
 				if (num > 16) {
-					write_sx_reg(p, SX_INSTRUCTIONS,
-					    SX_ROP(10, 42, 74, 15));
-					write_sx_reg(p, SX_INSTRUCTIONS,
-					    SX_ROP(26, 58, 90, num - 17));
+					sxi(SX_ROP(10, 42, 74, 15));
+					sxi(SX_ROP(26, 58, 90, num - 17));
 				} else {
-					write_sx_reg(p, SX_INSTRUCTIONS,
-					    SX_ROP(10, 42, 74, num - 1));
+					sxi(SX_ROP(10, 42, 74, num - 1));
 				}
-				write_sx_io(p, d,
-				    SX_STBM(74, num - 1, d & 7));
+				sxm(SX_STBM, d, 74, num - 1);
 				s += xinc;
 				d += xinc;
 				count += 32;
@@ -849,14 +831,11 @@ CG14Copy8(PixmapPtr pDstPixmap,
 			d = dststart;
 			count = w;
 			for (i = 0; i < chunks; i++) {
-				write_sx_io(p, s, SX_LDB(10, 31, s & 7));
-				write_sx_io(p, d, SX_LDB(42, 31, d & 7));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ROP(10, 42, 74, 15));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ROP(26, 58, 90, 15));
-				write_sx_io(p, d,
-				    SX_STBM(74, 31, d & 7));
+				sxm(SX_LDB, s, 10, 31);
+				sxm(SX_LDB, d, 42, 31);
+				sxi(SX_ROP(10, 42, 74, 15));
+				sxi(SX_ROP(26, 58, 90, 15));
+				sxm(SX_STBM, d, 74, 31);
 				s -= 128;
 				d -= 128;
 				count -= 32;
@@ -865,22 +844,15 @@ CG14Copy8(PixmapPtr pDstPixmap,
 			if (count > 0) {
 				s += (32 - count);
 				d += (32 - count);
-				write_sx_io(p, s,
-				    SX_LDB(10, count - 1, s & 7));
-				write_sx_io(p, d,
-				    SX_LDB(42, count - 1, d & 7));
+				sxm(SX_LDB, s, 10, count - 1);
+				sxm(SX_LDB, d, 42, count - 1);
 				if (count > 16) {
-					write_sx_reg(p, SX_INSTRUCTIONS,
-					    SX_ROP(10, 42, 74, 15));
-					write_sx_reg(p, SX_INSTRUCTIONS,
-					    SX_ROP(26, 58, 90, count - 17));
+					sxi(SX_ROP(10, 42, 74, 15));
+					sxi(SX_ROP(26, 58, 90, count - 17));
 				} else {
-					write_sx_reg(p, SX_INSTRUCTIONS,
-					    SX_ROP(10, 42, 74, count - 1));
+					sxi(SX_ROP(10, 42, 74, count - 1));
 				}
-
-				write_sx_io(p, d,
-				    SX_STBM(74, count - 1, d & 7));
+				sxm(SX_STBM, d, 74, count - 1);
 			}
 			srcstart += srcinc;
 			dststart += dstinc;
@@ -956,8 +928,7 @@ CG14Solid32(Cg14Ptr p, uint32_t start, u
 		while (x < w) {
 			ptr = start + (x << 2);
 			num = min(32, w - x);
-			write_sx_io(p, ptr,
-			    SX_STS(8, num - 1, ptr & 7));
+			sxm(SX_STS, ptr, 8, num - 1);
 			x += 32;
 		}
 		start += pitch;
@@ -969,8 +940,7 @@ CG14Solid32(Cg14Ptr p, uint32_t start, u
 	/* alright, let's do actual ROP stuff */
 
 	/* first repeat the fill colour into 16 registers */
-	write_sx_reg(p, SX_INSTRUCTIONS,
-	    SX_SELECT_S(8, 8, 10, 15));
+	sxi(SX_SELECT_S(8, 8, 10, 15));
 
 	for (line = 0; line < h; line++) {
 		x = 0;
@@ -978,24 +948,19 @@ CG14Solid32(Cg14Ptr p, uint32_t start, u
 			ptr = start + (x << 2);
 			num = min(32, w - x);
 			/* now suck fb data into registers */
-			write_sx_io(p, ptr,
-			    SX_LD(42, num - 1, ptr & 7));
+			sxm(SX_LD, ptr, 42, num - 1);
 			/*
 			 * ROP them with the fill data we left in 10
 			 * non-memory ops can only have counts up to 16
 			 */
 			if (num <= 16) {
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ROP(10, 42, 74, num - 1));
+				sxi(SX_ROP(10, 42, 74, num - 1));
 			} else {
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ROP(10, 42, 74, 15));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ROP(10, 58, 90, num - 17));
+				sxi(SX_ROP(10, 42, 74, 15));
+				sxi(SX_ROP(10, 58, 90, num - 17));
 			}
 			/* and write the result back into memory */
-			write_sx_io(p, ptr,
-			    SX_ST(74, num - 1, ptr & 7));
+			sxm(SX_ST, ptr, 74, num - 1);
 			x += 32;
 		}
 		start += pitch;
@@ -1020,7 +985,7 @@ CG14Solid8(Cg14Ptr p, uint32_t start, ui
 			cnt = w;
 			pre = min(pre, cnt);
 			if (pre) {
-				write_sx_io(p, ptr & ~7, SX_STBS(8, pre - 1, ptr & 7));
+				sxm(SX_STBS, ptr, 8, pre - 1);
 				ptr += pre;
 				cnt -= pre;
 				if (cnt == 0) goto next;
@@ -1029,13 +994,13 @@ CG14Solid8(Cg14Ptr p, uint32_t start, ui
 			if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr);
 			while(cnt > 3) {
 				num = min(32, cnt >> 2);
-				write_sx_io(p, ptr & ~7, SX_STS(8, num - 1, ptr & 7));
+				sxm(SX_STS, ptr, 8, num - 1);
 				ptr += num << 2;
 				cnt -= num << 2;
 			}
 			if (cnt > 3) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt);
 			if (cnt > 0) {
-				write_sx_io(p, ptr & ~7, SX_STBS(8, cnt - 1, ptr & 7));
+				sxm(SX_STBS, ptr, 8, cnt - 1);
 			}
 			if ((ptr + cnt) != (start + w)) xf86Msg(X_ERROR, "%s %x vs %x\n", __func__, ptr + cnt, start + w);
next:
@@ -1048,17 +1013,16 @@ next:
 	/* alright, let's do actual ROP stuff */
 
 	/* first repeat the fill colour into 16 registers */
-	write_sx_reg(p, SX_INSTRUCTIONS,
-	    SX_SELECT_S(8, 8, 10, 15));
+	sxi(SX_SELECT_S(8, 8, 10, 15));
 
 	for (line = 0; line < h; line++) {
 		ptr = start;
 		cnt = w;
 		pre = min(pre, cnt);
 		if (pre) {
-			write_sx_io(p, ptr & ~7, SX_LDB(26, pre - 1, ptr & 7));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(10, 26, 42, pre - 1));
-			write_sx_io(p, ptr & ~7, SX_STB(42, pre - 1, ptr & 7));
+			sxm(SX_LDB, ptr, 26, pre - 1);
+			sxi(SX_ROP(10, 26, 42, pre - 1));
+			sxm(SX_STB, ptr, 42, pre - 1);
 			ptr += pre;
 			cnt -= pre;
 			if (cnt == 0) goto next2;
@@ -1067,25 +1031,22 @@ next:
 		if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr);
 		while(cnt > 3) {
 			num = min(32, cnt >> 2);
-			write_sx_io(p, ptr & ~7, SX_LD(26, num - 1, ptr & 7));
+			sxm(SX_LD, ptr, 26, num - 1);
 			if (num <= 16) {
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ROP(10, 26, 58, num - 1));
+				sxi(SX_ROP(10, 26, 58, num - 1));
 			} else {
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ROP(10, 26, 58, 15));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ROP(10, 42, 74, num - 17));
+				sxi(SX_ROP(10, 26, 58, 15));
+				sxi(SX_ROP(10, 42, 74, num - 17));
 			}
-			write_sx_io(p, ptr & ~7, SX_ST(58, num - 1, ptr & 7));
+			sxm(SX_ST, ptr, 58, num - 1);
 			ptr += num << 2;
 			cnt -= num << 2;
 		}
 		if (cnt > 3) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt);
 		if (cnt > 0) {
-			write_sx_io(p, ptr & ~7, SX_LDB(26, cnt - 1, ptr & 7));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(10, 26, 42, cnt - 1));
-			write_sx_io(p, ptr & ~7, SX_STB(42, cnt - 1, ptr & 7));
+			sxm(SX_LDB, ptr, 26, cnt - 1);
+			sxi(SX_ROP(10, 26, 42, cnt - 1));
+			sxm(SX_STB, ptr, 42, cnt - 1);
 		}
 		if ((ptr + cnt) != (start + w)) xf86Msg(X_ERROR, "%s %x vs %x\n", __func__, ptr + cnt, start + w);
next2:

Index: xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c
diff -u xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c:1.13 xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c:1.14
--- xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c:1.13	Wed Jul 24 16:07:59 2019
+++ xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c	Fri Dec 24 04:41:40 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: cg14_render.c,v 1.13 2019/07/24 16:07:59 macallan Exp $ */
+/* $NetBSD: cg14_render.c,v 1.14 2021/12/24 04:41:40 macallan Exp $ */
 /*
  * Copyright (c) 2013 Michael Lorenz
  * All rights reserved.
@@ -75,37 +75,30 @@ void CG14Comp_Over32Solid(Cg14Ptr p,
 		for (x = 0; x < width; x += 4) {
 			rest = width - x;
 			/* fetch 4 mask values */
-			write_sx_io(p, mskx, SX_LDUQ0(12, 3, mskx & 7));
+			sxm(SX_LDUQ0, mskx, 12, 3);
 			/* fetch destination pixels */
-			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
+			sxm(SX_LDUQ0, dstx, 60, 3);
 			/* duplicate them for all channels */
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2));
+			sxi(SX_ORS(0, 12, 13, 2));
+			sxi(SX_ORS(0, 16, 17, 2));
+			sxi(SX_ORS(0, 20, 21, 2));
+			sxi(SX_ORS(0, 24, 25, 2));
 			/* generate inverted alpha */
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_XORS(12, 8, 28, 15));
+			sxi(SX_XORS(12, 8, 28, 15));
 			/* multiply source */
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_MUL16X16SR8(8, 12, 44, 3));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_MUL16X16SR8(8, 16, 48, 3));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_MUL16X16SR8(8, 20, 52, 3));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_MUL16X16SR8(8, 24, 56, 3));
+			sxi(SX_MUL16X16SR8(8, 12, 44, 3));
+			sxi(SX_MUL16X16SR8(8, 16, 48, 3));
+			sxi(SX_MUL16X16SR8(8, 20, 52, 3));
+			sxi(SX_MUL16X16SR8(8, 24, 56, 3));
 			/* multiply dest */
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_MUL16X16SR8(28, 60, 76, 15));
+			sxi(SX_MUL16X16SR8(28, 60, 76, 15));
 			/* add up */
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(44, 76, 92, 15));
+			sxi(SX_ADDV(44, 76, 92, 15));
 			/* write back */
 			if (rest < 4) {
-				write_sx_io(p, dstx, SX_STUQ0C(92, rest - 1, dstx & 7));
+				sxm(SX_STUQ0C, dstx, 92, rest - 1);
 			} else {
-				write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
+				sxm(SX_STUQ0C, dstx, 92, 3);
 			}
 			dstx += 16;
 			mskx += 16;
@@ -118,7 +111,7 @@ void CG14Comp_Over32Solid(Cg14Ptr p,
 				/* nothing to do - all transparent */
 			} else if (m == 0xff) {
 				/* all opaque */
-				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
+				sxm(SX_STUQ0, dstx, 8, 0);
 			} else {
 				/* fetch alpha value, stick it into scam */
 				/* mask is in R[12:15] */
@@ -126,28 +119,22 @@ void CG14Comp_Over32Solid(Cg14Ptr p,
 				/*write_sx_io(p, mskx & ~7,
 				    SX_LDUQ0(12, 0, mskx & 7));*/
 				write_sx_reg(p, SX_QUEUED(12), m);
 				/* fetch dst pixel */
-				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ORV(12, 0, R_SCAM, 0));
+				sxm(SX_LDUQ0, dstx, 20, 0);
+				sxi(SX_ORV(12, 0, R_SCAM, 0));
 				/*
 				 * src * alpha + R0
 				 * R[9:11] * SCAM + R0 -> R[17:19]
 				 */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(9, 0, 17, 2));
+				sxi(SX_SAXP16X16SR8(9, 0, 17, 2));
 				/* invert SCAM */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_XORV(12, 8, R_SCAM, 0));
+				sxi(SX_XORV(12, 8, R_SCAM, 0));
#ifdef SX_DEBUG
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_XORV(12, 8, 13, 0));
+				sxi(SX_XORV(12, 8, 13, 0));
#endif
 				/* dst * (1 - alpha) + R[13:15] */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(21, 17, 25, 2));
-				write_sx_io(p, dstx,
-				    SX_STUQ0C(24, 0, dstx & 7));
+				sxi(SX_SAXP16X16SR8(21, 17, 25, 2));
+				sxm(SX_STUQ0C, dstx, 24, 0);
 			}
 			dstx += 4;
 			mskx += 4;
@@ -181,37 +168,30 @@ void CG14Comp_Over8Solid(Cg14Ptr p,
 		for (x = 0; x < width; x += 4) {
 			rest = width - x;
 			/* fetch 4 mask values */
-			write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7));
+			sxm(SX_LDB, mskx, 12, 3);
 			/* fetch destination pixels */
-			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
+			sxm(SX_LDUQ0, dstx, 60, 3);
 			/* duplicate them for all channels */
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 13, 16, 3));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 14, 20, 3));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 15, 24, 3));
-			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
+			sxi(SX_ORS(0, 13, 16, 3));
+			sxi(SX_ORS(0, 14, 20, 3));
+			sxi(SX_ORS(0, 15, 24, 3));
+			sxi(SX_ORS(0, 12, 13, 2));
 			/* generate inverted alpha */
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_XORS(12, 8, 28, 15));
+			sxi(SX_XORS(12, 8, 28, 15));
 			/* multiply source */
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_MUL16X16SR8(8, 12, 44, 3));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_MUL16X16SR8(8, 16, 48, 3));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_MUL16X16SR8(8, 20, 52, 3));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_MUL16X16SR8(8, 24, 56, 3));
+			sxi(SX_MUL16X16SR8(8, 12, 44, 3));
+			sxi(SX_MUL16X16SR8(8, 16, 48, 3));
+			sxi(SX_MUL16X16SR8(8, 20, 52, 3));
+			sxi(SX_MUL16X16SR8(8, 24, 56, 3));
 			/* multiply dest */
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_MUL16X16SR8(28, 60, 76, 15));
+			sxi(SX_MUL16X16SR8(28, 60, 76, 15));
 			/* add up */
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(44, 76, 92, 15));
+			sxi(SX_ADDV(44, 76, 92, 15));
 			/* write back */
 			if (rest < 4) {
-				write_sx_io(p, dstx, SX_STUQ0C(92, rest - 1, dstx & 7));
+				sxm(SX_STUQ0C, dstx, 92, rest - 1);
 			} else {
-				write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
+				sxm(SX_STUQ0C, dstx, 92, 3);
 			}
 			dstx += 16;
 			mskx += 4;
@@ -226,7 +206,7 @@ void CG14Comp_Over8Solid(Cg14Ptr p,
 				/* nothing to do - all transparent */
 			} else if (m == 0xff) {
 				/* all opaque */
-				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
+				sxm(SX_STUQ0, dstx, 8, 0);
 			} else {
 				/* fetch alpha value, stick it into scam */
 				/* mask is in R[12:15] */
@@ -234,28 +214,22 @@ void CG14Comp_Over8Solid(Cg14Ptr p,
 				/*write_sx_io(p, mskx & ~7,
 				    SX_LDB(12, 0, mskx & 7));*/
 				write_sx_reg(p, SX_QUEUED(12), m);
 				/* fetch dst pixel */
-				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ORV(12, 0, R_SCAM, 0));
+				sxm(SX_LDUQ0, dstx, 20, 0);
+				sxi(SX_ORV(12, 0, R_SCAM, 0));
 				/*
 				 * src * alpha + R0
 				 * R[9:11] * SCAM + R0 -> R[17:19]
 				 */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(9, 0, 17, 2));
+				sxi(SX_SAXP16X16SR8(9, 0, 17, 2));
 				/* invert SCAM */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_XORV(12, 8, R_SCAM, 0));
+				sxi(SX_XORV(12, 8, R_SCAM, 0));
#ifdef SX_DEBUG
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_XORV(12, 8, 13, 0));
+				sxi(SX_XORV(12, 8, 13, 0));
#endif
 				/* dst * (1 - alpha) + R[13:15] */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(21, 17, 25, 2));
-				write_sx_io(p, dstx,
-				    SX_STUQ0C(24, 0, dstx & 7));
+				sxi(SX_SAXP16X16SR8(21, 17, 25, 2));
+				sxm(SX_STUQ0C, dstx, 24, 0);
 			}
 			dstx += 4;
 			mskx += 1;
@@ -287,30 +261,25 @@ void CG14Comp_Add32(Cg14Ptr p,
 		srcx = src;
 		dstx = dst;
 		for (x = 0; x < full; x++) {
-			write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7));
-			write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(8, 40, 72, 15));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(24, 56, 88, 15));
-			write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7));
+			sxm(SX_LDUQ0, srcx, 8, 31);
+			sxm(SX_LDUQ0, dstx, 40, 31);
+			sxi(SX_ADDV(8, 40, 72, 15));
+			sxi(SX_ADDV(24, 56, 88, 15));
+			sxm(SX_STUQ0, dstx, 72, 31);
 			srcx += 128;
 			dstx += 128;
 		}
 
 		/* do leftovers */
-		write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7));
-		write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7));
+		sxm(SX_LDUQ0, srcx, 8, part - 1);
+		sxm(SX_LDUQ0, dstx, 40, part - 1);
 		if (part & 16) {
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(8, 40, 72, 15));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(24, 56, 88, part - 17));
+			sxi(SX_ADDV(8, 40, 72, 15));
+			sxi(SX_ADDV(24, 56, 88, part - 17));
 		} else {
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(8, 40, 72, part - 1));
+			sxi(SX_ADDV(8, 40, 72, part - 1));
 		}
-		write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7));
+		sxm(SX_STUQ0, dstx, 72, part - 1);
 
 		/* next line */
 		src += srcpitch;
@@ -355,10 +324,8 @@ void CG14Comp_Add8(Cg14Ptr p,
 		for (x = 0; x < full; x++) {
 			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
 			write_sx_io(p, dstx, SX_LDB(40, 31, dstoff));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(8, 40, 72, 15));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(24, 56, 88, 15));
+			sxi(SX_ADDV(8, 40, 72, 15));
+			sxi(SX_ADDV(24, 56, 88, 15));
 			write_sx_io(p, dstx, SX_STBC(72, 31, dstoff));
 			srcx += 32;
 			dstx += 32;
@@ -369,13 +336,10 @@ void CG14Comp_Add8(Cg14Ptr p,
 			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
 			write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff));
 			if (part > 16) {
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ADDV(8, 40, 72, 15));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ADDV(24, 56, 88, part - 17));
+				sxi(SX_ADDV(8, 40, 72, 15));
+				sxi(SX_ADDV(24, 56, 88, part - 17));
 			} else {
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ADDV(8, 40, 72, part - 1));
+				sxi(SX_ADDV(8, 40, 72, part - 1));
 			}
 			write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff));
 		}
@@ -426,10 +390,8 @@ void CG14Comp_Add8_32(Cg14Ptr p,
 			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
 			/* load alpha from destination */
 			write_sx_io(p, dstx, SX_LDUC0(40, 31, dstoff));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(8, 40, 72, 15));
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_ADDV(24, 56, 88, 15));
+			sxi(SX_ADDV(8, 40, 72, 15));
+			sxi(SX_ADDV(24, 56, 88, 15));
 			/* write clamped values back into dest alpha */
 			write_sx_io(p, dstx, SX_STUC0C(72, 31, dstoff));
 			srcx += 32;
@@ -441,13 +403,10 @@ void CG14Comp_Add8_32(Cg14Ptr p,
 			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
 			write_sx_io(p, dstx, SX_LDUC0(40, part - 1, dstoff));
 			if (part > 16) {
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ADDV(8, 40, 72, 15));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ADDV(24, 56, 88, part - 17));
+				sxi(SX_ADDV(8, 40, 72, 15));
+				sxi(SX_ADDV(24, 56, 88, part - 17));
 			} else {
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ADDV(8, 40, 72, part - 1));
+				sxi(SX_ADDV(8, 40, 72, part - 1));
 			}
 			write_sx_io(p, dstx, SX_STUC0C(72, part - 1, dstoff));
 		}
@@ -488,31 +447,24 @@ void CG14Comp_Over32(Cg14Ptr p,
 				continue;
 			}
 			/* fetch source pixels */
-			write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7));
+			sxm(SX_LDUQ0, srcx, 12, num - 1);
 			if (flip) {
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_GATHER(13, 4, 40, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_GATHER(15, 4, 44, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SCATTER(40, 4, 15, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SCATTER(44, 4, 13, num - 1));
+				sxi(SX_GATHER(13, 4, 40, num - 1));
+				sxi(SX_GATHER(15, 4, 44, num - 1));
+				sxi(SX_SCATTER(40, 4, 15, num - 1));
+				sxi(SX_SCATTER(44, 4, 13, num - 1));
 			}
 			/* fetch dst pixels */
-			write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7));
+			sxm(SX_LDUQ0, dstx, 44, num - 1);
 			/* now process up to 4 pixels */
 			for (i = 0; i < num; i++) {
 				int ii = i << 2;
 				/* write inverted alpha into SCAM */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_XORS(12 + ii, 8, R_SCAM, 0));
+				sxi(SX_XORS(12 + ii, 8, R_SCAM, 0));
 				/* dst * (1 - alpha) + src */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(44 + ii, 12 + ii, 76 + ii, 3));
+				sxi(SX_SAXP16X16SR8(44 + ii, 12 + ii, 76 + ii, 3));
 			}
-			write_sx_io(p, dstx,
-			    SX_STUQ0C(76, num - 1, dstx & 7));
+			sxm(SX_STUQ0C, dstx, 76, num - 1);
 			srcx += 16;
 			dstx += 16;
 		}
@@ -546,39 +498,30 @@ void CG14Comp_Over32Mask(Cg14Ptr p,
 				continue;
 			}
 			/* fetch source pixels */
-			write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7));
+			sxm(SX_LDUQ0, srcx, 12, num - 1);
 			if (flip) {
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_GATHER(13, 4, 40, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_GATHER(15, 4, 44, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SCATTER(40, 4, 15, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SCATTER(44, 4, 13, num - 1));
+				sxi(SX_GATHER(13, 4, 40, num - 1));
+				sxi(SX_GATHER(15, 4, 44, num - 1));
+				sxi(SX_SCATTER(40, 4, 15, num - 1));
+				sxi(SX_SCATTER(44, 4, 13, num - 1));
 			}
 			/* fetch mask */
-			write_sx_io(p, mskx, SX_LDB(28, num - 1, mskx & 7));
+			sxm(SX_LDB, mskx, 28, num - 1);
 			/* fetch dst pixels */
-			write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7));
+			sxm(SX_LDUQ0, dstx, 44, num - 1);
 			/* now process up to 4 pixels */
 			for (i = 0; i < num; i++) {
 				int ii = i << 2;
 				/* mask alpha to SCAM */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ORS(28 + i, 0, R_SCAM, 0));
+				sxi(SX_ORS(28 + i, 0, R_SCAM, 0));
 				/* src * alpha */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
+				sxi(SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
 				/* write inverted alpha into SCAM */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_XORS(28 + i, 8, R_SCAM, 0));
+				sxi(SX_XORS(28 + i, 8, R_SCAM, 0));
 				/* dst * (1 - alpha) + R[60:] */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
+				sxi(SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
 			}
-			write_sx_io(p, dstx,
-			    SX_STUQ0C(76, num - 1, dstx & 7));
+			sxm(SX_STUQ0C, dstx, 76, num - 1);
 			srcx += 16;
 			mskx += 4;
 			dstx += 16;
@@ -602,7 +545,7 @@ void CG14Comp_Over32Mask_noalpha(Cg14Ptr
 
 	write_sx_reg(p, SX_QUEUED(8), 0xff);
 	write_sx_reg(p, SX_QUEUED(9), 0xff);
-	write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(8, 0, 10, 1));
+	sxi(SX_ORS(8, 0, 10, 1));
 	for (line = 0; line < height; line++) {
 		srcx = src;
 		mskx = msk;
@@ -616,42 +559,32 @@ void CG14Comp_Over32Mask_noalpha(Cg14Ptr
 				continue;
 			}
 			/* fetch source pixels */
-			write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7));
+			sxm(SX_LDUQ0, srcx, 12, num - 1);
 			if (flip) {
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_GATHER(13, 4, 40, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_GATHER(15, 4, 44, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SCATTER(40, 4, 15, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SCATTER(44, 4, 13, num - 1));
+				sxi(SX_GATHER(13, 4, 40, num - 1));
+				sxi(SX_GATHER(15, 4, 44, num - 1));
+				sxi(SX_SCATTER(40, 4, 15, num - 1));
+				sxi(SX_SCATTER(44, 4, 13, num - 1));
 			}
 			/* fetch mask */
-			write_sx_io(p, mskx, SX_LDB(28, num - 1, mskx & 7));
+			sxm(SX_LDB, mskx, 28, num - 1);
 			/* fetch dst pixels */
-			write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7));
+			sxm(SX_LDUQ0, dstx, 44, num - 1);
 			/* set src alpha to 0xff */
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_SCATTER(8, 4, 12, num - 1));
+			sxi(SX_SCATTER(8, 4, 12, num - 1));
 			/* now process up to 4 pixels */
 			for (i = 0; i < num; i++) {
 				int ii = i << 2;
 				/* mask alpha to SCAM */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ORS(28 + i, 0, R_SCAM, 0));
+				sxi(SX_ORS(28 + i, 0, R_SCAM, 0));
 				/* src * alpha */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
+				sxi(SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
 				/* write inverted alpha into SCAM */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_XORS(28 + i, 8, R_SCAM, 0));
+				sxi(SX_XORS(28 + i, 8, R_SCAM, 0));
 				/* dst * (1 - alpha) + R[60:] */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
+				sxi(SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
 			}
-			write_sx_io(p, dstx,
-			    SX_STUQ0C(76, num - 1, dstx & 7));
+			sxm(SX_STUQ0C, dstx, 76, num - 1);
 			srcx += 16;
 			mskx += 4;
 			dstx += 16;
@@ -675,7 +608,7 @@ void CG14Comp_Over32Mask32_noalpha(Cg14P
 
 	write_sx_reg(p, SX_QUEUED(8), 0xff);
 	write_sx_reg(p, SX_QUEUED(9), 0xff);
-	write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(8, 0, 10, 1));
+	sxi(SX_ORS(8, 0, 10, 1));
 	for (line = 0; line < height; line++) {
 		srcx = src;
 		mskx = msk;
@@ -689,42 +622,32 @@ void CG14Comp_Over32Mask32_noalpha(Cg14P
 				continue;
 			}
 			/* fetch source pixels */
-			write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7));
+			sxm(SX_LDUQ0, srcx, 12, num - 1);
 			if (flip) {
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_GATHER(13, 4, 40, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_GATHER(15, 4, 44, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SCATTER(40, 4, 15, num - 1));
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SCATTER(44, 4, 13, num - 1));
+				sxi(SX_GATHER(13, 4, 40, num - 1));
+				sxi(SX_GATHER(15, 4, 44, num - 1));
+				sxi(SX_SCATTER(40, 4, 15, num - 1));
+				sxi(SX_SCATTER(44, 4, 13, num - 1));
 			}
 			/* fetch mask */
-			write_sx_io(p, mskx, SX_LDUQ0(28, num - 1, mskx & 7));
+			sxm(SX_LDUQ0, mskx, 28, num - 1);
 			/* fetch dst pixels */
-			write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7));
+			sxm(SX_LDUQ0, dstx, 44, num - 1);
 			/* set src alpha to 0xff */
-			write_sx_reg(p, SX_INSTRUCTIONS,
-			    SX_SCATTER(8, 4, 12, num - 1));
+			sxi(SX_SCATTER(8, 4, 12, num - 1));
 			/* now process up to 4 pixels */
 			for (i = 0; i < num; i++) {
 				int ii = i << 2;
 				/* mask alpha to SCAM */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_ORS(28 + ii, 0, R_SCAM, 0));
+				sxi(SX_ORS(28 + ii, 0, R_SCAM, 0));
 				/* src * alpha */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
+				sxi(SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
 				/* write inverted alpha into SCAM */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_XORS(28 + ii, 8, R_SCAM, 0));
+				sxi(SX_XORS(28 + ii, 8, R_SCAM, 0));
 				/* dst * (1 - alpha) + R[60:] */
-				write_sx_reg(p, SX_INSTRUCTIONS,
-				    SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
+				sxi(SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
 			}
-			write_sx_io(p, dstx,
-			    SX_STUQ0C(76, num - 1, dstx & 7));
+			sxm(SX_STUQ0C, dstx, 76, num - 1);
 			srcx += 16;
 			mskx += 16;
 			dstx += 16;