Module Name: xsrc Committed By: macallan Date: Fri Dec 8 22:49:37 UTC 2017
Modified Files: xsrc/external/mit/xf86-video-suncg14/dist/src: cg14_render.c Log Message: do up to 4 pixels at a time in CG14Comp_Over*() To generate a diff of this commit: cvs rdiff -u -r1.11 -r1.12 \ xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c diff -u xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c:1.11 xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c:1.12 --- xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c:1.11 Thu Dec 7 19:23:22 2017 +++ xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c Fri Dec 8 22:49:37 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: cg14_render.c,v 1.11 2017/12/07 19:23:22 macallan Exp $ */ +/* $NetBSD: cg14_render.c,v 1.12 2017/12/08 22:49:37 macallan Exp $ */ /* * Copyright (c) 2013 Michael Lorenz * All rights reserved. @@ -471,8 +471,8 @@ void CG14Comp_Over32(Cg14Ptr p, uint32_t dst, uint32_t dstpitch, int width, int height, int flip) { - uint32_t srcx, dstx, m; - int line, x, i; + uint32_t srcx, dstx, mskx, m; + int line, x, i, num; ENTER; @@ -481,33 +481,44 @@ void CG14Comp_Over32(Cg14Ptr p, srcx = src; dstx = dst; - for (x = 0; x < width; x++) { - /* fetch source pixel */ - write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); + for (x = 0; x < width; x += 4) { + /* we do up to 4 pixels at a time */ + num = min(4, width - x); + if (num <= 0) { + xf86Msg(X_ERROR, "wtf?!\n"); + continue; + } + /* fetch source pixels */ + write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7)); if (flip) { write_sx_reg(p, SX_INSTRUCTIONS, - SX_ORS(13, 0, 40, 0)); + SX_GATHER(13, 4, 40, num - 1)); write_sx_reg(p, SX_INSTRUCTIONS, - SX_ORS(15, 0, 13, 0)); + SX_GATHER(15, 4, 44, num - 1)); write_sx_reg(p, SX_INSTRUCTIONS, - SX_ORS(40, 0, 15, 0)); + SX_SCATTER(40, 4, 15, num - 1)); + write_sx_reg(p, SX_INSTRUCTIONS, + SX_SCATTER(44, 4, 13, num - 1)); + } + /* fetch dst pixels */ + write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7)); + /* now process up to 4 pixels */ + for (i = 0; i < num; i++) { + int ii = i << 2; + /* write inverted alpha into SCAM */ + write_sx_reg(p, SX_INSTRUCTIONS, + SX_XORS(12 + ii, 8, R_SCAM, 0)); + /* dst * (1 - alpha) + src */ + write_sx_reg(p, SX_INSTRUCTIONS, + 
SX_SAXP16X16SR8(44 + ii, 12 + ii, 76 + ii, 3)); } - /* fetch dst pixel */ - write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); - /* src is premultiplied with alpha */ - /* write inverted alpha into SCAM */ - write_sx_reg(p, SX_INSTRUCTIONS, - SX_XORV(12, 8, R_SCAM, 0)); - /* dst * (1 - alpha) + R[13:15] */ - write_sx_reg(p, SX_INSTRUCTIONS, - SX_SAXP16X16SR8(20, 12, 24, 3)); write_sx_io(p, dstx, - SX_STUQ0C(24, 0, dstx & 7)); - dstx += 4; - srcx += 4; + SX_STUQ0C(76, num - 1, dstx & 7)); + srcx += 16; + dstx += 16; } - dst += dstpitch; src += srcpitch; + dst += dstpitch; } } @@ -518,7 +529,7 @@ void CG14Comp_Over32Mask(Cg14Ptr p, int width, int height, int flip) { uint32_t srcx, dstx, mskx, m; - int line, x, i; + int line, x, i, num; ENTER; @@ -528,39 +539,50 @@ void CG14Comp_Over32Mask(Cg14Ptr p, mskx = msk; dstx = dst; - for (x = 0; x < width; x++) { - /* fetch source pixel */ - write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); + for (x = 0; x < width; x += 4) { + /* we do up to 4 pixels at a time */ + num = min(4, width - x); + if (num <= 0) { + xf86Msg(X_ERROR, "wtf?!\n"); + continue; + } + /* fetch source pixels */ + write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7)); if (flip) { write_sx_reg(p, SX_INSTRUCTIONS, - SX_ORS(13, 0, 40, 0)); + SX_GATHER(13, 4, 40, num - 1)); + write_sx_reg(p, SX_INSTRUCTIONS, + SX_GATHER(15, 4, 44, num - 1)); write_sx_reg(p, SX_INSTRUCTIONS, - SX_ORS(15, 0, 13, 0)); + SX_SCATTER(40, 4, 15, num - 1)); write_sx_reg(p, SX_INSTRUCTIONS, - SX_ORS(40, 0, 15, 0)); + SX_SCATTER(44, 4, 13, num - 1)); } /* fetch mask */ - write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7)); - /* fetch dst pixel */ - write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); - /* stick mask alpha into SCAM */ - write_sx_reg(p, SX_INSTRUCTIONS, - SX_ORS(9, 0, R_SCAM, 0)); - /* apply mask */ - /* src is premultiplied with alpha */ - write_sx_reg(p, SX_INSTRUCTIONS, - SX_SAXP16X16SR8(12, 0, 16, 3)); - /* write inverted alpha into SCAM */ - write_sx_reg(p, SX_INSTRUCTIONS, 
- SX_XORV(16, 8, R_SCAM, 0)); - /* dst * (1 - alpha) + R[13:15] */ - write_sx_reg(p, SX_INSTRUCTIONS, - SX_SAXP16X16SR8(20, 16, 24, 3)); + write_sx_io(p, mskx, SX_LDB(28, num - 1, mskx & 7)); + /* fetch dst pixels */ + write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7)); + /* now process up to 4 pixels */ + for (i = 0; i < num; i++) { + int ii = i << 2; + /* mask alpha to SCAM */ + write_sx_reg(p, SX_INSTRUCTIONS, + SX_ORS(28 + i, 0, R_SCAM, 0)); + /* src * alpha */ + write_sx_reg(p, SX_INSTRUCTIONS, + SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3)); + /* write inverted alpha into SCAM */ + write_sx_reg(p, SX_INSTRUCTIONS, + SX_XORS(28 + i, 8, R_SCAM, 0)); + /* dst * (1 - alpha) + R[60:] */ + write_sx_reg(p, SX_INSTRUCTIONS, + SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3)); + } write_sx_io(p, dstx, - SX_STUQ0C(24, 0, dstx & 7)); - srcx += 4; - mskx += 1; - dstx += 4; + SX_STUQ0C(76, num - 1, dstx & 7)); + srcx += 16; + mskx += 4; + dstx += 16; } src += srcpitch; msk += mskpitch; @@ -575,51 +597,65 @@ void CG14Comp_Over32Mask_noalpha(Cg14Ptr int width, int height, int flip) { uint32_t srcx, dstx, mskx, m; - int line, x, i; + int line, x, i, num; ENTER; write_sx_reg(p, SX_QUEUED(8), 0xff); + write_sx_reg(p, SX_QUEUED(9), 0xff); + write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(8, 0, 10, 1)); for (line = 0; line < height; line++) { srcx = src; mskx = msk; dstx = dst; - for (x = 0; x < width; x++) { - /* fetch source pixel */ - write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); + for (x = 0; x < width; x += 4) { + /* we do up to 4 pixels at a time */ + num = min(4, width - x); + if (num <= 0) { + xf86Msg(X_ERROR, "wtf?!\n"); + continue; + } + /* fetch source pixels */ + write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7)); if (flip) { write_sx_reg(p, SX_INSTRUCTIONS, - SX_ORS(13, 0, 40, 0)); + SX_GATHER(13, 4, 40, num - 1)); + write_sx_reg(p, SX_INSTRUCTIONS, + SX_GATHER(15, 4, 44, num - 1)); write_sx_reg(p, SX_INSTRUCTIONS, - SX_ORS(15, 0, 13, 0)); + SX_SCATTER(40, 4, 15, num - 
1)); write_sx_reg(p, SX_INSTRUCTIONS, - SX_ORS(40, 0, 15, 0)); + SX_SCATTER(44, 4, 13, num - 1)); } - /* set src alpha to 0xff */ - write_sx_reg(p, SX_INSTRUCTIONS, - SX_ORS(8, 0, 12, 0)); /* fetch mask */ - write_sx_io(p, mskx & (~7), SX_LDB(9, 0, mskx & 7)); - /* fetch dst pixel */ - write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); - /* write alpha into SCAM */ - write_sx_reg(p, SX_INSTRUCTIONS, - SX_ORS(9, 0, R_SCAM, 0)); - /* src * alpha + R0 */ - write_sx_reg(p, SX_INSTRUCTIONS, - SX_SAXP16X16SR8(12, 0, 16, 3)); - /* write inverted alpha into SCAM */ - write_sx_reg(p, SX_INSTRUCTIONS, - SX_XORV(9, 8, R_SCAM, 0)); - /* dst * (1 - alpha) + R[13:15] */ - write_sx_reg(p, SX_INSTRUCTIONS, - SX_SAXP16X16SR8(20, 16, 24, 3)); + write_sx_io(p, mskx, SX_LDB(28, num - 1, mskx & 7)); + /* fetch dst pixels */ + write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7)); + /* set src alpha to 0xff */ + write_sx_reg(p, SX_INSTRUCTIONS, + SX_SCATTER(8, 4, 12, num - 1)); + /* now process up to 4 pixels */ + for (i = 0; i < num; i++) { + int ii = i << 2; + /* mask alpha to SCAM */ + write_sx_reg(p, SX_INSTRUCTIONS, + SX_ORS(28 + i, 0, R_SCAM, 0)); + /* src * alpha */ + write_sx_reg(p, SX_INSTRUCTIONS, + SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3)); + /* write inverted alpha into SCAM */ + write_sx_reg(p, SX_INSTRUCTIONS, + SX_XORS(28 + i, 8, R_SCAM, 0)); + /* dst * (1 - alpha) + R[60:] */ + write_sx_reg(p, SX_INSTRUCTIONS, + SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3)); + } write_sx_io(p, dstx, - SX_STUQ0C(24, 0, dstx & 7)); - srcx += 4; - mskx += 1; - dstx += 4; + SX_STUQ0C(76, num - 1, dstx & 7)); + srcx += 16; + mskx += 4; + dstx += 16; } src += srcpitch; msk += mskpitch; @@ -634,51 +670,65 @@ void CG14Comp_Over32Mask32_noalpha(Cg14P int width, int height, int flip) { uint32_t srcx, dstx, mskx, m; - int line, x, i; + int line, x, i, num; ENTER; write_sx_reg(p, SX_QUEUED(8), 0xff); + write_sx_reg(p, SX_QUEUED(9), 0xff); + write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(8, 0, 10, 1)); for 
(line = 0; line < height; line++) { srcx = src; mskx = msk; dstx = dst; - for (x = 0; x < width; x++) { - /* fetch source pixel */ - write_sx_io(p, srcx, SX_LDUQ0(12, 0, srcx & 7)); + for (x = 0; x < width; x += 4) { + /* we do up to 4 pixels at a time */ + num = min(4, width - x); + if (num <= 0) { + xf86Msg(X_ERROR, "wtf?!\n"); + continue; + } + /* fetch source pixels */ + write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7)); if (flip) { write_sx_reg(p, SX_INSTRUCTIONS, - SX_ORS(13, 0, 40, 0)); + SX_GATHER(13, 4, 40, num - 1)); write_sx_reg(p, SX_INSTRUCTIONS, - SX_ORS(15, 0, 13, 0)); + SX_GATHER(15, 4, 44, num - 1)); write_sx_reg(p, SX_INSTRUCTIONS, - SX_ORS(40, 0, 15, 0)); + SX_SCATTER(40, 4, 15, num - 1)); + write_sx_reg(p, SX_INSTRUCTIONS, + SX_SCATTER(44, 4, 13, num - 1)); } /* fetch mask */ - write_sx_io(p, mskx, SX_LDUQ0(16, 0, mskx & 7)); - /* fetch dst pixel */ - write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7)); - /* set src alpha to 0xff */ - write_sx_reg(p, SX_INSTRUCTIONS, - SX_ORS(8, 0, 12, 0)); - /* mask alpha to SCAM */ - write_sx_reg(p, SX_INSTRUCTIONS, - SX_ORS(16, 0, R_SCAM, 0)); - /* src * alpha */ - write_sx_reg(p, SX_INSTRUCTIONS, - SX_SAXP16X16SR8(12, 0, 24, 3)); - /* write inverted alpha into SCAM */ - write_sx_reg(p, SX_INSTRUCTIONS, - SX_XORS(16, 8, R_SCAM, 0)); - /* dst * (1 - alpha) + R[24:31] */ - write_sx_reg(p, SX_INSTRUCTIONS, - SX_SAXP16X16SR8(20, 24, 28, 3)); + write_sx_io(p, mskx, SX_LDUQ0(28, num - 1, mskx & 7)); + /* fetch dst pixels */ + write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7)); + /* set src alpha to 0xff */ + write_sx_reg(p, SX_INSTRUCTIONS, + SX_SCATTER(8, 4, 12, num - 1)); + /* now process up to 4 pixels */ + for (i = 0; i < num; i++) { + int ii = i << 2; + /* mask alpha to SCAM */ + write_sx_reg(p, SX_INSTRUCTIONS, + SX_ORS(28 + ii, 0, R_SCAM, 0)); + /* src * alpha */ + write_sx_reg(p, SX_INSTRUCTIONS, + SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3)); + /* write inverted alpha into SCAM */ + write_sx_reg(p, 
SX_INSTRUCTIONS, + SX_XORS(28 + ii, 8, R_SCAM, 0)); + /* dst * (1 - alpha) + R[60:] */ + write_sx_reg(p, SX_INSTRUCTIONS, + SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3)); + } write_sx_io(p, dstx, - SX_STUQ0C(28, 0, dstx & 7)); - srcx += 4; - mskx += 4; - dstx += 4; + SX_STUQ0C(76, num - 1, dstx & 7)); + srcx += 16; + mskx += 16; + dstx += 16; } src += srcpitch; msk += mskpitch;